From e6cfcb8352dadc972e492f9a50a0bbc8ee69cc2f Mon Sep 17 00:00:00 2001
From: fuhui
Date: Wed, 15 May 2024 11:13:28 +0800
Subject: [PATCH] debug mmultimodel cache

---
 modelcache/embedding/clip_demo.py       | 49 -------------------
 modelcache_mm/adapter/adapter_insert.py | 19 +++----
 modelcache_mm/adapter/adapter_query.py  | 28 ++++++-----
 modelcache_mm/core.py                   |  3 --
 modelcache_mm/embedding/__init__.py     | 31 ++----------
 .../embedding/clip.py                   | 12 ++---
 6 files changed, 33 insertions(+), 109 deletions(-)
 delete mode 100644 modelcache/embedding/clip_demo.py
 rename {modelcache => modelcache_mm}/embedding/clip.py (88%)

diff --git a/modelcache/embedding/clip_demo.py b/modelcache/embedding/clip_demo.py
deleted file mode 100644
index f6e157b..0000000
--- a/modelcache/embedding/clip_demo.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-    Alipay.com Inc.
-    Copyright (c) 2004-2023 All Rights Reserved.
-    ------------------------------------------------------
-    File Name   : clip_demo.py
-    Author      : fuhui.phe
-    Create Time : 2024/5/7 11:58
-    Description : description what the main function of this file
-    Change Activity:
-                  version0 : 2024/5/7 11:58 by fuhui.phe  init
-"""
-import torch
-from modelscope.utils.constant import Tasks
-from modelscope.pipelines import pipeline
-from modelscope.preprocessors.image import load_image
-
-
-pipeline = pipeline(task=Tasks.multi_modal_embedding,
-                    model='damo/multi-modal_clip-vit-base-patch16_zh', model_revision='v1.0.1')
-
-# pipeline = pipeline(task=Tasks.multi_modal_embedding,
-#                     model='/Users/penghongen/PycharmProjects/CodeFuse-ModelCache/model/clip_zh', model_revision='v1.0.1')
-
-# pipeline = pipeline(task=Tasks.multi_modal_embedding, model='/Users/penghongen/PycharmProjects/CodeFuse-ModelCache/model/clip_zh')
-
-
-input_img = load_image('https://clip-cn-beijing.oss-cn-beijing.aliyuncs.com/pokemon.jpeg')  # accepts the Pikachu sample image URL or a local image path; returns a PIL.Image
-
-
-input_texts = ["杰尼龟", "妙蛙种子", "小火龙", "皮卡丘"]
-
-# accepts a single image (PIL.Image) or multiple images (List[PIL.Image]); returns normalized feature vectors
-img_embedding = pipeline.forward({'img': input_img})['img_embedding']  # 2D Tensor, [num_images, embedding_dim]
-print('img_embedding: {}'.format(img_embedding))
-
-# accepts a single text (str) or multiple texts (List[str]); returns normalized feature vectors
-text_embedding = pipeline.forward({'text': input_texts})['text_embedding']  # 2D Tensor, [num_texts, embedding_dim]
-
-# compute image-text similarity
-with torch.no_grad():
-    # inner product gives the logits, scaled by the model temperature
-    logits_per_image = (img_embedding / pipeline.model.temperature) @ text_embedding.t()
-    # turn the logits into a probability distribution
-    probs = logits_per_image.softmax(dim=-1).cpu().numpy()
-
-print("图文匹配概率:", probs)
-
-
diff --git a/modelcache_mm/adapter/adapter_insert.py b/modelcache_mm/adapter/adapter_insert.py
index 2d94798..0659710 100644
--- a/modelcache_mm/adapter/adapter_insert.py
+++ b/modelcache_mm/adapter/adapter_insert.py
@@ -38,12 +38,13 @@ def adapt_insert(*args, **kwargs):
             raise ValueError("Both pre_embedding_image_url and pre_embedding_image_raw cannot be non-empty at the same time.")
 
         if pre_embedding_image_url:
-            url_start_time = time.time()
-            response = requests.get(pre_embedding_image_url)
-            image_data = response.content
-            pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
-            get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
-            print('get_image_time: {}'.format(get_image_time))
+            # url_start_time = time.time()
+            # response = requests.get(pre_embedding_image_url)
+            # image_data = response.content
+            # pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
+            # get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
+            # print('get_image_time: {}'.format(get_image_time))
+            pre_embedding_image = pre_embedding_image_url
         elif pre_embedding_image_raw:
             pre_embedding_image = pre_embedding_image_raw
         else:
@@ -70,9 +71,9 @@ def adapt_insert(*args, **kwargs):
         print('text_embeddings: {}'.format(text_embeddings))
 
         if len(image_embeddings) > 0 and len(image_embeddings) > 0:
-            image_embedding = np.array(image_embeddings[0])
-            text_embedding = text_embeddings[0]
-            embedding_data = np.concatenate((image_embedding, text_embedding))
+            # image_embedding = np.array(image_embeddings[0])
+            # text_embedding = text_embeddings[0]
+            embedding_data = np.concatenate((image_embeddings, text_embeddings))
             mm_type = 'mm'
         elif len(image_embeddings) > 0:
             image_embedding = np.array(image_embeddings[0])
diff --git a/modelcache_mm/adapter/adapter_query.py b/modelcache_mm/adapter/adapter_query.py
index 88a52c5..7eeeab5 100644
--- a/modelcache_mm/adapter/adapter_query.py
+++ b/modelcache_mm/adapter/adapter_query.py
@@ -30,8 +30,6 @@ def adapt_query(cache_data_convert, *args, **kwargs):
     pre_embedding_image_raw = pre_embedding_data_dict['imageRaw']
     pre_embedding_image_url = pre_embedding_data_dict['imageUrl']
     pre_multi_type = pre_embedding_data_dict['multiType']
-    # print('pre_embedding_image_url: {}'.format(pre_embedding_image_url))
-    # print('pre_embedding_text: {}'.format(pre_embedding_text))
 
     # branching logic
     if pre_multi_type == 'IMG_TEXT':
@@ -39,12 +37,13 @@ def adapt_query(cache_data_convert, *args, **kwargs):
             raise ValueError(
                 "Both pre_embedding_imageUrl and pre_embedding_imageRaw cannot be non-empty at the same time.")
         if pre_embedding_image_url:
-            url_start_time = time.time()
-            response = requests.get(pre_embedding_image_url)
-            image_data = response.content
-            pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
-            get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
-            print('get_image_time: {}'.format(get_image_time))
+            # url_start_time = time.time()
+            # response = requests.get(pre_embedding_image_url)
+            # image_data = response.content
+            # pre_embedding_image = base64.b64encode(image_data).decode('utf-8')
+            # get_image_time = '{}s'.format(round(time.time() - url_start_time, 2))
+            # print('get_image_time: {}'.format(get_image_time))
+            pre_embedding_image = pre_embedding_image_url
         elif pre_embedding_image_raw:
             pre_embedding_image = pre_embedding_image_raw
         else:
@@ -63,7 +62,7 @@ def adapt_query(cache_data_convert, *args, **kwargs):
     if cache_enable:
         if pre_multi_type == 'IMG_TEXT':
             embedding_data_resp = time_cal(
-                chat_cache.embedding_concurrent_func,
+                chat_cache.embedding_func,
                 func_name="iat_embedding",
                 report_func=chat_cache.report.embedding,
             )(data_dict)
@@ -76,10 +75,15 @@ def adapt_query(cache_data_convert, *args, **kwargs):
         image_embeddings = embedding_data_resp['image_embedding']
         text_embeddings = embedding_data_resp['text_embeddings']
 
+        print('image_embeddings: {}'.format(image_embeddings))
+        print('image_embeddings_len: {}'.format(len(image_embeddings)))
+        print('text_embeddings: {}'.format(text_embeddings))
+        print('text_embeddings_len: {}'.format(len(text_embeddings)))
+
         if len(image_embeddings) > 0 and len(image_embeddings) > 0:
-            image_embedding = np.array(image_embeddings[0])
-            text_embedding = np.array(text_embeddings[0])
-            embedding_data = np.concatenate((image_embedding, text_embedding))
+            # image_embedding = np.array(image_embeddings[0])
+            # text_embedding = np.array(text_embeddings[0])
+            embedding_data = np.concatenate((image_embeddings, text_embeddings))
             mm_type = 'mm'
         elif len(image_embeddings) > 0:
             image_embedding = np.array(image_embeddings[0])
diff --git a/modelcache_mm/core.py b/modelcache_mm/core.py
index 2c6c037..d6ff6fb 100644
--- a/modelcache_mm/core.py
+++ b/modelcache_mm/core.py
@@ -20,7 +20,6 @@ def __init__(self):
         self.query_pre_embedding_func = None
         self.insert_pre_embedding_func = None
         self.embedding_func = None
-        self.embedding_concurrent_func = None
         self.data_manager: Optional[DataManager] = None
         self.similarity_evaluation: Optional[SimilarityEvaluation] = None
         self.post_process_messages_func = None
@@ -34,7 +33,6 @@ def init(
         query_pre_embedding_func=None,
         insert_pre_embedding_func=None,
         embedding_func=string_embedding,
-        embedding_concurrent_func=string_embedding,
         data_manager: DataManager = get_data_manager(),
         similarity_evaluation=ExactMatchEvaluation(),
         post_process_messages_func=first,
@@ -46,7 +44,6 @@ def init(
         self.query_pre_embedding_func = query_pre_embedding_func
         self.insert_pre_embedding_func = insert_pre_embedding_func
         self.embedding_func = embedding_func
-        self.embedding_concurrent_func = embedding_concurrent_func
         self.data_manager: DataManager = data_manager
         self.similarity_evaluation = similarity_evaluation
         self.post_process_messages_func = post_process_messages_func
diff --git a/modelcache_mm/embedding/__init__.py b/modelcache_mm/embedding/__init__.py
index 03b6762..1275963 100644
--- a/modelcache_mm/embedding/__init__.py
+++ b/modelcache_mm/embedding/__init__.py
@@ -1,32 +1,7 @@
 # -*- coding: utf-8 -*-
 from modelcache.utils.lazy_import import LazyImport
-huggingface = LazyImport("huggingface", globals(), "modelcache.embedding.huggingface")
-data2vec = LazyImport("data2vec", globals(), "modelcache.embedding.data2vec")
-llmEmb = LazyImport("llmEmb", globals(), "modelcache.embedding.llmEmb")
-fasttext = LazyImport("fasttext", globals(), "modelcache.embedding.fasttext")
-paddlenlp = LazyImport("paddlenlp", globals(), "modelcache.embedding.paddlenlp")
-timm = LazyImport("timm", globals(), "modelcache.embedding.timm")
+clip = LazyImport("clip", globals(), "modelcache_mm.embedding.clip")
 
 
-def Huggingface(model="sentence-transformers/all-mpnet-base-v2"):
-    return huggingface.Huggingface(model)
-
-
-def Data2VecAudio(model="facebook/data2vec-audio-base-960h"):
-    return data2vec.Data2VecAudio(model)
-
-
-def LlmEmb2vecAudio():
-    return llmEmb.LlmEmb2Vec()
-
-
-def FastText(model="en", dim=None):
-    return fasttext.FastText(model, dim)
-
-
-def PaddleNLP(model="ernie-3.0-medium-zh"):
-    return paddlenlp.PaddleNLP(model)
-
-
-def Timm(model="resnet50", device="default"):
-    return timm.Timm(model, device)
+def Clip2Vec(model="damo/multi-modal_clip-vit-base-patch16_zh"):
+    return clip.ClipAudio(model)
diff --git a/modelcache/embedding/clip.py b/modelcache_mm/embedding/clip.py
similarity index 88%
rename from modelcache/embedding/clip.py
rename to modelcache_mm/embedding/clip.py
index cbe45ff..d718e43 100644
--- a/modelcache/embedding/clip.py
+++ b/modelcache_mm/embedding/clip.py
@@ -14,15 +14,11 @@
 
 
 class ClipAudio(BaseEmbedding):
-    def __init__(self, model: str = "sentence-transformers/all-MiniLM-L6-v2"):
-        # current_dir = os.path.dirname(os.path.abspath(__file__))
-        # parent_dir = os.path.dirname(current_dir)
-        # model_dir = os.path.dirname(parent_dir)
-        # model = os.path.join(model_dir, 'model/text2vec-base-chinese/')
-
+    def __init__(self, model: str = 'damo/multi-modal_clip-vit-base-patch16_zh'):
+        self.model = model
+        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
         self.clip_pipeline = pipeline(task=Tasks.multi_modal_embedding,
-                                      model='damo/multi-modal_clip-vit-base-patch16_zh', model_revision='v1.0.1')
-
+                                      model=model, model_revision='v1.0.1')
         self.__dimension = 1024
 
     def to_embeddings(self, data_dict, **_):