From 97ff557a9b1b797cb993fddb4eebc02df6e39936 Mon Sep 17 00:00:00 2001
From: Axlfc
Date: Mon, 20 Mar 2023 11:49:35 +0100
Subject: [PATCH] UPDATED: Changes related to #6

---
 Content/Python/chatBot/BLOOM.py                  |   9 +-
 Content/Python/chatBot/FLAN_T5.py                |   8 +-
 Content/Python/chatBot/GALACTICA.py              |   4 +
 Content/Python/chatBot/GPT-Neox.py               |  27 ----
 Content/Python/chatBot/GPT_4.py                  | 149 ++++++++++++++++++
 .../chatBot/{GPT-J_6B.py => GPT_J_6B.py}         |   0
 Content/Python/chatBot/GPT_Neo.py                |  11 +-
 Content/Python/chatBot/OPT.py                    |  48 +++---
 Content/Python/chatBot/Pygmalion.py              |  68 +++++---
 Content/Python/chatBot/T5.py                     |  43 +++--
 Content/Python/chatBot/emotion.py                |  12 ++
 Content/Python/chatBot/mT5.py                    |  45 ++++--
 .../get_huggingface_model_disk_space.py          |  47 ++++++
 13 files changed, 361 insertions(+), 110 deletions(-)
 delete mode 100644 Content/Python/chatBot/GPT-Neox.py
 create mode 100644 Content/Python/chatBot/GPT_4.py
 rename Content/Python/chatBot/{GPT-J_6B.py => GPT_J_6B.py} (100%)
 create mode 100644 Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py

diff --git a/Content/Python/chatBot/BLOOM.py b/Content/Python/chatBot/BLOOM.py
index 4c6cc76..03a86a0 100644
--- a/Content/Python/chatBot/BLOOM.py
+++ b/Content/Python/chatBot/BLOOM.py
@@ -3,9 +3,14 @@
 import torch
 
 
+models = ["bloom-560m", "bloom-1b1", "bloom-1b7", "bloom-3b", "bloom-7b1", "bloom"]
+current_model_name = "bigscience/" + models[0]
+
+
 def process_bot_answer(input_text, text_length=50):
-    model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
-    tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
+
+    model = AutoModelForCausalLM.from_pretrained(current_model_name)
+    tokenizer = AutoTokenizer.from_pretrained(current_model_name)
 
     # Tokenize the prompt and generate text using the BLOOM model
     inputs = tokenizer(input_text, return_tensors="pt")
diff --git a/Content/Python/chatBot/FLAN_T5.py b/Content/Python/chatBot/FLAN_T5.py
index 9873dbe..ca728d5 100644
--- a/Content/Python/chatBot/FLAN_T5.py
+++ b/Content/Python/chatBot/FLAN_T5.py
@@ -3,6 +3,10 @@
 # pip install -q transformers accelerate sentencepiece
 
 
+models = ["flan-t5-small", "flan-t5-base", "flan-t5-large", "flan-t5-xl", "flan-t5-xxl"]
+current_model_name = "google/" + models[0]
+
+
 def generate(input_text, tokenizer, model):
     input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
     outputs = model.generate(input_ids, max_length=200, bos_token_id=0)
@@ -11,8 +15,8 @@ def generate(input_text, tokenizer, model):
 
 
 def process_bot_answer(input_text):
-    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
-    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small", device_map="auto")
+    tokenizer = T5Tokenizer.from_pretrained(current_model_name)
+    model = T5ForConditionalGeneration.from_pretrained(current_model_name, device_map="auto")
 
     return generate(input_text, tokenizer, model)
diff --git a/Content/Python/chatBot/GALACTICA.py b/Content/Python/chatBot/GALACTICA.py
index e957d16..09036ef 100644
--- a/Content/Python/chatBot/GALACTICA.py
+++ b/Content/Python/chatBot/GALACTICA.py
@@ -3,6 +3,10 @@
 import sys
 
 
+models = ["galactica-125m", "galactica-1.3b", "galactica-6.7b", "galactica-30b", "galactica-120b"]
+current_model_name = "facebook/" + models[0]
+
+
 def process_bot_answer(input_text, text_length=200):
     tokenizer = AutoTokenizer.from_pretrained("facebook/galactica-125m")
     model = OPTForCausalLM.from_pretrained("facebook/galactica-125m", device_map="auto")
diff --git a/Content/Python/chatBot/GPT-Neox.py b/Content/Python/chatBot/GPT-Neox.py
deleted file mode 100644
index 055e271..0000000
--- a/Content/Python/chatBot/GPT-Neox.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-
-
-def main():
-    model_name = "EleutherAI/gpt-neox-c"
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-    model = GPTNeoForCausalLM.from_pretrained(model_name)
-
-    input_text = "I want to generate some text using GPT-Neox."
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-
-    sample_outputs = model.generate(
-        input_ids,
-        do_sample=True,
-        max_length=50,
-        top_k=50,
-        top_p=0.95,
-        temperature=1.0,
-        num_return_sequences=1
-    )
-
-    output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    print(output_text)
-
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/Content/Python/chatBot/GPT_4.py b/Content/Python/chatBot/GPT_4.py
new file mode 100644
index 0000000..a4cb0c3
--- /dev/null
+++ b/Content/Python/chatBot/GPT_4.py
@@ -0,0 +1,149 @@
+import gradio as gr
+import os
+import json
+import requests
+
+# Streaming endpoint
+API_URL = "https://api.openai.com/v1/chat/completions"  # os.getenv("API_URL") + "/generate_stream"
+
+# Testing with my Open AI Key
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+def predict(inputs, top_p, temperature, chat_counter, chatbot=[], history=[]):
+    payload = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": f"{inputs}"}],
+        "temperature": 1.0,
+        "top_p": 1.0,
+        "n": 1,
+        "stream": True,
+        "presence_penalty": 0,
+        "frequency_penalty": 0,
+    }
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {OPENAI_API_KEY}"
+    }
+
+    print(f"chat_counter - {chat_counter}")
+    if chat_counter != 0:
+        messages = []
+        for data in chatbot:
+            temp1 = {}
+            temp1["role"] = "user"
+            temp1["content"] = data[0]
+            temp2 = {}
+            temp2["role"] = "assistant"
+            temp2["content"] = data[1]
+            messages.append(temp1)
+            messages.append(temp2)
+        temp3 = {}
+        temp3["role"] = "user"
+        temp3["content"] = inputs
+        messages.append(temp3)
+        # messages
+        payload = {
+            "model": "gpt-4",
+            "messages": messages,  # [{"role": "user", "content": f"{inputs}"}],
+            "temperature": temperature,  # 1.0,
+            "top_p": top_p,  # 1.0,
+            "n": 1,
+            "stream": True,
+            "presence_penalty": 0,
+            "frequency_penalty": 0,
+        }
+
+    chat_counter += 1
+
+    history.append(inputs)
+    print(f"payload is - {payload}")
+    # make a POST request to the API endpoint using the requests.post method, passing in stream=True
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    print(f"response code - {response}")
+    token_counter = 0
+    partial_words = ""
+
+    counter = 0
+    for chunk in response.iter_lines():
+        # Skipping first chunk
+        if counter == 0:
+            counter += 1
+            continue
+        # counter+=1
+        # check whether each line is non-empty
+        if chunk.decode():
+            chunk = chunk.decode()
+            # decode each line as response data is in bytes
+            if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
+                # if len(json.loads(chunk.decode()[6:])['choices'][0]["delta"]) == 0:
+                #     break
+                partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
+                if token_counter == 0:
+                    history.append(" " + partial_words)
+                else:
+                    history[-1] = partial_words
+                chat = [(history[i], history[i + 1]) for i in
+                        range(0, len(history) - 1, 2)]  # convert to tuples of list
+                token_counter += 1
+                yield chat, history, chat_counter, response  # resembles {chatbot: chat, state: history}
+
+
+def reset_textbox():
+    return gr.update(value='')
+
+
+title = """🔥GPT4 with ChatCompletions API +🚀Gradio-Streaming"""
+description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
+```
+User:
+Assistant:
+User:
+Assistant:
+...
+```
+In this app, you can explore the outputs of a gpt-4 LLM.
+"""
+
+theme = gr.themes.Default(primary_hue="green")
+
+with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
+                #chatbot {height: 520px; overflow: auto;}""",
+               theme=theme) as demo:
+    gr.HTML(title)
+    gr.HTML(
+        """🔥This Huggingface Gradio Demo provides you full access to GPT4 API (4096 token limit). 🎉🥳🎉You don't need any OPENAI API key🙌""")
+    gr.HTML(
+        '''Duplicate the Space and run securely with your OpenAI API Key''')
+    with gr.Column(elem_id="col_container"):
+        # GPT4 API Key is provided by Huggingface
+        # openai_api_key = gr.Textbox(type='password', label="Enter only your GPT4 OpenAI API key here")
+        chatbot = gr.Chatbot(elem_id='chatbot')  # c
+        inputs = gr.Textbox(placeholder="Hi there!", label="Type an input and press Enter")  # t
+        state = gr.State([])  # s
+        with gr.Row():
+            with gr.Column(scale=7):
+                b1 = gr.Button().style(full_width=True)
+            with gr.Column(scale=3):
+                server_status_code = gr.Textbox(label="Status code from OpenAI server", )
+
+        # inputs, top_p, temperature, top_k, repetition_penalty
+        with gr.Accordion("Parameters", open=False):
+            top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True,
+                              label="Top-p (nucleus sampling)", )
+            temperature = gr.Slider(minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True,
+                                    label="Temperature", )
+            # top_k = gr.Slider( minimum=1, maximum=50, value=4, step=1, interactive=True, label="Top-k",)
+            # repetition_penalty = gr.Slider( minimum=0.1, maximum=3.0, value=1.03, step=0.01, interactive=True, label="Repetition Penalty", )
+            chat_counter = gr.Number(value=0, visible=False, precision=0)
+
+    inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state],
+                  [chatbot, state, chat_counter, server_status_code], )  # openai_api_key
+    b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state],
+             [chatbot, state, chat_counter, server_status_code], )  # openai_api_key
+    b1.click(reset_textbox, [], [inputs])
+    inputs.submit(reset_textbox, [], [inputs])
+
+    # gr.Markdown(description)
+    demo.queue(max_size=20, concurrency_count=10).launch(debug=True)
\ No newline at end of file
diff --git a/Content/Python/chatBot/GPT-J_6B.py b/Content/Python/chatBot/GPT_J_6B.py
similarity index 100%
rename from Content/Python/chatBot/GPT-J_6B.py
rename to Content/Python/chatBot/GPT_J_6B.py
diff --git a/Content/Python/chatBot/GPT_Neo.py b/Content/Python/chatBot/GPT_Neo.py
index 8409e04..f760746 100644
--- a/Content/Python/chatBot/GPT_Neo.py
+++ b/Content/Python/chatBot/GPT_Neo.py
@@ -4,15 +4,18 @@
 import torch
 
 
+models = ["gpt-neo-125M", "gpt-neo-1.3B", "gpt-neo-2.7B"]
+current_model_name = "EleutherAI/" + models[0]
+
+
 def process_bot_answer(input_text, text_length=50):
-    model_name = "EleutherAI/gpt-neo-125M"
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+    tokenizer = GPT2Tokenizer.from_pretrained(current_model_name)
 
-    model = GPTNeoForCausalLM.from_pretrained(model_name)
+    model = GPTNeoForCausalLM.from_pretrained(current_model_name)
     model.to('cuda')
     input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
     # input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cpu")
-    generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')
+    generator = pipeline('text-generation', model=current_model_name)
 
     # A 3000 value will produce a buffer overflow so we need to prevent that.
diff --git a/Content/Python/chatBot/OPT.py b/Content/Python/chatBot/OPT.py
index 4cc3d00..7146c91 100644
--- a/Content/Python/chatBot/OPT.py
+++ b/Content/Python/chatBot/OPT.py
@@ -1,29 +1,31 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import pipeline, set_seed
+import sys
+
+models = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
+current_model_name = "facebook/" + models[0]
+
+
+def process_bot_answer(input_text, seed=None):
+    if seed is None:
+        generator = pipeline('text-generation', model=current_model_name)
+    else:
+        set_seed(seed)
+        generator = pipeline('text-generation', model=current_model_name, do_sample=True)
+    return generator(input_text)[0]["generated_text"]
 
 
 def main():
-    model_name = "facebook/opt-6.7b"
-    # "facebook/opt-6.7b"
-    # "facebook/opt-2.7b"
-    # "facebook/opt-1.3b"
-    # facebook/opt-350m"
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name)
-    input_text = "I want to generate some text using OPT 6.7b."
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-
-    sample_outputs = model.generate(
-        input_ids,
-        do_sample=True,
-        max_length=50,
-        top_k=0,
-        temperature=0.7,
-        num_return_sequences=1
-    )
-
-    output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    print(output_text)
+    if len(sys.argv) < 2:
+        print("Please provide a text prompt as the first argument or a text prompt and a seed.")
+        return
+
+    if len(sys.argv) == 2:
+        answer = process_bot_answer(sys.argv[1])
+    elif len(sys.argv) == 3:
+        answer = process_bot_answer(sys.argv[1], int(sys.argv[2]))
+
+    print(answer)
+    return answer
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/Pygmalion.py b/Content/Python/chatBot/Pygmalion.py
index 5e276df..d678712 100644
--- a/Content/Python/chatBot/Pygmalion.py
+++ b/Content/Python/chatBot/Pygmalion.py
@@ -1,29 +1,49 @@
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import pipeline
+import sys
+
+models = ["pygmalion-350m", "pygmalion-1.3b", "pygmalion-2.7b", "pygmalion-6b"]
+current_model_name = "PygmalionAI/" + models[0]
+
+
+def generate(article, length=256):
+    generator = pipeline('text-generation', model=current_model_name)
+    outputs = generator(article, do_sample=True, max_length=length, num_return_sequences=5)
+
+    return [s["generated_text"] for s in outputs]
+
+
+def process_bot_answer(input_text):
+    candidates = generate(input_text)
+
+    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+    model = AutoModelForSequenceClassification.from_pretrained("ChaiML/gpt2_base_retry_and_continue_12m_reward_model")
+    # model = AutoModelForSequenceClassification.from_pretrained(current_model_name)
+    tokenizer.pad_token_id = 50256
+    tokenizer.truncation_side = "left"
+    tokenizer.padding_side = "right"
+    tokens = tokenizer(candidates, return_tensors='pt', return_attention_mask=True, padding='longest', truncation=True,
+                       max_length=256)
+    reward = model(**tokens).logits[:, 1]
+    idx = reward.argmax()
+
+    chosen_reply = candidates[idx][len(input_text):]
+
+    return chosen_reply
+
 
 def main():
-    model_name = "PygmalionAI/pygmalion-6b"
-    # "PygmalionAI/pygmalion-6b"
-    # "PygmalionAI/pygmalion-2.7b"
-    # "PygmalionAI/pygmalion-1.3b"
-    # "PygmalionAI/pygmalion-350m"
-
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-    model = GPT2LMHeadModel.from_pretrained(model_name)
-    input_text = "I want to generate some text using Pygmalion."
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-
-    sample_outputs = model.generate(
-        input_ids,
-        do_sample=True,
-        max_length=50,
-        top_k=50,
-        top_p=0.95,
-        temperature=1.0,
-        num_return_sequences=1
-    )
-
-    output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    print(output_text)
+    if len(sys.argv) < 2:
+        print("Please provide a text prompt as the first argument.")
+        return
+
+    if len(sys.argv) == 2:
+        # Using default model as language_model
+        print(process_bot_answer(sys.argv[1]))
+        return process_bot_answer(sys.argv[1])
+    elif len(sys.argv) == 3:
+        print(process_bot_answer(sys.argv[1], int(sys.argv[2])))
+        return process_bot_answer(sys.argv[1], int(sys.argv[2]))
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/T5.py b/Content/Python/chatBot/T5.py
index 42425b8..5f96192 100644
--- a/Content/Python/chatBot/T5.py
+++ b/Content/Python/chatBot/T5.py
@@ -1,22 +1,37 @@
-import torch
-from transformers import T5ForConditionalGeneration, T5Tokenizer
+from transformers import MT5Model, T5Tokenizer
+import sys
+
+# pip3 install sentencepiece requests_html
+models = ["t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b"]
+current_model_name = models[0]
 
 
-def main():
-    # Load the T5 model and tokenizer
-    model = T5ForConditionalGeneration.from_pretrained('t5-base')
-    tokenizer = T5Tokenizer.from_pretrained('t5-base')
+def generate(article, summary, tokenizer, model):
+    inputs = tokenizer(article, return_tensors="pt")
+    labels = tokenizer(text_target=summary, return_tensors="pt")
+
+    outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"])
+
+    hidden_states = outputs.last_hidden_state
+
+    return outputs
 
-    # Define your prompt text
-    prompt = 'The quick brown fox'
+
+def process_bot_answer(input_text, summary_text):
+    model = MT5Model.from_pretrained(current_model_name)
+    tokenizer = T5Tokenizer.from_pretrained(current_model_name)
+
+    return generate(input_text, summary_text, tokenizer, model)
 
-    # Generate the text
-    input_ids = tokenizer.encode(prompt, return_tensors='pt')
-    outputs = model.generate(input_ids=input_ids, max_length=100, do_sample=True)
+
+def main():
+    if len(sys.argv) < 3:
+        print("Please provide a text prompt as the first argument and a summary as the second.")
+        return
 
-    # Decode the generated text and print it
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(generated_text)
+    answer = process_bot_answer(sys.argv[1], sys.argv[2])
+    print(answer)
+    return answer
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/emotion.py b/Content/Python/chatBot/emotion.py
index 1ba24d6..e654721 100644
--- a/Content/Python/chatBot/emotion.py
+++ b/Content/Python/chatBot/emotion.py
@@ -6,6 +6,18 @@
 from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder
 
 #nltk.download('vader_lexicon')
+from transformers import pipeline
+
+classifier = pipeline(
+    "text-classification",
+    model="j-hartmann/emotion-english-distilroberta-base",
+    return_all_scores=True
+)
+
+def classify(txt_input):
+    return classifier(txt_input)
+
+
 # Compile a large corpus of text data
 corpus = "I feel sad today. I am very sad. The sky is cloudy and it's raining. I don't want to go outside."
diff --git a/Content/Python/chatBot/mT5.py b/Content/Python/chatBot/mT5.py
index 1da59de..1df120c 100644
--- a/Content/Python/chatBot/mT5.py
+++ b/Content/Python/chatBot/mT5.py
@@ -1,20 +1,37 @@
-from transformers import MT5ForConditionalGeneration, MT5Tokenizer
+from transformers import MT5Model, AutoTokenizer
+import sys
+
+# pip3 install sentencepiece requests_html
+models = ["mt5-small", "mt5-base", "mt5-large", "mt5-xl", "mt5-xxl"]
+current_model_name = "google/" + models[0]
+
+
+def generate(article, summary, tokenizer, model):
+    inputs = tokenizer(article, return_tensors="pt")
+    labels = tokenizer(text_target=summary, return_tensors="pt")
+
+    outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"])
+
+    hidden_states = outputs.last_hidden_state
+
+    return outputs
+
+
+def process_bot_answer(input_text, summary_text):
+    model = MT5Model.from_pretrained(current_model_name)
+    tokenizer = AutoTokenizer.from_pretrained(current_model_name)
+
+    return generate(input_text, summary_text, tokenizer, model)
 
 
 def main():
-    # Load the mT5 model and tokenizer
-    model_name = 'google/mt5-small'
-    tokenizer = MT5Tokenizer.from_pretrained(model_name)
-    model = MT5ForConditionalGeneration.from_pretrained(model_name)
-
-    # Set the input prompt and generate text
-    prompt = "translate English to French: Hello, how are you?"
-    inputs = tokenizer.encode(prompt, return_tensors="pt")
-    outputs = model.generate(inputs, max_length=128, num_beams=4, early_stopping=True)
-
-    # Decode the output and print the generated text
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(generated_text)
+    if len(sys.argv) < 3:
+        print("Please provide a text prompt as the first argument and a summary as the second.")
+        return
+
+    answer = process_bot_answer(sys.argv[1], sys.argv[2])
+    print(answer)
+    return answer
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py b/Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py
new file mode 100644
index 0000000..86aaecf
--- /dev/null
+++ b/Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py
@@ -0,0 +1,47 @@
+from bs4 import BeautifulSoup
+import requests
+import sys
+
+
+current_page = "https://huggingface.co/" + sys.argv[1] + "/tree/main"
+
+
+# Description: This function makes a GET request to the given webpage URL and returns the parsed HTML content
+# Arguments: url of the webpage
+def get_webpage(url):
+    # Make a GET request to the web page
+    url = url
+    response = requests.get(url)
+    return BeautifulSoup(response.content, 'html.parser')
+
+
+def main():
+    soup = get_webpage(current_page)
+    items = soup.find_all('a')
+
+    test_list = []
+    for i in items:
+        text = i.text
+        if (".bin" in text and "pytorch" in text) or ("GB" in text or "MB" in text):
+            test_list.append(str(text))
+
+    print("Test LIST:\t", test_list)
+
+    odd_i = []
+    even_i = []
+    for i in range(0, len(test_list)):
+        if i % 2:
+            even_i.append(test_list[i])
+        else:
+            odd_i.append(test_list[i])
+
+    n = float(0)
+    for size in even_i:
+        print("SIZE =\t", size)
+        n += float(size.split("\n")[0].split(" ")[0])
+
+    print("The " + sys.argv[1].split("/")[1] + " model disk size is " + str(n) + " GB")
+
+
+if __name__ == '__main__':
+    main()