
Commit

UPDATED: Changes related to #6
Axlfc committed Mar 20, 2023
1 parent 715c61f commit 97ff557
Showing 13 changed files with 361 additions and 110 deletions.
9 changes: 7 additions & 2 deletions Content/Python/chatBot/BLOOM.py
@@ -3,9 +3,14 @@
import torch


models = ["bloom-560m", "bloom-1b1", "bloom-1b7", "bloom-3b", "bloom-7b1", "bloom"]
current_model_name = "bigscience/" + models[0]


def process_bot_answer(input_text, text_length=50):
model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")

model = AutoModelForCausalLM.from_pretrained(current_model_name)
tokenizer = AutoTokenizer.from_pretrained(current_model_name)

# Tokenize the prompt and generate text using the BLOOM model
inputs = tokenizer(input_text, return_tensors="pt")
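BLOOM.py now builds `current_model_name` from a list of BLOOM checkpoints instead of hard-coding `bigscience/bloom-560m`, but the hunk cuts off right after tokenization, so the generation and decoding steps are below the fold. A minimal self-contained sketch of the flow this function presumably follows, assuming standard `transformers` generation (the `max_new_tokens` cap is an assumption, not taken from the diff):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

models = ["bloom-560m", "bloom-1b1", "bloom-1b7", "bloom-3b", "bloom-7b1", "bloom"]
current_model_name = "bigscience/" + models[0]


def process_bot_answer(input_text, text_length=50):
    model = AutoModelForCausalLM.from_pretrained(current_model_name)
    tokenizer = AutoTokenizer.from_pretrained(current_model_name)

    # Tokenize the prompt and generate text with the selected BLOOM checkpoint.
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=text_length)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```

With this layout, switching to a larger checkpoint only means changing the list index used to build `current_model_name`.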
8 changes: 6 additions & 2 deletions Content/Python/chatBot/FLAN_T5.py
@@ -3,6 +3,10 @@
# pip install -q transformers accelerate sentencepiece


models = ["flan-t5-small", "flan-t5-base", "flan-t5-large", "flan-t5-xl", "flan-t5-xxl"]
current_model_name = "google/" + models[0]


def generate(input_text, tokenizer, model):
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
outputs = model.generate(input_ids, max_length=200, bos_token_id=0)
@@ -11,8 +15,8 @@ def generate(input_text, tokenizer, model):


def process_bot_answer(input_text):
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small", device_map="auto")
tokenizer = T5Tokenizer.from_pretrained(current_model_name)
model = T5ForConditionalGeneration.from_pretrained(current_model_name, device_map="auto")

return generate(input_text, tokenizer, model)

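In FLAN_T5.py, `generate` moves the input ids to `"cuda"` unconditionally, so this path fails on a CPU-only machine even though `device_map="auto"` could place the model on CPU. A minimal device-aware sketch under that assumption (the `pick_device` helper and the dropped `device_map` are illustrative simplifications, not part of this commit):

```python
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

current_model_name = "google/flan-t5-small"


def pick_device():
    # Fall back to CPU when no GPU is present.
    return "cuda" if torch.cuda.is_available() else "cpu"


def generate_answer(input_text):
    device = pick_device()
    tokenizer = T5Tokenizer.from_pretrained(current_model_name)
    model = T5ForConditionalGeneration.from_pretrained(current_model_name).to(device)
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
    outputs = model.generate(input_ids, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```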
4 changes: 4 additions & 0 deletions Content/Python/chatBot/GALACTICA.py
@@ -3,6 +3,10 @@
import sys


models = ["galactica-125m", "galactica-1.3b", "galactica-6.7b", "galactica-30b", "galactica-120b"]
current_model_name = "facebook/" + models[0]


def process_bot_answer(input_text, text_length=200):
tokenizer = AutoTokenizer.from_pretrained("facebook/galactica-125m")
model = OPTForCausalLM.from_pretrained("facebook/galactica-125m", device_map="auto")
27 changes: 0 additions & 27 deletions Content/Python/chatBot/GPT-Neox.py

This file was deleted.

149 changes: 149 additions & 0 deletions Content/Python/chatBot/GPT_4.py
@@ -0,0 +1,149 @@
import gradio as gr
import os
import json
import requests

# Streaming endpoint
API_URL = "https://api.openai.com/v1/chat/completions" # os.getenv("API_URL") + "/generate_stream"

# Testing with my Open AI Key
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


def predict(inputs, top_p, temperature, chat_counter, chatbot=[], history=[]):
payload = {
"model": "gpt-4",
"messages": [{"role": "user", "content": f"{inputs}"}],
"temperature": 1.0,
"top_p": 1.0,
"n": 1,
"stream": True,
"presence_penalty": 0,
"frequency_penalty": 0,
}

headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENAI_API_KEY}"
}

print(f"chat_counter - {chat_counter}")
if chat_counter != 0:
messages = []
for data in chatbot:
temp1 = {}
temp1["role"] = "user"
temp1["content"] = data[0]
temp2 = {}
temp2["role"] = "assistant"
temp2["content"] = data[1]
messages.append(temp1)
messages.append(temp2)
temp3 = {}
temp3["role"] = "user"
temp3["content"] = inputs
messages.append(temp3)
# messages
payload = {
"model": "gpt-4",
"messages": messages, # [{"role": "user", "content": f"{inputs}"}],
"temperature": temperature, # 1.0,
"top_p": top_p, # 1.0,
"n": 1,
"stream": True,
"presence_penalty": 0,
"frequency_penalty": 0,
}

chat_counter += 1

history.append(inputs)
print(f"payload is - {payload}")
# make a POST request to the API endpoint using the requests.post method, passing in stream=True
response = requests.post(API_URL, headers=headers, json=payload, stream=True)
print(f"response code - {response}")
token_counter = 0
partial_words = ""

counter = 0
for chunk in response.iter_lines():
# Skipping first chunk
if counter == 0:
counter += 1
continue
# counter+=1
# check whether each line is non-empty
if chunk.decode():
chunk = chunk.decode()
# decode each line as response data is in bytes
if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
# if len(json.loads(chunk.decode()[6:])['choices'][0]["delta"]) == 0:
# break
partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
if token_counter == 0:
history.append(" " + partial_words)
else:
history[-1] = partial_words
chat = [(history[i], history[i + 1]) for i in
range(0, len(history) - 1, 2)] # convert to tuples of list
token_counter += 1
yield chat, history, chat_counter, response # resembles {chatbot: chat, state: history}


def reset_textbox():
return gr.update(value='')


title = """<h1 align="center">🔥GPT4 with ChatCompletions API +🚀Gradio-Streaming</h1>"""
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
```
User: <utterance>
Assistant: <utterance>
User: <utterance>
Assistant: <utterance>
...
```
In this app, you can explore the outputs of a gpt-4 LLM.
"""

theme = gr.themes.Default(primary_hue="green")

with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
#chatbot {height: 520px; overflow: auto;}""",
theme=theme) as demo:
gr.HTML(title)
gr.HTML(
"""<h3 align="center">🔥This Huggingface Gradio Demo provides you full access to GPT4 API (4096 token limit). 🎉🥳🎉You don't need any OPENAI API key🙌</h1>""")
gr.HTML(
'''<center><a href="https://huggingface.co/spaces/ysharma/ChatGPT4?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space and run securely with your OpenAI API Key</center>''')
with gr.Column(elem_id="col_container"):
# GPT4 API Key is provided by Huggingface
# openai_api_key = gr.Textbox(type='password', label="Enter only your GPT4 OpenAI API key here")
chatbot = gr.Chatbot(elem_id='chatbot') # c
inputs = gr.Textbox(placeholder="Hi there!", label="Type an input and press Enter") # t
state = gr.State([]) # s
with gr.Row():
with gr.Column(scale=7):
b1 = gr.Button().style(full_width=True)
with gr.Column(scale=3):
server_status_code = gr.Textbox(label="Status code from OpenAI server", )

# inputs, top_p, temperature, top_k, repetition_penalty
with gr.Accordion("Parameters", open=False):
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True,
label="Top-p (nucleus sampling)", )
temperature = gr.Slider(minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True,
label="Temperature", )
# top_k = gr.Slider( minimum=1, maximum=50, value=4, step=1, interactive=True, label="Top-k",)
# repetition_penalty = gr.Slider( minimum=0.1, maximum=3.0, value=1.03, step=0.01, interactive=True, label="Repetition Penalty", )
chat_counter = gr.Number(value=0, visible=False, precision=0)

inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state],
[chatbot, state, chat_counter, server_status_code], ) # openai_api_key
b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state],
[chatbot, state, chat_counter, server_status_code], ) # openai_api_key
b1.click(reset_textbox, [], [inputs])
inputs.submit(reset_textbox, [], [inputs])

# gr.Markdown(description)
demo.queue(max_size=20, concurrency_count=10).launch(debug=True)
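
GPT_4.py streams chat completions over server-sent events: each line from `response.iter_lines()` is a `data: {...}` JSON payload, the code strips the six-character `data: ` prefix and appends `choices[0].delta.content` to the partial reply. A minimal sketch of the same parsing outside Gradio, assuming `OPENAI_API_KEY` is set in the environment as in the file above (model name and prompt are placeholders):

```python
import json
import os
import requests

API_URL = "https://api.openai.com/v1/chat/completions"


def stream_chat(prompt, model="gpt-4"):
    """Yield streamed content chunks, mirroring GPT_4.py's chunk parsing."""
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    with requests.post(API_URL, headers=headers, json=payload, stream=True) as r:
        for line in r.iter_lines():
            if not line:
                continue
            chunk = line.decode()
            # Skip non-data lines and the terminating [DONE] marker.
            if not chunk.startswith("data: ") or chunk == "data: [DONE]":
                continue
            delta = json.loads(chunk[6:])["choices"][0]["delta"]
            if "content" in delta:
                yield delta["content"]


if __name__ == "__main__":
    # Print the streamed reply as one string.
    print("".join(stream_chat("Say hello in three languages.")))
```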
File renamed without changes.
11 changes: 7 additions & 4 deletions Content/Python/chatBot/GPT_Neo.py
@@ -4,15 +4,18 @@
import torch


models = ["gpt-neo-125M", "gpt-neo-1.3B", "gpt-neo-2.7B"]
current_model_name = "EleutherAI/" + models[0]


def process_bot_answer(input_text, text_length=50):
model_name = "EleutherAI/gpt-neo-125M"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(current_model_name)

model = GPTNeoForCausalLM.from_pretrained(model_name)
model = GPTNeoForCausalLM.from_pretrained(current_model_name)
model.to('cuda')
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
# input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cpu")
generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')
generator = pipeline('text-generation', model=current_model_name)

# A 3000 value will produce a buffer overflow so we need to prevent that.

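The visible part of GPT_Neo.py's `process_bot_answer` loads a tokenizer/model pair onto CUDA and also builds a `text-generation` pipeline for the same checkpoint, and the closing comment warns that a very large length value has to be capped. A minimal pipeline-only sketch of that capped generation, assuming a 1024-token ceiling for illustration (the actual limit applied below the fold is not shown in this hunk; GPT-Neo's context window is 2048 tokens):

```python
from transformers import pipeline

generator = pipeline("text-generation", model="EleutherAI/gpt-neo-125M")


def generate_capped(prompt, text_length=50, ceiling=1024):
    # Clamp the requested length so it stays well inside the model's context.
    max_len = min(text_length, ceiling)
    return generator(prompt, max_length=max_len, do_sample=True)[0]["generated_text"]


print(generate_capped("In a distant galaxy,"))
```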
48 changes: 25 additions & 23 deletions Content/Python/chatBot/OPT.py
@@ -1,29 +1,31 @@
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline, set_seed
import sys

models = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
current_model_name = "facebook/" + models[0]


def process_bot_answer(input_text, seed=None):
if seed is None:
generator = pipeline('text-generation', model=current_model_name)
else:
set_seed(seed)
generator = pipeline('text-generation', model=current_model_name, do_sample=True)
return generator(input_text)[0]["generated_text"]


def main():
model_name = "facebook/opt-6.7b"
# "facebook/opt-6.7b"
# "facebook/opt-2.7b"
# "facebook/opt-1.3b"
# facebook/opt-350m"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
input_text = "I want to generate some text using OPT 6.7b."
input_ids = tokenizer.encode(input_text, return_tensors="pt")

sample_outputs = model.generate(
input_ids,
do_sample=True,
max_length=50,
top_k=0,
temperature=0.7,
num_return_sequences=1
)

output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
print(output_text)
if len(sys.argv) < 2:
print("Please provide a text prompt as the first argument or a text prompt and a seed.")
return

if len(sys.argv) == 2:
answer = process_bot_answer(sys.argv[1])
elif len(sys.argv) == 3:
answer = process_bot_answer(sys.argv[1], int(sys.argv[2]))

print(answer)
return answer


if __name__ == '__main__':
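OPT.py's rewritten `main` now reads the prompt, and optionally an integer seed, from the command line instead of running a hard-coded OPT-6.7B generation. A minimal usage sketch, assuming this file's `process_bot_answer` is in scope (prompt text and seed value are placeholders):

```python
# Unseeded: the pipeline's default (greedy) decoding with facebook/opt-125m.
print(process_bot_answer("Hello, my name is"))

# Seeded: set_seed plus do_sample=True makes the sampled continuation
# reproducible across runs on the same setup.
print(process_bot_answer("Hello, my name is", seed=42))
```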
68 changes: 44 additions & 24 deletions Content/Python/chatBot/Pygmalion.py
@@ -1,29 +1,49 @@
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import sys

models = ["pygmalion-350m", "pygmalion-1.3b", "pygmalion-2.7b", "pygmalion-6b"]
current_model_name = "PygmalionAI/" + models[0]


def generate(article, length=256):
generator = pipeline('text-generation', model=current_model_name)
outputs = generator(article, do_sample=True, max_length=length, num_return_sequences=5)

return [s["generated_text"] for s in outputs]


def process_bot_answer(input_text):
candidates = generate(input_text)

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForSequenceClassification.from_pretrained("ChaiML/gpt2_base_retry_and_continue_12m_reward_model")
# model = AutoModelForSequenceClassification.from_pretrained(current_model_name)
tokenizer.pad_token_id = 50256
tokenizer.truncation_side = "left"
tokenizer.padding_side = "right"
tokens = tokenizer(candidates, return_tensors='pt', return_attention_mask=True, padding='longest', truncation=True,
max_length=256)
reward = model(**tokens).logits[:, 1]
idx = reward.argmax()

chosen_reply = candidates[idx][len(input_text):]

return chosen_reply


def main():
model_name = "PygmalionAI/pygmalion-6b"
# "PygmalionAI/pygmalion-6b"
# "PygmalionAI/pygmalion-2.7b"
# "PygmalionAI/pygmalion-1.3b"
# "PygmalionAI/pygmalion-350m"

tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
input_text = "I want to generate some text using Pygmalion."
input_ids = tokenizer.encode(input_text, return_tensors="pt")

sample_outputs = model.generate(
input_ids,
do_sample=True,
max_length=50,
top_k=50,
top_p=0.95,
temperature=1.0,
num_return_sequences=1
)

output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
print(output_text)
if len(sys.argv) < 2:
print("Please provide a text prompt as the first argument.")
return

if len(sys.argv) == 2:
# Using default model as language_model
print(process_bot_answer(sys.argv[1]))
return process_bot_answer(sys.argv[1])
elif len(sys.argv) == 3:
print(process_bot_answer(sys.argv[1], int(sys.argv[2])))
return process_bot_answer(sys.argv[1], int(sys.argv[2]))


if __name__ == '__main__':
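Pygmalion.py's `process_bot_answer` now does best-of-n reranking: the Pygmalion pipeline samples five candidate continuations, the ChaiML GPT-2 reward model scores each one, and the highest-scoring reply (with the prompt stripped off) is returned. Pygmalion checkpoints are dialogue models, so prompts are typically a persona block followed by alternating `You:`/character turns; a minimal prompt sketch under that assumption, with placeholder persona text and assuming `process_bot_answer` from this file is in scope:

```python
persona = (
    "Assistant's Persona: Assistant is a concise, slightly sarcastic AI helper.\n"
    "<START>\n"
)
dialogue = "You: What's the fastest way to learn Python?\nAssistant:"

# Best-of-5 reply chosen by the reward model, with the prompt removed.
reply = process_bot_answer(persona + dialogue)
print(reply)
```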