From 97ff557a9b1b797cb993fddb4eebc02df6e39936 Mon Sep 17 00:00:00 2001
From: Axlfc
Date: Mon, 20 Mar 2023 11:49:35 +0100
Subject: [PATCH] UPDATED: Changes related to #6

---
 Content/Python/chatBot/BLOOM.py                  |   9 +-
 Content/Python/chatBot/FLAN_T5.py                |   8 +-
 Content/Python/chatBot/GALACTICA.py              |   4 +
 Content/Python/chatBot/GPT-Neox.py               |  27 ----
 Content/Python/chatBot/GPT_4.py                  | 149 ++++++++++++++++++
 .../chatBot/{GPT-J_6B.py => GPT_J_6B.py}         |   0
 Content/Python/chatBot/GPT_Neo.py                |  11 +-
 Content/Python/chatBot/OPT.py                    |  48 +++---
 Content/Python/chatBot/Pygmalion.py              |  68 +++++---
 Content/Python/chatBot/T5.py                     |  43 +++--
 Content/Python/chatBot/emotion.py                |  12 ++
 Content/Python/chatBot/mT5.py                    |  45 ++++--
 .../get_huggingface_model_disk_space.py          |  47 ++++++
 13 files changed, 361 insertions(+), 110 deletions(-)
 delete mode 100644 Content/Python/chatBot/GPT-Neox.py
 create mode 100644 Content/Python/chatBot/GPT_4.py
 rename Content/Python/chatBot/{GPT-J_6B.py => GPT_J_6B.py} (100%)
 create mode 100644 Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py

diff --git a/Content/Python/chatBot/BLOOM.py b/Content/Python/chatBot/BLOOM.py
index 4c6cc76..03a86a0 100644
--- a/Content/Python/chatBot/BLOOM.py
+++ b/Content/Python/chatBot/BLOOM.py
@@ -3,9 +3,14 @@
 import torch
 
 
+models = ["bloom-560m", "bloom-1b1", "bloom-1b7", "bloom-3b", "bloom-7b1", "bloom"]
+current_model_name = "bigscience/" + models[0]
+
+
 def process_bot_answer(input_text, text_length=50):
-    model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
-    tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
+
+    model = AutoModelForCausalLM.from_pretrained(current_model_name)
+    tokenizer = AutoTokenizer.from_pretrained(current_model_name)
 
     # Tokenize the prompt and generate text using the BLOOM model
     inputs = tokenizer(input_text, return_tensors="pt")
diff --git a/Content/Python/chatBot/FLAN_T5.py b/Content/Python/chatBot/FLAN_T5.py
index 9873dbe..ca728d5 100644
--- a/Content/Python/chatBot/FLAN_T5.py
+++ b/Content/Python/chatBot/FLAN_T5.py
@@ -3,6 +3,10 @@
 # pip install -q transformers accelerate sentencepiece
 
 
+models = ["flan-t5-small", "flan-t5-base", "flan-t5-large", "flan-t5-xl", "flan-t5-xxl"]
+current_model_name = "google/" + models[0]
+
+
 def generate(input_text, tokenizer, model):
     input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
     outputs = model.generate(input_ids, max_length=200, bos_token_id=0)
@@ -11,8 +15,8 @@ def generate(input_text, tokenizer, model):
 
 
 def process_bot_answer(input_text):
-    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
-    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small", device_map="auto")
+    tokenizer = T5Tokenizer.from_pretrained(current_model_name)
+    model = T5ForConditionalGeneration.from_pretrained(current_model_name, device_map="auto")
 
     return generate(input_text, tokenizer, model)
diff --git a/Content/Python/chatBot/GALACTICA.py b/Content/Python/chatBot/GALACTICA.py
index e957d16..09036ef 100644
--- a/Content/Python/chatBot/GALACTICA.py
+++ b/Content/Python/chatBot/GALACTICA.py
@@ -3,6 +3,10 @@
 import sys
 
 
+models = ["galactica-125m", "galactica-1.3b", "galactica-6.7b", "galactica-30b", "galactica-120b"]
+current_model_name = "facebook/" + models[0]
+
+
 def process_bot_answer(input_text, text_length=200):
     tokenizer = AutoTokenizer.from_pretrained("facebook/galactica-125m")
     model = OPTForCausalLM.from_pretrained("facebook/galactica-125m", device_map="auto")
diff --git a/Content/Python/chatBot/GPT-Neox.py b/Content/Python/chatBot/GPT-Neox.py
deleted file mode 100644
index 055e271..0000000
--- a/Content/Python/chatBot/GPT-Neox.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from transformers import GPTNeoForCausalLM, GPT2Tokenizer
-
-
-def main():
-    model_name = "EleutherAI/gpt-neox-c"
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-    model = GPTNeoForCausalLM.from_pretrained(model_name)
-
-    input_text = "I want to generate some text using GPT-Neox."
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-
-    sample_outputs = model.generate(
-        input_ids,
-        do_sample=True,
-        max_length=50,
-        top_k=50,
-        top_p=0.95,
-        temperature=1.0,
-        num_return_sequences=1
-    )
-
-    output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    print(output_text)
-
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/Content/Python/chatBot/GPT_4.py b/Content/Python/chatBot/GPT_4.py
new file mode 100644
index 0000000..a4cb0c3
--- /dev/null
+++ b/Content/Python/chatBot/GPT_4.py
@@ -0,0 +1,149 @@
+import gradio as gr
+import os
+import json
+import requests
+
+# Streaming endpoint
+API_URL = "https://api.openai.com/v1/chat/completions"  # os.getenv("API_URL") + "/generate_stream"
+
+# Testing with my Open AI Key
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+
+def predict(inputs, top_p, temperature, chat_counter, chatbot=[], history=[]):
+    payload = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": f"{inputs}"}],
+        "temperature": 1.0,
+        "top_p": 1.0,
+        "n": 1,
+        "stream": True,
+        "presence_penalty": 0,
+        "frequency_penalty": 0,
+    }
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {OPENAI_API_KEY}"
+    }
+
+    print(f"chat_counter - {chat_counter}")
+    if chat_counter != 0:
+        messages = []
+        for data in chatbot:
+            temp1 = {}
+            temp1["role"] = "user"
+            temp1["content"] = data[0]
+            temp2 = {}
+            temp2["role"] = "assistant"
+            temp2["content"] = data[1]
+            messages.append(temp1)
+            messages.append(temp2)
+        temp3 = {}
+        temp3["role"] = "user"
+        temp3["content"] = inputs
+        messages.append(temp3)
+        # messages
+        payload = {
+            "model": "gpt-4",
+            "messages": messages,  # [{"role": "user", "content": f"{inputs}"}],
+            "temperature": temperature,  # 1.0,
+            "top_p": top_p,  # 1.0,
+            "n": 1,
+            "stream": True,
+            "presence_penalty": 0,
+            "frequency_penalty": 0,
+        }
+
+    chat_counter += 1
+
+    history.append(inputs)
+    print(f"payload is - {payload}")
+    # make a POST request to the API endpoint using the requests.post method, passing in stream=True
+    response = requests.post(API_URL, headers=headers, json=payload, stream=True)
+    print(f"response code - {response}")
+    token_counter = 0
+    partial_words = ""
+
+    counter = 0
+    for chunk in response.iter_lines():
+        # Skipping first chunk
+        if counter == 0:
+            counter += 1
+            continue
+        # counter+=1
+        # check whether each line is non-empty
+        if chunk.decode():
+            chunk = chunk.decode()
+            # decode each line as response data is in bytes
+            if len(chunk) > 12 and "content" in json.loads(chunk[6:])['choices'][0]['delta']:
+                # if len(json.loads(chunk.decode()[6:])['choices'][0]["delta"]) == 0:
+                #     break
+                partial_words = partial_words + json.loads(chunk[6:])['choices'][0]["delta"]["content"]
+                if token_counter == 0:
+                    history.append(" " + partial_words)
+                else:
+                    history[-1] = partial_words
+                chat = [(history[i], history[i + 1]) for i in
+                        range(0, len(history) - 1, 2)]  # convert to tuples of list
+                token_counter += 1
+                yield chat, history, chat_counter, response  # resembles {chatbot: chat, state: history}
+
+
+def reset_textbox():
+    return gr.update(value='')
+
+
+title = """🔥GPT4 with ChatCompletions API +🚀Gradio-Streaming"""
+description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
+```
+User:
+Assistant:
+User:
+Assistant:
+...
+```
+In this app, you can explore the outputs of a gpt-4 LLM.
+"""
+
+theme = gr.themes.Default(primary_hue="green")
+
+with gr.Blocks(css="""#col_container { margin-left: auto; margin-right: auto;}
+                #chatbot {height: 520px; overflow: auto;}""",
+               theme=theme) as demo:
+    gr.HTML(title)
+    gr.HTML(
+        """🔥This Huggingface Gradio Demo provides you full access to GPT4 API (4096 token limit). 🎉🥳🎉You don't need any OPENAI API key🙌""")
+    gr.HTML(
+        '''Duplicate the Space and run securely with your OpenAI API Key''')
+    with gr.Column(elem_id="col_container"):
+        # GPT4 API Key is provided by Huggingface
+        # openai_api_key = gr.Textbox(type='password', label="Enter only your GPT4 OpenAI API key here")
+        chatbot = gr.Chatbot(elem_id='chatbot')  # c
+        inputs = gr.Textbox(placeholder="Hi there!", label="Type an input and press Enter")  # t
+        state = gr.State([])  # s
+        with gr.Row():
+            with gr.Column(scale=7):
+                b1 = gr.Button().style(full_width=True)
+            with gr.Column(scale=3):
+                server_status_code = gr.Textbox(label="Status code from OpenAI server", )
+
+        # inputs, top_p, temperature, top_k, repetition_penalty
+        with gr.Accordion("Parameters", open=False):
+            top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.05, interactive=True,
+                              label="Top-p (nucleus sampling)", )
+            temperature = gr.Slider(minimum=-0, maximum=5.0, value=1.0, step=0.1, interactive=True,
+                                    label="Temperature", )
+            # top_k = gr.Slider( minimum=1, maximum=50, value=4, step=1, interactive=True, label="Top-k",)
+            # repetition_penalty = gr.Slider( minimum=0.1, maximum=3.0, value=1.03, step=0.01, interactive=True, label="Repetition Penalty", )
+            chat_counter = gr.Number(value=0, visible=False, precision=0)
+
+    inputs.submit(predict, [inputs, top_p, temperature, chat_counter, chatbot, state],
+                  [chatbot, state, chat_counter, server_status_code], )  # openai_api_key
+    b1.click(predict, [inputs, top_p, temperature, chat_counter, chatbot, state],
+             [chatbot, state, chat_counter, server_status_code], )  # openai_api_key
+    b1.click(reset_textbox, [], [inputs])
+    inputs.submit(reset_textbox, [], [inputs])
+
+    # gr.Markdown(description)
+    demo.queue(max_size=20, concurrency_count=10).launch(debug=True)
\ No newline at end of file
diff --git a/Content/Python/chatBot/GPT-J_6B.py b/Content/Python/chatBot/GPT_J_6B.py
similarity index 100%
rename from Content/Python/chatBot/GPT-J_6B.py
rename to Content/Python/chatBot/GPT_J_6B.py
diff --git a/Content/Python/chatBot/GPT_Neo.py b/Content/Python/chatBot/GPT_Neo.py
index 8409e04..f760746 100644
--- a/Content/Python/chatBot/GPT_Neo.py
+++ b/Content/Python/chatBot/GPT_Neo.py
@@ -4,15 +4,18 @@
 import torch
 
 
+models = ["gpt-neo-125M", "gpt-neo-1.3B", "gpt-neo-2.7B"]
+current_model_name = "EleutherAI/" + models[0]
+
+
 def process_bot_answer(input_text, text_length=50):
-    model_name = "EleutherAI/gpt-neo-125M"
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+    tokenizer = GPT2Tokenizer.from_pretrained(current_model_name)
 
-    model = GPTNeoForCausalLM.from_pretrained(model_name)
+    model = GPTNeoForCausalLM.from_pretrained(current_model_name)
     model.to('cuda')
     input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
     # input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cpu")
-    generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')
+    generator = pipeline('text-generation', model=current_model_name)
 
     # A 3000 value will produce a buffer overflow so we need to prevent that.
diff --git a/Content/Python/chatBot/OPT.py b/Content/Python/chatBot/OPT.py
index 4cc3d00..7146c91 100644
--- a/Content/Python/chatBot/OPT.py
+++ b/Content/Python/chatBot/OPT.py
@@ -1,29 +1,31 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import pipeline, set_seed
+import sys
+
+models = ["opt-125m", "opt-350m", "opt-1.3b", "opt-2.7b", "opt-6.7b", "opt-13b", "opt-30b", "opt-66b"]
+current_model_name = "facebook/" + models[0]
+
+
+def process_bot_answer(input_text, seed=None):
+    if seed is None:
+        generator = pipeline('text-generation', model=current_model_name)
+    else:
+        set_seed(seed)
+        generator = pipeline('text-generation', model=current_model_name, do_sample=True)
+    return generator(input_text)[0]["generated_text"]
 
 
 def main():
-    model_name = "facebook/opt-6.7b"
-    # "facebook/opt-6.7b"
-    # "facebook/opt-2.7b"
-    # "facebook/opt-1.3b"
-    # facebook/opt-350m"
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name)
-    input_text = "I want to generate some text using OPT 6.7b."
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-
-    sample_outputs = model.generate(
-        input_ids,
-        do_sample=True,
-        max_length=50,
-        top_k=0,
-        temperature=0.7,
-        num_return_sequences=1
-    )
-
-    output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    print(output_text)
+    if len(sys.argv) < 2:
+        print("Please provide a text prompt as the first argument or a text prompt and a seed.")
+        return
+
+    if len(sys.argv) == 2:
+        answer = process_bot_answer(sys.argv[1])
+    elif len(sys.argv) == 3:
+        answer = process_bot_answer(sys.argv[1], int(sys.argv[2]))
+
+    print(answer)
+    return answer
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/Pygmalion.py b/Content/Python/chatBot/Pygmalion.py
index 5e276df..d678712 100644
--- a/Content/Python/chatBot/Pygmalion.py
+++ b/Content/Python/chatBot/Pygmalion.py
@@ -1,29 +1,49 @@
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from transformers import pipeline
+import sys
+
+models = ["pygmalion-350m", "pygmalion-1.3b", "pygmalion-2.7b", "pygmalion-6b"]
+current_model_name = "PygmalionAI/" + models[0]
+
+
+def generate(article, length=256):
+    generator = pipeline('text-generation', model=current_model_name)
+    outputs = generator(article, do_sample=True, max_length=length, num_return_sequences=5)
+
+    return [s["generated_text"] for s in outputs]
+
+
+def process_bot_answer(input_text):
+    candidates = generate(input_text)
+
+    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+    model = AutoModelForSequenceClassification.from_pretrained("ChaiML/gpt2_base_retry_and_continue_12m_reward_model")
+    # model = AutoModelForSequenceClassification.from_pretrained(current_model_name)
+    tokenizer.pad_token_id = 50256
+    tokenizer.truncation_side = "left"
+    tokenizer.padding_side = "right"
+    tokens = tokenizer(candidates, return_tensors='pt', return_attention_mask=True, padding='longest', truncation=True,
+                       max_length=256)
+    reward = model(**tokens).logits[:, 1]
+    idx = reward.argmax()
+
+    chosen_reply = candidates[idx][len(input_text):]
+
+    return chosen_reply
+
 
 def main():
-    model_name = "PygmalionAI/pygmalion-6b"
-    # "PygmalionAI/pygmalion-6b"
-    # "PygmalionAI/pygmalion-2.7b"
-    # "PygmalionAI/pygmalion-1.3b"
-    # "PygmalionAI/pygmalion-350m"
-
-    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-    model = GPT2LMHeadModel.from_pretrained(model_name)
-    input_text = "I want to generate some text using Pygmalion."
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-
-    sample_outputs = model.generate(
-        input_ids,
-        do_sample=True,
-        max_length=50,
-        top_k=50,
-        top_p=0.95,
-        temperature=1.0,
-        num_return_sequences=1
-    )
-
-    output_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
-    print(output_text)
+    if len(sys.argv) < 2:
+        print("Please provide a text prompt as the first argument.")
+        return
+
+    if len(sys.argv) == 2:
+        # Using default model as language_model
+        print(process_bot_answer(sys.argv[1]))
+        return process_bot_answer(sys.argv[1])
+    elif len(sys.argv) == 3:
+        print(process_bot_answer(sys.argv[1], int(sys.argv[2])))
+        return process_bot_answer(sys.argv[1], int(sys.argv[2]))
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/T5.py b/Content/Python/chatBot/T5.py
index 42425b8..5f96192 100644
--- a/Content/Python/chatBot/T5.py
+++ b/Content/Python/chatBot/T5.py
@@ -1,22 +1,37 @@
-import torch
-from transformers import T5ForConditionalGeneration, T5Tokenizer
+from transformers import MT5Model, T5Tokenizer
+import sys
+
+# pip3 install sentencepiece requests_html
+models = ["t5-small", "t5-base", "t5-large", "t5-3b", "t5-11b"]
+current_model_name = models[0]
 
 
-def main():
-    # Load the T5 model and tokenizer
-    model = T5ForConditionalGeneration.from_pretrained('t5-base')
-    tokenizer = T5Tokenizer.from_pretrained('t5-base')
+def generate(article, summary, tokenizer, model):
+    inputs = tokenizer(article, return_tensors="pt")
+    labels = tokenizer(text_target=summary, return_tensors="pt")
+
+    outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"])
+
+    hidden_states = outputs.last_hidden_state
+
+    return outputs
 
-    # Define your prompt text
-    prompt = 'The quick brown fox'
+
+def process_bot_answer(input_text, summary_text):
+    model = MT5Model.from_pretrained(current_model_name)
+    tokenizer = T5Tokenizer.from_pretrained(current_model_name)
+
+    return generate(input_text, summary_text, tokenizer, model)
 
-    # Generate the text
-    input_ids = tokenizer.encode(prompt, return_tensors='pt')
-    outputs = model.generate(input_ids=input_ids, max_length=100, do_sample=True)
+
+def main():
+    if len(sys.argv) < 3:
+        print("Please provide a text prompt as the first argument and a summary as the second.")
+        return
 
-    # Decode the generated text and print it
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(generated_text)
+    answer = process_bot_answer(sys.argv[1], sys.argv[2])
+    print(answer)
+    return answer
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/emotion.py b/Content/Python/chatBot/emotion.py
index 1ba24d6..e654721 100644
--- a/Content/Python/chatBot/emotion.py
+++ b/Content/Python/chatBot/emotion.py
@@ -6,6 +6,18 @@
 from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder
 
 #nltk.download('vader_lexicon')
+from transformers import pipeline
+
+classifier = pipeline(
+    "text-classification",
+    model="j-hartmann/emotion-english-distilroberta-base",
+    return_all_scores=True
+)
+
+def classify(txt_input):
+    return classifier(txt_input)
+
+
 # Compile a large corpus of text data
 corpus = "I feel sad today. I am very sad. The sky is cloudy and it's raining. I don't want to go outside."
diff --git a/Content/Python/chatBot/mT5.py b/Content/Python/chatBot/mT5.py
index 1da59de..1df120c 100644
--- a/Content/Python/chatBot/mT5.py
+++ b/Content/Python/chatBot/mT5.py
@@ -1,20 +1,37 @@
-from transformers import MT5ForConditionalGeneration, MT5Tokenizer
+from transformers import MT5Model, AutoTokenizer
+import sys
+
+# pip3 install sentencepiece requests_html
+models = ["mt5-small", "mt5-base", "mt5-large", "mt5-xl", "mt5-xxl"]
+current_model_name = "google/" + models[0]
+
+
+def generate(article, summary, tokenizer, model):
+    inputs = tokenizer(article, return_tensors="pt")
+    labels = tokenizer(text_target=summary, return_tensors="pt")
+
+    outputs = model(input_ids=inputs["input_ids"], decoder_input_ids=labels["input_ids"])
+
+    hidden_states = outputs.last_hidden_state
+
+    return outputs
+
+
+def process_bot_answer(input_text, summary_text):
+    model = MT5Model.from_pretrained(current_model_name)
+    tokenizer = AutoTokenizer.from_pretrained(current_model_name)
+
+    return generate(input_text, summary_text, tokenizer, model)
 
 
 def main():
-    # Load the mT5 model and tokenizer
-    model_name = 'google/mt5-small'
-    tokenizer = MT5Tokenizer.from_pretrained(model_name)
-    model = MT5ForConditionalGeneration.from_pretrained(model_name)
-
-    # Set the input prompt and generate text
-    prompt = "translate English to French: Hello, how are you?"
-    inputs = tokenizer.encode(prompt, return_tensors="pt")
-    outputs = model.generate(inputs, max_length=128, num_beams=4, early_stopping=True)
-
-    # Decode the output and print the generated text
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print(generated_text)
+    if len(sys.argv) < 3:
+        print("Please provide a text prompt as the first argument and a summary as the second.")
+        return
+
+    answer = process_bot_answer(sys.argv[1], sys.argv[2])
+    print(answer)
+    return answer
 
 
 if __name__ == '__main__':
diff --git a/Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py b/Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py
new file mode 100644
index 0000000..86aaecf
--- /dev/null
+++ b/Content/Python/chatBot/model_card_scripts/get_huggingface_model_disk_space.py
@@ -0,0 +1,47 @@
+from bs4 import BeautifulSoup
+import requests
+import sys
+
+
+current_page = "https://huggingface.co/" + sys.argv[1] + "/tree/main"
+
+
+# Description: This function makes a GET request to the given webpage URL and returns the parsed HTML content
+# Arguments: url of the webpage
+def get_webpage(url):
+    # Make a GET request to the web page
+    url = url
+    response = requests.get(url)
+    return BeautifulSoup(response.content, 'html.parser')
+
+
+def main():
+    soup = get_webpage(current_page)
+    items = soup.find_all('a')
+
+    test_list = []
+    for i in items:
+        text = i.text
+        if (".bin" in text and "pytorch" in text) or ("GB" in text or "MB" in text):
+            test_list.append(str(text))
+
+    print("Test LIST:\t", test_list)
+
+    odd_i = []
+    even_i = []
+    for i in range(0, len(test_list)):
+        if i % 2:
+            even_i.append(test_list[i])
+        else:
+            odd_i.append(test_list[i])
+
+    n = float(0)
+    for size in even_i:
+        print("SIZE =\t", size)
+        n += float(size.split("\n")[0].split(" ")[0])
+
+    print("The " + sys.argv[1].split("/")[1] + " model disk size is " + str(n) + " GB")
+
+
+if __name__ == '__main__':
+    main()