RAG Evaluation #50

Open
wants to merge 21 commits into base: main
Changes from all commits
136 changes: 136 additions & 0 deletions Colab_RAG_Eval.ipynb
@@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Prepare Colab environment"
],
"metadata": {
"id": "7qU5hiy9W3Wc"
}
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "AxBJHd2ckD0c"
},
"outputs": [],
"source": [
"# clone github repo\n",
"!git clone https://github.com/sjoerdoffringa/RAGMeUp.git\n",
"\n",
"# delete torch from requirements for Colab\n",
"!sed -i '/torch/d' RAGMeUp/server/requirements.txt\n",
"\n",
"# change ragas version\n",
"!sed -i '/ragas/d' RAGMeUp/server/requirements.txt && echo \"ragas==0.2.6\" >> RAGMeUp/server/requirements.txt\n",
"\n",
"# install requirements\n",
"!pip install -r RAGMeUp/server/requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "Ia_OPalmnHKo"
},
"outputs": [],
"source": [
"# set working directory in server folder\n",
"%cd RAGMeUp/server\n",
"\n",
"# copy environment template\n",
"!mv .env.evaltemplate .env"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "wX4rdS7RDRR2"
},
"outputs": [],
"source": [
"from huggingface_hub import login\n",
"from dotenv import load_dotenv\n",
"import os\n",
"\n",
"!git config --global credential.helper store\n",
"\n",
"# login by inserting token manually\n",
"!huggingface-cli login"
]
},
{
"cell_type": "markdown",
"source": [
"# Run scripts"
],
"metadata": {
"id": "JOHcyUi1XOV8"
}
},
{
"cell_type": "code",
"source": [
"# Run testset generation\n",
"\n",
"# load environment\n",
"load_dotenv()\n",
"\n",
"# change environment variables\n",
"os.environ['eval_qa_pairs'] = \"5\"\n",
"os.environ['llm_model'] = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
"\n",
"# run script\n",
"!python eval_create_testset.py"
],
"metadata": {
"id": "O5sa6kRJWG8G"
},
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Run RAG evaluation\n",
"\n",
"# load environment\n",
"load_dotenv()\n",
"\n",
"# change environment variables\n",
"os.environ[\"llm_model\"] = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
"os.environ[\"rerank_model\"] = \"cross-encoder/ms-marco-TinyBERT-L-2-v2\"\n",
"os.environ[\"eval_testset_directory\"] = \"testsets/30QA/\"\n",
"os.environ[\"eval_RAG_instance_name\"] = \"3.1-8B_TinyBERT\"\n",
"\n",
"# run script\n",
"!python eval_evaluate_RAG.py"
],
"metadata": {
"id": "veM8VorXZWnA"
},
"execution_count": 9,
"outputs": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
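A note on the mechanism the notebook relies on (not part of the diff itself): `os.environ` assignments made in a Colab cell are inherited by the `!python ...` subprocesses, and `load_dotenv()` by default does not override variables that already exist in the process environment, so the notebook's overrides win over the `.env` defaults. A minimal sketch, assuming the eval scripts read their settings via `python-dotenv` and `os.getenv` (their internals are not shown in this diff):

```python
# Sketch only: assumed pattern inside eval_create_testset.py / eval_evaluate_RAG.py.
import os
from dotenv import load_dotenv

# By default load_dotenv() keeps variables already present in the environment,
# so values exported from the Colab notebook take precedence over .env entries.
load_dotenv()

eval_qa_pairs = int(os.getenv("eval_qa_pairs", "10"))
llm_model = os.getenv("llm_model", "meta-llama/Meta-Llama-3.1-8B-Instruct")

print(f"Generating {eval_qa_pairs} QA pairs with {llm_model}")
```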
207 changes: 28 additions & 179 deletions README.md

Large diffs are not rendered by default.

113 changes: 113 additions & 0 deletions server/.env.evaltemplate
@@ -0,0 +1,113 @@
HF_TOKEN=
llm_model=meta-llama/Meta-Llama-3.1-8B-Instruct
llm_assistant_token="<|eot_id|>assistant\n\n"
embedding_model=avsolatorio/GIST-small-Embedding-v0
trust_remote_code=True
force_cpu=False

provenance_method=rerank
provenance_similarity_llm=sentence-transformers/distiluse-base-multilingual-cased-v2
provenance_include_query=False
provenance_llm_prompt="Instruction: You are a provenance auditor that needs to determine exactly how much an answer given to a user question was based on a given input document, knowing that more documents than just this one were considered. Documents may be fully used verbatim, partially used or even translated. You need to give a score indicating how much a source document was used in creating the answer given to a user query; this score must be 0 = source document is not used at all, 1 = barely used, 2 = moderately used, 3 = mostly used, 4 = almost fully used and 5 = full text included in answer. You are forced to always answer only with the score from 0 to 5, don't explain yourself or add more text than just the score.

The user's query is:

{query}

The answer given to this user query is:

{answer}

The source document that you need to score is the following:

{context}"

data_directory='data'
file_types="pdf,json,docx,pptx,xslx,csv,xml"
json_schema="."
json_text_content=False
xml_xpath="//"

vector_store=milvus
vector_store_uri='data.db'
vector_store_collection=ragmeup_documents
vector_store_sparse_uri=bm25_db.pickle
vector_store_initial_load=True
vector_store_k=10
document_chunks_pickle=rag_chunks.pickle
rerank=True
rerank_k=1
rerank_model=flashrank

temperature=0.2
repetition_penalty=1.1
max_new_tokens=1000

rag_instruction="Instruction: You are a digital librarian that can answer generic questions on relevant content quickly and succinctly. Here are a few documents from the library that you can use to answer the user's question, retrieved as documents from a database. Be sure to motivate your answer and always mention your source, i.e. which of the documents you used to formulate the answer:

{context}"
rag_question_initial="The initial question you have to answer:

{question}"
rag_question_followup="The follow-up question you have to answer:

{question}"
rag_fetch_new_instruction="Instruction: You are a digital librarian with a database that contains relevant documents for user queries. Users ask questions based on those documents; these questions either need you to fetch new documents from the database or are follow-up questions on previously obtained documents. You need to decide whether you are going to fetch new documents or whether the user is asking a follow-up question, but you don't get to see the actual documents the user is potentially looking at.\nShould new documents be fetched from the database based on this user query? Answer with yes or no."
rag_fetch_new_question="The user question is the following: \"{question}\"\n"

use_rewrite_loop=False
rewrite_query_instruction="You have to answer a user question based on documents retrieved from a document database. It is your task to decide whether or not the documents contain the answer to the user's query. You can always only answer with exactly yes or no. The documents that are currently fetched from the database are:

{context}"
rewrite_query_question="The user's question is:

{question}"
rewrite_query_prompt="You are given a user query that should be answered by looking up documents from a document store using a distance-based similarity measure. The documents fetched from the document store were found to be irrelevant to answer the question. Rewrite the following question into an alternative that increases the likelihood of finding relevant documents from the database. You may only answer with the exact rephrasing. The original question is: {question}"

use_re2=True
re2_prompt="Read the question again: "

splitter='RecursiveCharacterTextSplitter'
chunk_size=1024
chunk_overlap=40
breakpoint_threshold_type=percentile
breakpoint_threshold_amount=None
number_of_chunks=None

use_openai=False
openai_model_name='gpt-4o-mini'
use_gemini=False
gemini_model_name='gemini-pro'
use_azure=False
use_ollama=False
ollama_model='llama3.1'

eval_sample_size=200
eval_qa_pairs=10
eval_timeout=300
eval_max_workers=1

eval_catch_irrelevant_chunks=False
eval_catch_irrelevant_chunks_prompt="If it does not make sense to ask a question about the document at all, reply only with 'None'"
eval_check_sample_relevance=False
eval_check_sample_relevance_instruction="You judge documents on the potential to ask a meaningful question about their content. If this is the case, reply with 'True'. If not, reply with 'False'. You cannot reply with anything else. Document: {context}"
eval_check_sample_relevance_query="Judge the document on its potential to ask a meaningful question about its content. Output nothing else but 'True' or 'False'."
eval_retrieve_samples=False
eval_retrieve_samples_folder=1
eval_use_example_questions=False
eval_example_questions=None
eval_example_questions_prompt="Here are a few example questions. Generate the question in a similar fashion:\n"
eval_question_instruction="You direct another LLM with questions. Write a question we can ask an LLM that it will be able to answer based on these existing documents. Make sure the question can be accurately answered using the documents' contents and never reply with anything else but the question we need to supply to the LLM:

{context}"
eval_question_query="Generate a question that can be answered given the input documents, nothing else but the question and no explanation."
eval_answer_instruction="You are a digital librarian and need to answer questions based on input documents. Here are the documents you are forced to base your answer on:

{context}"
eval_answer_query="Answer the following question, never give any explanation or other output than the answer itself:

{question}"

eval_testset_directory='testsets/30QA/'
eval_RAG_instance_name='Model1'
eval_ragas=False
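Since `.env` values are always read as strings, booleans such as `eval_ragas=False` and sentinel values such as `None` need explicit coercion on the server side. A minimal sketch of how these eval settings could be parsed, assuming the server loads them with `python-dotenv`; the helper functions below are hypothetical and not part of this PR:

```python
# Hypothetical helpers for reading the eval_* settings; not part of this PR.
import os
from dotenv import load_dotenv

load_dotenv()

def env_bool(name: str, default: bool = False) -> bool:
    # python-dotenv returns strings, so the string "False" would otherwise be truthy
    return os.getenv(name, str(default)).strip().lower() in ("1", "true", "yes")

def env_int(name: str, default: int) -> int:
    value = os.getenv(name)
    return int(value) if value not in (None, "", "None") else default

eval_ragas = env_bool("eval_ragas")                  # False
eval_sample_size = env_int("eval_sample_size", 200)  # 200
testset_dir = os.getenv("eval_testset_directory", "testsets/30QA/")
instance_name = os.getenv("eval_RAG_instance_name", "Model1")
```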
2 changes: 1 addition & 1 deletion server/.env.template
@@ -82,7 +82,7 @@ use_azure=False
use_ollama=False
ollama_model='llama3.1'

-ragas_sample_size=200
+ragas_sample_size=5 # default is 200
ragas_qa_pairs=10
ragas_timeout=300
ragas_max_workers=1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added server/data/LanguageModelsAreFewShotLearners.pdf
Binary file not shown.
Binary file added server/data/LatenDirichletAllocation.pdf
Binary file not shown.
Binary file added server/data/attention is all you need.pdf
Binary file not shown.
Binary file not shown.