Long Context Reordering Implementation Group 5 #39

Open · wants to merge 26 commits into base: main
185 changes: 11 additions & 174 deletions README.md

Large diffs are not rendered by default.

196 changes: 98 additions & 98 deletions server/.env.template → server/.env
@@ -1,98 +1,98 @@
HF_TOKEN=
llm_model=meta-llama/Meta-Llama-3.1-8B-Instruct
llm_assistant_token="<|eot_id|>assistant\n\n"
embedding_model=avsolatorio/GIST-small-Embedding-v0
trust_remote_code=True
force_cpu=False

provenance_method=rerank
provenance_similarity_llm=sentence-transformers/distiluse-base-multilingual-cased-v2
provenance_include_query=False
provenance_llm_prompt="Instruction: You are a provenance auditor that needs to exactly determine how much an answer given to a user question was based on a given input document, knowing that more than just that one document were considered. Documents may be fully used verbatim, partially used or even translated. You need to give a score indicating how much a source document was used in creating the answer given to a user query, this score must be 0 = source document is not used at all, 1 = barely used, 2 = moderately used, 3 = mostly used, 4 = almost fully used and 5 = full text included in answer. You are forced to always answer only with the score from 0 to 5, don't explain yourself or add more text than just the score.

The user's query is:

{query}

The answer given is to this user query is:

{answer}

The source document that you need to score is the following:

{context}"

data_directory='data'
file_types="pdf,json,docx,pptx,xslx,csv,xml"
json_schema="."
json_text_content=False
xml_xpath="//"

vector_store=milvus
vector_store_uri='data.db'
vector_store_collection=ragmeup_documents
vector_store_sparse_uri=bm25_db.pickle
vector_store_initial_load=True
vector_store_k=10
document_chunks_pickle=rag_chunks.pickle
rerank=True
rerank_k=3
rerank_model=flashrank

temperature=0.2
repetition_penalty=1.1
max_new_tokens=1000

rag_instruction="Instruction: You are a digital librarian that can answer generic questions on relevant content quickly and succinctly. Here are a few documents from the library that you can use to answer the user's question, retrieved as documents from a database. Be sure to motivate your answer and always mention your source, so which of the documents you used to formulate the answer:

{context}"
rag_question_initial="The initial question you have to answer:

{question}"
rag_question_followup="The follow-up question you have to answer:

{question}"
rag_fetch_new_instruction="Instruction: You are a digital librarian with a database that contains relevant documents for user queries. Users want to ask questions based on those documents and ask questions that either need you to fetch new documents from the database or that are a followup question on previously obtained documents. You need to decide whether you are going to fetch new documents or whether the user is asking a follow-up question but you don't get to see the actual documents the user potentially is looking at.\nShould new documents be fetched from the database based on this user query? Answer with yes or no."
rag_fetch_new_question="The user question is the following: \"{question}\"\n"

use_rewrite_loop=True
rewrite_query_instruction="You have to answer a user question based on documents retrieved from a document database. It is your task to decide whether or not the documents contain the answer to the user's query. You can always only answer with exactly yes or no. The documents that are currently fetched from the database are:

{context}"
rewrite_query_question="The user's question is:

{question}"
rewrite_query_prompt="You are given a user query that should be answered by looking up documents that from a document store using a distance based similarity measure. The documents fetched from the document store were found to be irrelevant to answer the question. Rewrite the following question into an alternative that increases the likelihood of finding relevant documents from the database. You may only answer with the exact rephrasing. The original question is: {question}"

use_re2=True
re2_prompt="Read the question again: "

splitter='RecursiveCharacterTextSplitter'
chunk_size=512
chunk_overlap=20
breakpoint_threshold_type=percentile
breakpoint_threshold_amount=None
number_of_chunks=None

use_openai=False
openai_model_name='gpt-4o-mini'
use_gemini=False
gemini_model_name='gemini-pro'
use_azure=False
use_ollama=False
ollama_model='llama3.1'

ragas_sample_size=200
ragas_qa_pairs=10
ragas_timeout=300
ragas_max_workers=1
ragas_question_instruction="You direct another LLM with questions. Write a question we can ask to an LLM that it will be able to answer based on these existing documents. Make sure the question can be accurately answered using the documents' contents and never ever reply with anything else but the question we need to supply to the LLM:

{context}"
ragas_question_query="Generate a question to that can be answered given the input documents, nothing else but the question and no explanation."
ragas_answer_instruction="You are a digital librarian and need to answer questions based on input documents. Here are the documents you are forced to base your answer on:

{context}"
ragas_answer_query="Answer the following question, never give any explanation or other output than the generated article itself:

{question}"
HF_TOKEN=
llm_model=meta-llama/Meta-Llama-3.1-8B-Instruct
llm_assistant_token="<|eot_id|>assistant\n\n"
embedding_model=avsolatorio/GIST-small-Embedding-v0
trust_remote_code=True
force_cpu=False
provenance_method=rerank
provenance_similarity_llm=sentence-transformers/distiluse-base-multilingual-cased-v2
provenance_include_query=False
provenance_llm_prompt="Instruction: You are a provenance auditor that needs to exactly determine how much an answer given to a user question was based on a given input document, knowing that more than just that one document were considered. Documents may be fully used verbatim, partially used or even translated. You need to give a score indicating how much a source document was used in creating the answer given to a user query, this score must be 0 = source document is not used at all, 1 = barely used, 2 = moderately used, 3 = mostly used, 4 = almost fully used and 5 = full text included in answer. You are forced to always answer only with the score from 0 to 5, don't explain yourself or add more text than just the score.
The user's query is:
{query}
The answer given is to this user query is:
{answer}
The source document that you need to score is the following:
{context}"
data_directory='data'
file_types="pdf,json,docx,pptx,xslx,csv,xml"
json_schema="."
json_text_content=False
xml_xpath="//"
vector_store=milvus
vector_store_uri='data.db'
vector_store_collection=ragmeup_documents
vector_store_sparse_uri=bm25_db.pickle
vector_store_initial_load=True
vector_store_k=10
document_chunks_pickle=rag_chunks.pickle
rerank=True
rerank_k=3
rerank_model=ScoredCrossEncoderReranker
temperature=0.2
repetition_penalty=1.1
max_new_tokens=1000
rag_instruction="Instruction: You are a digital librarian that can answer generic questions on relevant content quickly and succinctly. Here are a few documents from the library that you can use to answer the user's question, retrieved as documents from a database. Be sure to motivate your answer and always mention your source, so which of the documents you used to formulate the answer:
{context}"
rag_question_initial="The initial question you have to answer:
{question}"
rag_question_followup="The follow-up question you have to answer:
{question}"
rag_fetch_new_instruction="Instruction: You are a digital librarian with a database that contains relevant documents for user queries. Users want to ask questions based on those documents and ask questions that either need you to fetch new documents from the database or that are a followup question on previously obtained documents. You need to decide whether you are going to fetch new documents or whether the user is asking a follow-up question but you don't get to see the actual documents the user potentially is looking at.\nShould new documents be fetched from the database based on this user query? Answer with yes or no."
rag_fetch_new_question="The user question is the following: \"{question}\"\n"
use_rewrite_loop=True
rewrite_query_instruction="You have to answer a user question based on documents retrieved from a document database. It is your task to decide whether or not the documents contain the answer to the user's query. You can always only answer with exactly yes or no. The documents that are currently fetched from the database are:
{context}"
rewrite_query_question="The user's question is:
{question}"
rewrite_query_prompt="You are given a user query that should be answered by looking up documents that from a document store using a distance based similarity measure. The documents fetched from the document store were found to be irrelevant to answer the question. Rewrite the following question into an alternative that increases the likelihood of finding relevant documents from the database. You may only answer with the exact rephrasing. The original question is: {question}"
use_re2=True
re2_prompt="Read the question again: "
splitter='RecursiveCharacterTextSplitter'
chunk_size=512
chunk_overlap=20
breakpoint_threshold_type=percentile
breakpoint_threshold_amount=None
number_of_chunks=None
use_openai=False
openai_model_name='gpt-4o-mini'
use_gemini=False
gemini_model_name='gemini-pro'
use_azure=False
use_ollama=False
ollama_model='llama3.1'
ragas_sample_size=200
ragas_qa_pairs=10
ragas_timeout=300
ragas_max_workers=1
ragas_question_instruction="You direct another LLM with questions. Write a question we can ask to an LLM that it will be able to answer based on these existing documents. Make sure the question can be accurately answered using the documents' contents and never ever reply with anything else but the question we need to supply to the LLM:
{context}"
ragas_question_query="Generate a question to that can be answered given the input documents, nothing else but the question and no explanation."
ragas_answer_instruction="You are a digital librarian and need to answer questions based on input documents. Here are the documents you are forced to base your answer on:
{context}"
ragas_answer_query="Answer the following question, never give any explanation or other output than the generated article itself:
{question}"
8 binary files not shown.
31 changes: 31 additions & 0 deletions server/.flashrank_cache/ms-marco-MiniLM-L-12-v2/config.json
@@ -0,0 +1,31 @@
{
  "_name_or_path": "nreimers/BERT-Tiny_L-2_H-128_A-2",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 128,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 512,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.4.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522,
  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity"
}
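The vendored server/.flashrank_cache/ms-marco-MiniLM-L-12-v2 directory is the model cache used when rerank_model=flashrank (the old .env.template default). As a rough illustration, this is how that cached cross-encoder is typically driven through the flashrank library; the API names are flashrank's own, not this repository's, so treat them as an assumption to verify against the installed version.

# Sketch of flashrank reranking with the cached ms-marco-MiniLM-L-12-v2
# cross-encoder; API per the flashrank library, not this PR's code.
from flashrank import Ranker, RerankRequest

ranker = Ranker(model_name="ms-marco-MiniLM-L-12-v2", cache_dir="server/.flashrank_cache")

passages = [
    {"id": 1, "text": "First retrieved chunk."},
    {"id": 2, "text": "Second retrieved chunk."},
]
results = ranker.rerank(RerankRequest(query="example question", passages=passages))
# Each result keeps the passage plus a relevance score; the top rerank_k
# passages (3 in the .env) would become the {context} fed to rag_instruction.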
Binary file not shown.
@@ -0,0 +1 @@
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

Large diffs are not rendered by default.

@@ -0,0 +1 @@
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "/home/ukp-reimers/.cache/huggingface/transformers/448f85f42d7f87f0254da1997bc5cd60cb4607800084132993017232e82432a3.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "nreimers/BERT-Tiny_L-2_H-128_A-2", "do_basic_tokenize": true, "never_split": null}