From b2795d6b15f2feef3b282bc2f9b26064eb9fa367 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Fri, 1 Dec 2023 09:15:47 +0100 Subject: [PATCH 01/19] dotenv in requirements was wrong. It is now fixed --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 223145d..bdce480 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ requests==2.31.0 strsimpy==0.2.1 mock==5.1.0 -dotenv==0.21.0 \ No newline at end of file +python-dotenv==0.21.0 \ No newline at end of file
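The package fix above matters because the module imported as "dotenv" is distributed on PyPI as python-dotenv; the similarly named dotenv package does not provide it. A minimal sanity check, assuming a .env file in the working directory with an API_SECRET=changeme entry (the variable name here is only an example, borrowed from the README's environment variables):

import os
from dotenv import load_dotenv  # provided by python-dotenv==0.21.0, not the unrelated "dotenv" package

load_dotenv()  # reads .env from the current directory into the process environment
print(os.getenv("API_SECRET"))  # -> "changeme" if the entry is present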
From e5650c0c00f8987b80fb4724ea31df3c1c50e875 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Fri, 1 Dec 2023 14:52:27 +0100 Subject: [PATCH 02/19] The frame for prompting llama 2 has been implemented --- .../multilingual/llm_messenger.py | 73 +++++++++++++------ relation_extraction/multilingual/main.py | 6 +- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 6c50835..edcbbb2 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -1,5 +1,6 @@ from relation_extraction.API_handler import APIHandler import requests +import re from llama_cpp import Llama class LLMMessenger(APIHandler): @@ -10,33 +11,33 @@ def API_endpoint(): def send_request(request): # Put the location of to the GGUF model that you've download from HuggingFace here - model_path = "llama-2-7b-chat.Q2_K.gguf" + model_path = "./relation_extraction/multilingual/llama-2-7b-chat.Q2_K.gguf" # Create a llama model -#model = Llama(model_path=model_path, n_ctx=4092) + model = Llama(model_path=model_path, n_ctx=4092) # Prompt creation - system_message = """### Instruction ### - When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. + # system_message = """### Instruction ### + # When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. - ### Context ### - List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] + # ### Context ### + # List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] - ### Input Data ### - You should perform relation extraction when prompted with input on the following format: - "sentence", [comma_separated_list_of_entity_mentions] + # ### Input Data ### + # You should perform relation extraction when prompted with input on the following format: + # "sentence", [comma_separated_list_of_entity_mentions] - ### Output Indicator ### - If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". In all other cases, your output should be a list of triples on the following format: - + # ### Output Indicator ### + # If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". In all other cases, your output should be a list of triples on the following format: + # - - """ - user_message = '"Casper and Rytter has the same mother", [Casper, Rytter]' + + # """ + # user_message = '"Casper and Rytter has the same mother", [Casper, Rytter]' prompt = f"""[INST] <<SYS>> - {system_message} + {request["system_message"]} <</SYS>> - {user_message} [/INST]""" + {request["user_message"]} [/INST]""" # Model parameters max_tokens = 4092 @@ -55,15 +56,37 @@ def send_request(request): def process_message(response): print("Recieved response from Llama2...") - print(response) - - - def costruct_prompt_message(data): - system_message = """### Instruction ### + triples = [] + answer = re.split("/INST]", response["choices"][0]["text"])[1] + print(answer) + llama_triples = re.findall("<.*,.*,.*>|\[.*,.*,.*\]", answer) + for llama_triple in llama_triples: + triple = re.split(",", llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", "")) + if len(triple) == 3: + triple_object = {} + for i, entry in enumerate(triple): + triple_object[i.__str__()] = entry.strip() + triples.append(triple_object) + print(triples) + return triples + + def check_validity_of_response(sentence, response, relations): + triples = [] + valid_entity_mentions = [em["name"] for em in sentence["entityMentions"]] + for triple in response: + if triple["0"] in valid_entity_mentions and triple["1"] in relations and triple["2"] in valid_entity_mentions: # 0 = subject, 1 = predicate, and 2 = object + triples.append([triple["0"], triple["1"], triple["2"]]) + return triples + + def prompt_llm(data, relations): + triples = [] + relations_test = ["married", "location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"] + relations_text = "[" + ", ".join(["location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"]) + "]" + system_message = f"""### Instruction ### When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context.
### Context ### - List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] + List of relations: {relations_text} ### Input Data ### You should perform relation extraction when prompted with input on the following format: "sentence", [comma_separated_list_of_entity_mentions] ### Output Indicator ### If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". In all other cases, your output should be a list of triples on the following format: """ @@ -82,7 +105,9 @@ def costruct_prompt_message(data): user_message = f'"{sentence["sentence"]}", [' for em in sentence["entityMentions"]: user_message += f"{em['name']}, " - user_message = user_message[:-2] + ']' #Remove comma and space after last entity mention + user_message = user_message[:-2] + ']' #Remove comma and space after last entity mention in message request["user_message"] = user_message response = LLMMessenger.send_request(request) - LLMMessenger.process_message(response) + process_response = LLMMessenger.process_message(response) + triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations_test) + print(triples) \ No newline at end of file diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py index 95dff90..cc1be52 100644 --- a/relation_extraction/multilingual/main.py +++ b/relation_extraction/multilingual/main.py @@ -29,7 +29,7 @@ def begin_relation_extraction(data): raise Exception("Incorrectly formatted input. Exception during parsing") try: - triples = LLMMessenger.costruct_prompt_message(parsed_data) + triples = LLMMessenger.prompt_llm(parsed_data, relations) except Exception as E: print(f"Exception during prompt to Llama 2: {str(E)}") raise Exception("Exception during prompt to Llama 2") @@ -49,12 +49,12 @@ def test(): "language": "en", "sentences": [ { - "sentence": "Barrack Obama is married to Michelle Obama.", + "sentence": "Barack Obama is married to Michelle Obama.", "sentenceStartIndex": 20, "sentenceEndIndex": 62, "entityMentions": [ - { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } ] } From 1978b43ccc53194660099d832f9e25ad4453b0ce Mon Sep 17 00:00:00 2001 From: Rasmus Date: Mon, 4 Dec 2023 12:53:05 +0100 Subject: [PATCH 03/19] Get iri of entity mentions in triples returned by Llama --- relation_extraction/multilingual/llm_messenger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index edcbbb2..585f768 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -75,7 +75,7 @@ def check_validity_of_response(sentence, response, relations): valid_entity_mentions = [em["name"] for em in sentence["entityMentions"]] for triple in response: if triple["0"] in valid_entity_mentions and triple["1"] in relations and triple["2"] in valid_entity_mentions: # 0 = subject, 1 = predicate, and 2 = object - triples.append([triple["0"], triple["1"], triple["2"]]) + triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["1"]], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]]])
 return triples
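Patch 03 swaps each validated mention name for its IRI via inline list comprehensions. Note that this version wraps every element of the triple in a list, including the relation, which has no entity mention to match; the next patch narrows the subject and object to plain strings by indexing the first match and passes the relation through unchanged. A standalone sketch of that corrected lookup, using the sentence structure from this series' test data (the helper name is illustrative, not from the repository):

def resolve_iri(sentence, name):
    # First entity mention whose name matches; assumes the caller has already
    # checked that the name occurs in sentence["entityMentions"].
    return [em["iri"] for em in sentence["entityMentions"] if em["name"] == name][0]

sentence = {"entityMentions": [{"name": "Barack Obama", "iri": "knox-kb01.srv.aau.dk/Barack_Obama"}]}
print(resolve_iri(sentence, "Barack Obama"))  # -> knox-kb01.srv.aau.dk/Barack_Obama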
From a3340db003c09c10b9e40c5e5e8ea2f1e3028025 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Mon, 4 Dec 2023 15:27:35 +0100 Subject: [PATCH 04/19] Begun writing tests and changing the structure of the test folder --- .../multilingual/llm_messenger.py | 8 +- relation_extraction/multilingual/main.py | 25 --- relation_extraction/relation_extractor.py | 2 + test/test_concept_linking/__init__.py | 0 test/test_relation_extraction/__init__.py | 0 .../test_get_relations.py | 0 .../test_llama_relation_extractor.py | 64 ++++++ .../test_llm_messenger.py | 211 ++++++++++++++++++ .../test_output.py | 0 .../test_relation_extraction.py | 0 10 files changed, 280 insertions(+), 30 deletions(-) create mode 100644 test/test_concept_linking/__init__.py create mode 100644 test/test_relation_extraction/__init__.py rename test/{test_server => test_relation_extraction}/test_get_relations.py (100%) create mode 100644 test/test_relation_extraction/test_llama_relation_extractor.py create mode 100644 test/test_relation_extraction/test_llm_messenger.py rename test/{test_server => test_relation_extraction}/test_output.py (100%) rename test/{test_server => test_relation_extraction}/test_relation_extraction.py (100%) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 585f768..acb5239 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -58,8 +58,7 @@ def process_message(response): print("Recieved response from Llama2...") triples = [] answer = re.split("/INST]", response["choices"][0]["text"])[1] - print(answer) - llama_triples = re.findall("<.*,.*,.*>|\[.*,.*,.*\]", answer) + llama_triples = re.findall("<[\s\w\d]*,[\s\w\d]*,[\s\w\d]*>|\[[\s\w\d]*,[\s\w\d]*,[\s\w\d]*\]", answer) for llama_triple in llama_triples: triple = re.split(",", llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", "")) if len(triple) == 3: @@ -67,7 +66,6 @@ def process_message(response): for i, entry in enumerate(triple): triple_object[i.__str__()] = entry.strip() triples.append(triple_object) - print(triples) return triples def check_validity_of_response(sentence, response, relations): @@ -75,7 +73,7 @@ def check_validity_of_response(sentence, response, relations): valid_entity_mentions = [em["name"] for em in sentence["entityMentions"]] for triple in response: if triple["0"] in valid_entity_mentions and triple["1"] in relations and triple["2"] in valid_entity_mentions: # 0 = subject, 1 = predicate, and 2 = object - triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["1"]], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]]]) + triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]][0], triple["1"], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]][0]]) return triples def prompt_llm(data, relations): @@ -110,4 +108,4 @@ def prompt_llm(data, relations): response = LLMMessenger.send_request(request) process_response = LLMMessenger.process_message(response) triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations_test) - print(triples) \ No newline at end of file + return triples diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py index cc1be52..d0bb596 100644 --- a/relation_extraction/multilingual/main.py +++ b/relation_extraction/multilingual/main.py @@ -39,28 +39,3 @@ def
begin_relation_extraction(data): except Exception as E: print(f"Exception during request to database. {str(E)}") raise Exception("Data was not sent to database due to connection error") - - -def test(): - begin_relation_extraction(data= - [ - { - "filename": "path/to/Artikel.txt", - "language": "en", - "sentences": [ - { - "sentence": "Barack Obama is married to Michelle Obama.", - "sentenceStartIndex": 20, - "sentenceEndIndex": 62, - "entityMentions": - [ - { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, - { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } - ] - } - ] - } - ] - ) - -test() \ No newline at end of file diff --git a/relation_extraction/relation_extractor.py b/relation_extraction/relation_extractor.py index 3db32ae..7a333fd 100644 --- a/relation_extraction/relation_extractor.py +++ b/relation_extraction/relation_extractor.py @@ -1,7 +1,9 @@ from relation_extraction.NaiveMVP.main import handle_relation_post_request +from relation_extraction.multilingual.main import begin_relation_extraction class RelationExtractor(): @classmethod def begin_extraction(self, data): handle_relation_post_request(data) + begin_relation_extraction(data) diff --git a/test/test_concept_linking/__init__.py b/test/test_concept_linking/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_relation_extraction/__init__.py b/test/test_relation_extraction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_server/test_get_relations.py b/test/test_relation_extraction/test_get_relations.py similarity index 100% rename from test/test_server/test_get_relations.py rename to test/test_relation_extraction/test_get_relations.py diff --git a/test/test_relation_extraction/test_llama_relation_extractor.py b/test/test_relation_extraction/test_llama_relation_extractor.py new file mode 100644 index 0000000..5c79d49 --- /dev/null +++ b/test/test_relation_extraction/test_llama_relation_extractor.py @@ -0,0 +1,64 @@ +import unittest +from unittest import mock +from relation_extraction.multilingual.main import * + +class TestHandleRelationPostRequest(unittest.TestCase): + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_relations_fail(self, mock_extract_specific_relations): + mock_extract_specific_relations.side_effect = Exception() + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + + @mock.patch('relation_extraction.multilingual.main.parse_data') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_parse_fail(self, mock_extract_specific_relations, mock_parse_data): + mock_extract_specific_relations.return_value = [] + mock_parse_data.side_effect = Exception() + + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + mock_parse_data.assert_called_once() + + @mock.patch('relation_extraction.multilingual.llm_messenger.LLMMessenger.prompt_llm') + @mock.patch('relation_extraction.multilingual.main.parse_data') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extract_specific_relations, mock_parse_data, 
mock_prompt_llm): + mock_extract_specific_relations.return_value = [] + mock_parse_data.return_value = [] + mock_prompt_llm.side_effect = Exception() + + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + mock_parse_data.assert_called_once() + mock_prompt_llm.assert_called_once() + + @mock.patch('relation_extraction.knowledge_graph_messenger.KnowledgeGraphMessenger.send_request') + @mock.patch('relation_extraction.multilingual.llm_messenger.LLMMessenger.prompt_llm') + @mock.patch('relation_extraction.multilingual.main.parse_data') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extract_specific_relations, mock_parse_data, mock_prompt_llm, mock_send_to_db): + mock_extract_specific_relations.return_value = [] + mock_parse_data.return_value = [] + mock_prompt_llm.return_value = {} + mock_send_to_db.side_effect = Exception() + + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + mock_parse_data.assert_called_once() + mock_prompt_llm.assert_called_once() + mock_send_to_db.assert_called_once() + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_relation_extraction/test_llm_messenger.py b/test/test_relation_extraction/test_llm_messenger.py new file mode 100644 index 0000000..d57cb7e --- /dev/null +++ b/test/test_relation_extraction/test_llm_messenger.py @@ -0,0 +1,211 @@ +import unittest +from unittest import mock +from relation_extraction.multilingual.llm_messenger import * + +class TestProcessMessage(unittest.TestCase): + def test_send_request(self): + testdata = [ + { + + } + ] + + def test_process_message(self): + testdata = [ + { + "choices": [ + { + "text":"[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: and " + } + ], + "expected": [ + { + "0":"Barack Obama", + "1":"married", + "2":"Michelle Obama" + }, + { + "0":"Michelle Obama", + "1":"married", + "2":"Barack Obama" + } + ] + }, + { + "choices": [ + { + "text":"[INST] Peter and Marianne has the same mother. [/INST] In this sentence the triples are: and " + } + ], + "expected":[ + { + "0":"Peter", + "1":"sibling", + "2":"Marianne" + }, + { + "0":"Marianne", + "1":"sibling", + "2":"Peter" + } + ] + } + ] + + for td in testdata: + res = LLMMessenger.process_message(td) + self.assertEqual(res, td["expected"]) + + def test_process_message_wrong_format(self): + testdata = [ + { + "choices": [ + { + "text":"[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: Subject: Barack Obama\n Relation: married\n Object: Michelle Obama" + } + ], + "expected": [] + }, + { + "choices": [ + { + "text":"[INST] Peter and Marianne has the same mother. 
[/INST] In this sentence the triples are: Subject: Peter\n Relation: sibling\n Object: Marianne" + } + ], + "expected":[] + } + ] + + for td in testdata: + res = LLMMessenger.process_message(td) + self.assertEqual(res, td["expected"]) + + def test_check_validity_of_response(self): + relations = ["married", "sibling", "child", "parent"] + testdata = [ + { + "Sentence": { + "sentence": "Barack Obama is married to Michelle Obama.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michelle_Obama" } + ] + }, + "response": [ + { + "0":"Barack Obama", + "1":"married", + "2":"Michelle Obama" + }, + { + "0":"Michelle Obama", + "1":"married", + "2":"Barack Obama" + } + ], + "expected": [ + [ + "knox-kb01.srv.aau.dk/Barack_Obama", + "married", + "knox-kb01.srv.aau.dk/Michelle_Obama" + ], + [ + "knox-kb01.srv.aau.dk/Michelle_Obama", + "married", + "knox-kb01.srv.aau.dk/Barack_Obama" + ], + ] + } + ] + + for td in testdata: + res = LLMMessenger.check_validity_of_response(td["Sentence"], td["response"], relations) + self.assertEqual(res, td["expected"]) + + @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.send_request") + @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.process_message") + @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.check_validity_of_response") + def test_prompt_llm(self, mock_check_validity, mock_process_message, mock_send_request): + relations = ["married", "sibling", "child", "parent"] + testdata = [ + { + "response": { + "choices": [ + { + "text":"[INST] Barack Obama is married to Michelle Obama. 
[/INST] In this sentence the triples are: and " + } + ], + }, + "process_response": [ + { + "0":"Barack Obama", + "1":"married", + "2":"Michelle Obama" + }, + { + "0":"Michelle Obama", + "1":"married", + "2":"Barack Obama" + } + ], + "validity_response": [ + [ + "knox-kb01.srv.aau.dk/Barack_Obama", + "married", + "knox-kb01.srv.aau.dk/Michelle_Obama" + ], + [ + "knox-kb01.srv.aau.dk/Michelle_Obama", + "married", + "knox-kb01.srv.aau.dk/Barack_Obama" + ], + ], + "expected": [ + [ + "knox-kb01.srv.aau.dk/Barack_Obama", + "married", + "knox-kb01.srv.aau.dk/Michelle_Obama" + ], + [ + "knox-kb01.srv.aau.dk/Michelle_Obama", + "married", + "knox-kb01.srv.aau.dk/Barack_Obama" + ], + ], + "data": [ + { + "language": "en", + "metadataId":"790261e8-b8ec-4801-9cbd-00263bcc666d", + "sentences": [ + { + "sentence": "Barack Obama is married to Michelle Obama.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } + ] + } + ] + } + ], + "relations": relations + } + ] + + for td in testdata: + mock_send_request.return_value = td["response"] + mock_process_message.return_value = td["process_response"] + mock_check_validity.return_value = td["validity_response"] + res = LLMMessenger.prompt_llm(td["data"], td["relations"]) + for triple in res: + self.assertEqual(len(triple), 3) #All must be triples + self.assertEqual(td["expected"], res) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/test/test_server/test_output.py b/test/test_relation_extraction/test_output.py similarity index 100% rename from test/test_server/test_output.py rename to test/test_relation_extraction/test_output.py diff --git a/test/test_server/test_relation_extraction.py b/test/test_relation_extraction/test_relation_extraction.py similarity index 100% rename from test/test_server/test_relation_extraction.py rename to test/test_relation_extraction/test_relation_extraction.py From 91468d4877c405499b95bcb08e4e0fd7091a7b1e Mon Sep 17 00:00:00 2001 From: Rasmus Date: Mon, 4 Dec 2023 15:51:21 +0100 Subject: [PATCH 05/19] All functions have been tested apart from send_request in llm_messenger --- .../test_llama_relation_extractor.py | 95 +++++++++++++++++++ .../test_llm_messenger.py | 1 + 2 files changed, 96 insertions(+) diff --git a/test/test_relation_extraction/test_llama_relation_extractor.py b/test/test_relation_extraction/test_llama_relation_extractor.py index 5c79d49..3de1fa4 100644 --- a/test/test_relation_extraction/test_llama_relation_extractor.py +++ b/test/test_relation_extraction/test_llama_relation_extractor.py @@ -60,5 +60,100 @@ def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extr mock_prompt_llm.assert_called_once() mock_send_to_db.assert_called_once() +class TestParseData(unittest.TestCase): + def test_parse_remove_ems_without_iri(self): + data = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + { "name": "Dog", 
"startIndex": 27, "endIndex": 40, "iri": None} + ] + } + ] + } + ] + res = parse_data(data) + + expected = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + ] + } + ] + } + ] + + self.assertEqual(res, expected) + + def test_parse_remove_sentences_with_lt_two_ems(self): + data = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + ] + }, + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + ] + } + ] + } + ] + res = parse_data(data) + + expected = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + ] + } + ] + } + ] + + self.assertEqual(res, expected) + if __name__ == '__main__': unittest.main() diff --git a/test/test_relation_extraction/test_llm_messenger.py b/test/test_relation_extraction/test_llm_messenger.py index d57cb7e..4301025 100644 --- a/test/test_relation_extraction/test_llm_messenger.py +++ b/test/test_relation_extraction/test_llm_messenger.py @@ -3,6 +3,7 @@ from relation_extraction.multilingual.llm_messenger import * class TestProcessMessage(unittest.TestCase): + #Needs testing def test_send_request(self): testdata = [ { From dd262682966395a876f82cf017a15e009d911d98 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Mon, 4 Dec 2023 15:56:45 +0100 Subject: [PATCH 06/19] Duplicate test names fixed --- test/test_relation_extraction/test_llama_relation_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_relation_extraction/test_llama_relation_extractor.py b/test/test_relation_extraction/test_llama_relation_extractor.py index 3de1fa4..1b531ea 100644 --- a/test/test_relation_extraction/test_llama_relation_extractor.py +++ b/test/test_relation_extraction/test_llama_relation_extractor.py @@ -45,7 +45,7 @@ def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extr @mock.patch('relation_extraction.multilingual.llm_messenger.LLMMessenger.prompt_llm') @mock.patch('relation_extraction.multilingual.main.parse_data') @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') - def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extract_specific_relations, 
mock_parse_data, mock_prompt_llm, mock_send_to_db): + def test_handle_post_request_raises_exception_if_send_to_db_fail(self, mock_extract_specific_relations, mock_parse_data, mock_prompt_llm, mock_send_to_db): mock_extract_specific_relations.return_value = [] mock_parse_data.return_value = [] mock_prompt_llm.return_value = {} From e2fbea27d6849a8c3299438827e6a0ab16c34d44 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Wed, 6 Dec 2023 10:50:17 +0100 Subject: [PATCH 07/19] Fixed description of a method --- relation_extraction/multilingual/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py index d0bb596..a83e209 100644 --- a/relation_extraction/multilingual/main.py +++ b/relation_extraction/multilingual/main.py @@ -4,7 +4,7 @@ from relation_extraction.multilingual.llm_messenger import LLMMessenger def parse_data(data): - "Parses JSON data and converts it into a dictionary with information on sentence, tokens, and entity mentions" + "Removes entity mentions with no iri and sentences with less than two entity mentions" for file in data: for i, sentence in enumerate(file["sentences"]):
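The corrected docstring in patch 07 matches what patch 05's tests pin down: parse_data drops entity mentions that carry no iri and then drops sentences left with fewer than two mentions. A minimal sketch of that filter, reconstructed from the tests rather than from the implementation (which this series never shows in full):

def parse_data(data):
    # Reconstruction based on the expected values in patch 05's tests; the real function may differ.
    for file in data:
        for sentence in file["sentences"]:
            # Keep only entity mentions that resolved to an IRI
            sentence["entityMentions"] = [em for em in sentence["entityMentions"] if em["iri"] is not None]
        # A relation needs at least a subject and an object mention
        file["sentences"] = [s for s in file["sentences"] if len(s["entityMentions"]) >= 2]
    return data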
From 72668cb2a7a90d36b9a45243086c8f2594709328 Mon Sep 17 00:00:00 2001 From: Johannes Karstoft Pedersen Date: Thu, 7 Dec 2023 09:24:59 +0100 Subject: [PATCH 08/19] New prompt --- relation_extraction/evaluation/evaluation.py | 6 ++- .../multilingual/llm_messenger.py | 47 +++++++++++-------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/relation_extraction/evaluation/evaluation.py b/relation_extraction/evaluation/evaluation.py index 52e2ec7..492e32e 100644 --- a/relation_extraction/evaluation/evaluation.py +++ b/relation_extraction/evaluation/evaluation.py @@ -3,13 +3,14 @@ from relation_extraction.ontology_messenger import OntologyMessenger from relation_extraction.LessNaive.lessNaive import do_relation_extraction from relation_extraction.NaiveMVP.main import parse_data +from relation_extraction.multilingual.llm_messenger import LLMMessenger import re import datetime import json -def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3, length = 100, fill = '█', printEnd = "\r"): +def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3, length = 100, fill = '█', printEnd = "\n"): percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) filledLength = int(length * iteration // total) bar = fill * filledLength + '-' * (length - filledLength) @@ -60,7 +61,8 @@ def main(): solutions_to_test = { # "less_naive": do_relation_extraction - "naive": parse_data + # "naive": parse_data + "multilingual": LLMMessenger.prompt_llm } evaluation_results = dict() #dictionary to hold results of tests for name, solution in solutions_to_test.items(): diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index acb5239..018f9d7 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -11,10 +11,10 @@ def API_endpoint(): def send_request(request): # Put the location of to the GGUF model that you've download from HuggingFace here - model_path = "./relation_extraction/multilingual/llama-2-7b-chat.Q2_K.gguf" + model_path = "./relation_extraction/multilingual/llama-2-13b-chat.Q2_K.gguf" # Create a llama model - model = Llama(model_path=model_path, n_ctx=4092) + model = Llama(model_path=model_path, n_ctx=4096) # Prompt creation # system_message = """### Instruction ### @@ -40,7 +40,7 @@ def send_request(request): {request["user_message"]} [/INST]""" # Model parameters - max_tokens = 4092 + max_tokens = 4096 # Run the model output = model(prompt, max_tokens=max_tokens, echo=True) @@ -58,6 +58,7 @@ def process_message(response): print("Recieved response from Llama2...") triples = [] answer = re.split("/INST]", response["choices"][0]["text"])[1] + print(response["choices"][0]["text"]) llama_triples = re.findall("<[\s\w\d]*,[\s\w\d]*,[\s\w\d]*>|\[[\s\w\d]*,[\s\w\d]*,[\s\w\d]*\]", answer) for llama_triple in llama_triples: triple = re.split(",", llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", "")) @@ -78,21 +79,27 @@ def check_validity_of_response(sentence, response, relations): def prompt_llm(data, relations): triples = [] - relations_test = ["married", "location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"] + relations_test = ["spouse", "location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"] relations_text = "[" + ", ".join(["location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"]) + "]" system_message = f"""### Instruction ### - When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. - - ### Context ### - List of relations: {relations_text} - - ### Input Data ### - You should perform relation extraction when prompted with input on the following format: - "sentence", [comma_separated_list_of_entity_mentions] - - ### Output Indicator ### - If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined".
In all other cases, your output should be a list of triples on the following format: - +When given a sentence in either danish or english and the entity mentions in the sentence, you should find triples by performing relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. You should provide reasoning for why each of the triples you find is correct. +S +### Context ### +List of relations: [spouse, location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] +Here is a transcript with you. You are called Llama. +User: Sentence: "Aalborg is in Denmark" Entity mentions: ["Aalborg", "Denmark"] +Llama: The relation "is in" is not in the list of relations but "location" is in the list of relations. "Aalborg is in Denmark" implies that Aalborg is located in Denmark. Therefore, the triple <"Aalborg", location, "Denmark"> is correct. +User: Sentence: "Peter has a subscription to Pure Gym" Entity mentions: ["Peter", "Pure Gym"] +Llama: The relation "subscription" is not in the list of relations, but "member" is in the list of relations. "Peter has a subscription to Pure Gym" implies that Peter is a member of Pure Gym. Therefore, the triple <"Peter", member, "Pure Gym"> is correct. +User: Sentence: "Martin Eberhard and Marc Tarpenning are the original founders of Tesla" Entity mentions: ["Martin Eberhard", "Marc Tarpenning", "Tesla"] +Llama: The sentence states that Tesla was founded by both Martin Eberhard and Marc Tarpenning. The relation "foundedBy" is in the list of relations. Therefore, the two triples <"Tesla", foundedBy, "Martin Eberhard"> and <"Tesla", foundedBy, "Marc Tarpenning"> are correct. +User: Sentence: "Sofie was born in Kolding" Entity mentions: ["Sofie", "Kolding"] +Llama: The relation "born in" is not in the list of relations. But "born in" implies a place of birth, and "birthPlace" is in the list of relations. Therefore, the triple <"Sofie", birthPlace, "Kolding"> is correct. +User: Sentence: "Frederik is the father of Christian" Entity mentions: ["Frederik", "Christian"] +Llama: The relation "father" is not in the list of relations. However, a father is a parent and "parent" is in the list of relations. Therefore, the triple <"Frederik", parent, "Christian"> is correct. + +### Output Indicator ### +Before answering with a triple, you should explain why it is correct. If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with “undefined”. In all other cases, your output should be triples on the format and an explanation for each triple. 
""" @@ -100,12 +107,14 @@ def prompt_llm(data, relations): for file in data: for sentence in file["sentences"]: - user_message = f'"{sentence["sentence"]}", [' + user_message = f'Sentence: "{sentence["sentence"]}" Entity mentions: [' for em in sentence["entityMentions"]: - user_message += f"{em['name']}, " + user_message += f'"{em["name"]}", ' user_message = user_message[:-2] + ']' #Remove comma and space after last entity mention in message request["user_message"] = user_message response = LLMMessenger.send_request(request) process_response = LLMMessenger.process_message(response) - triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations_test) + triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations) + print(triples) return triples + From d5c38abafdfeed6a420a4b6998ca07bc763191f8 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Thu, 7 Dec 2023 10:28:29 +0100 Subject: [PATCH 09/19] Relations are now split up before prompting Llama --- .../multilingual/llm_messenger.py | 33 ++++--------------- relation_extraction/multilingual/main.py | 26 ++++++++++++++- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 018f9d7..f1cf68c 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -6,7 +6,7 @@ class LLMMessenger(APIHandler): def API_endpoint(): - return "" + return "http://knox-func01.srv.aau.dk:5004/llama" def send_request(request): @@ -16,24 +16,6 @@ def send_request(request): # Create a llama model model = Llama(model_path=model_path, n_ctx=4096) - # Prompt creation - # system_message = """### Instruction ### - # When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. - - # ### Context ### - # List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] - - # ### Input Data ### - # You should perform relation extraction when prompted with input on the following format: - # "sentence", [comma_separated_list_of_entity_mentions] - - # ### Output Indicator ### - # If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". 
In all other cases, your output should be a list of triples on the following format: - # - - # """ - # user_message = '"Casper and Rytter has the same mother", [Casper, Rytter]' - prompt = f"""[INST] <> {request["system_message"]} <> @@ -51,17 +33,18 @@ def send_request(request): # # Write content to the file # file.write(output["choices"][0]["text"]) - #response = requests.post(url=LLMMessenger.API_endpoint) + #response = requests.post(url=LLMMessenger.API_endpoint, json=request) return output def process_message(response): print("Recieved response from Llama2...") triples = [] + print(response) answer = re.split("/INST]", response["choices"][0]["text"])[1] print(response["choices"][0]["text"]) - llama_triples = re.findall("<[\s\w\d]*,[\s\w\d]*,[\s\w\d]*>|\[[\s\w\d]*,[\s\w\d]*,[\s\w\d]*\]", answer) + llama_triples = re.findall('<["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*>|\[["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*\]', answer) for llama_triple in llama_triples: - triple = re.split(",", llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", "")) + triple = re.split('"."', llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", "")) if len(triple) == 3: triple_object = {} for i, entry in enumerate(triple): @@ -79,13 +62,11 @@ def check_validity_of_response(sentence, response, relations): def prompt_llm(data, relations): triples = [] - relations_test = ["spouse", "location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"] - relations_text = "[" + ", ".join(["location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"]) + "]" system_message = f"""### Instruction ### When given a sentence in either danish or english and the entity mentions in the sentence, you should find triples by performing relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. You should provide reasoning for why each of the triples you find is correct. S ### Context ### -List of relations: [spouse, location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] +List of relations: [{", ".join(relations)}] Here is a transcript with you. You are called Llama. User: Sentence: "Aalborg is in Denmark" Entity mentions: ["Aalborg", "Denmark"] Llama: The relation "is in" is not in the list of relations but "location" is in the list of relations. "Aalborg is in Denmark" implies that Aalborg is located in Denmark. Therefore, the triple <"Aalborg", location, "Denmark"> is correct. 
@@ -103,7 +84,7 @@ def prompt_llm(data, relations): """ - request = {"system_message": system_message, "user_message": ""} + request = {"system_message": system_message, "user_message": "", "max_tokens": 4096} for file in data: for sentence in file["sentences"]: diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py index a83e209..4f85002 100644 --- a/relation_extraction/multilingual/main.py +++ b/relation_extraction/multilingual/main.py @@ -29,7 +29,11 @@ def begin_relation_extraction(data): raise Exception("Incorrectly formatted input. Exception during parsing") try: - triples = LLMMessenger.prompt_llm(parsed_data, relations) + triples = [] + chunk_size = 250 + split_relations = [relations[i:i + chunk_size] for i in range(0, len(relations), chunk_size)] #Split the relations into lists of size chunk_size + for split_relation in split_relations: + triples.append(LLMMessenger.prompt_llm(parsed_data, split_relation)) except Exception as E: print(f"Exception during prompt to Llama 2: {str(E)}") raise Exception("Exception during prompt to Llama 2") @@ -39,3 +43,23 @@ def begin_relation_extraction(data): except Exception as E: print(f"Exception during request to database. {str(E)}") raise Exception("Data was not sent to database due to connection error") + +begin_relation_extraction([ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } + ] + } + ] + } +] +) \ No newline at end of file From 3552e5241c0d6831b3867c3c3d112180d923f95a Mon Sep 17 00:00:00 2001 From: Rasmus Date: Thu, 7 Dec 2023 10:56:40 +0100 Subject: [PATCH 10/19] Relations are now split at a reasonable size --- .../multilingual/llm_messenger.py | 4 +-- relation_extraction/multilingual/main.py | 25 +++---------------- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index f1cf68c..0f1a3ea 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -60,13 +60,13 @@ def check_validity_of_response(sentence, response, relations): triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]][0], triple["1"], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]][0]]) return triples - def prompt_llm(data, relations): + def prompt_llm(data, split_relations, relations): triples = [] system_message = f"""### Instruction ### When given a sentence in either danish or english and the entity mentions in the sentence, you should find triples by performing relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. You should provide reasoning for why each of the triples you find is correct. S ### Context ### -List of relations: [{", ".join(relations)}] +List of relations: [{", ".join(split_relations)}] Here is a transcript with you. 
You are called Llama. User: Sentence: "Aalborg is in Denmark" Entity mentions: ["Aalborg", "Denmark"] Llama: The relation "is in" is not in the list of relations but "location" is in the list of relations. "Aalborg is in Denmark" implies that Aalborg is located in Denmark. Therefore, the triple <"Aalborg", location, "Denmark"> is correct. diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py index 4f85002..ca069e4 100644 --- a/relation_extraction/multilingual/main.py +++ b/relation_extraction/multilingual/main.py @@ -30,10 +30,10 @@ def begin_relation_extraction(data): try: triples = [] - chunk_size = 250 + chunk_size = 650 split_relations = [relations[i:i + chunk_size] for i in range(0, len(relations), chunk_size)] #Split the relations into lists of size chunk_size for split_relation in split_relations: - triples.append(LLMMessenger.prompt_llm(parsed_data, split_relation)) + triples.append(LLMMessenger.prompt_llm(parsed_data, split_relation, relations)) except Exception as E: print(f"Exception during prompt to Llama 2: {str(E)}") raise Exception("Exception during prompt to Llama 2") @@ -43,23 +43,4 @@ def begin_relation_extraction(data): except Exception as E: print(f"Exception during request to database. {str(E)}") raise Exception("Data was not sent to database due to connection error") - -begin_relation_extraction([ - { - "filename": "path/to/Artikel.txt", - "language": "en", - "sentences": [ - { - "sentence": "Barrack Obama is married to Michelle Obama.", - "sentenceStartIndex": 20, - "sentenceEndIndex": 62, - "entityMentions": - [ - { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, - { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } - ] - } - ] - } -] -) \ No newline at end of file + \ No newline at end of file From 7c32e65e0ee985420cb52be144c6bb4d591b055b Mon Sep 17 00:00:00 2001 From: Rasmus Date: Thu, 7 Dec 2023 12:33:22 +0100 Subject: [PATCH 11/19] Llama 2 on the server will now be called when doing relation extraction --- relation_extraction/evaluation/evaluation.py | 6 ++- .../multilingual/llm_messenger.py | 38 +++---------------- 2 files changed, 10 insertions(+), 34 deletions(-) diff --git a/relation_extraction/evaluation/evaluation.py b/relation_extraction/evaluation/evaluation.py index 492e32e..ed817b7 100644 --- a/relation_extraction/evaluation/evaluation.py +++ b/relation_extraction/evaluation/evaluation.py @@ -95,7 +95,11 @@ def main(): ] }] - res = solution(input_obj, ontology_relations) + chunk_size = 650 + split_relations = [ontology_relations[i:i + chunk_size] for i in range(0, len(ontology_relations), chunk_size)] #Split the relations into lists of size chunk_size + res = [] + for split_relation in split_relations: + res.append(solution(input_obj, split_relation, ontology_relations)) res_hits = 0 for triple in res: if triple in expected_triples: diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 0f1a3ea..15437a7 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -1,54 +1,27 @@ from relation_extraction.API_handler import APIHandler import requests import re -from llama_cpp import Llama class LLMMessenger(APIHandler): def API_endpoint(): - return "http://knox-func01.srv.aau.dk:5004/llama" + return "http://knox-proxy01.srv.aau.dk/llama-api/llama" def send_request(request): - - 
# Put the location of to the GGUF model that you've download from HuggingFace here - model_path = "./relation_extraction/multilingual/llama-2-13b-chat.Q2_K.gguf" - - # Create a llama model - model = Llama(model_path=model_path, n_ctx=4096) - - prompt = f"""[INST] <> - {request["system_message"]} - <> - {request["user_message"]} [/INST]""" - - # Model parameters - max_tokens = 4096 - - # Run the model - output = model(prompt, max_tokens=max_tokens, echo=True) - - # Print the model output - # print(output["choices"][0]["text"]) - # with open("LlamaResponse.txt", "w") as file: - # # Write content to the file - # file.write(output["choices"][0]["text"]) - - #response = requests.post(url=LLMMessenger.API_endpoint, json=request) - return output + response = requests.post(url=LLMMessenger.API_endpoint, json=request) + return response def process_message(response): print("Recieved response from Llama2...") triples = [] - print(response) answer = re.split("/INST]", response["choices"][0]["text"])[1] - print(response["choices"][0]["text"]) llama_triples = re.findall('<["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*>|\[["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*\]', answer) for llama_triple in llama_triples: triple = re.split('"."', llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", "")) if len(triple) == 3: triple_object = {} for i, entry in enumerate(triple): - triple_object[i.__str__()] = entry.strip() + triple_object[i.__str__()] = entry.strip(' ,') triples.append(triple_object) return triples @@ -95,7 +68,6 @@ def prompt_llm(data, split_relations, relations): request["user_message"] = user_message response = LLMMessenger.send_request(request) process_response = LLMMessenger.process_message(response) - triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations) - print(triples) + triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations) return triples From 3b4b89dd5c71ad9a762684d503ab7a2b00e67046 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Thu, 7 Dec 2023 13:49:47 +0100 Subject: [PATCH 12/19] Test fixed now --- .../test_llama_relation_extractor.py | 10 +++++----- test/test_relation_extraction/test_llm_messenger.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_relation_extraction/test_llama_relation_extractor.py b/test/test_relation_extraction/test_llama_relation_extractor.py index 1b531ea..021e004 100644 --- a/test/test_relation_extraction/test_llama_relation_extractor.py +++ b/test/test_relation_extraction/test_llama_relation_extractor.py @@ -29,7 +29,7 @@ def test_handle_post_request_raises_exception_if_parse_fail(self, mock_extract_s @mock.patch('relation_extraction.multilingual.main.parse_data') @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extract_specific_relations, mock_parse_data, mock_prompt_llm): - mock_extract_specific_relations.return_value = [] + mock_extract_specific_relations.return_value = ["relation1"] mock_parse_data.return_value = [] mock_prompt_llm.side_effect = Exception() @@ -39,16 +39,16 @@ def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extr mock_extract_specific_relations.assert_called_once() mock_parse_data.assert_called_once() - mock_prompt_llm.assert_called_once() + mock_prompt_llm.assert_called() @mock.patch('relation_extraction.knowledge_graph_messenger.KnowledgeGraphMessenger.send_request') 
@mock.patch('relation_extraction.multilingual.llm_messenger.LLMMessenger.prompt_llm') @mock.patch('relation_extraction.multilingual.main.parse_data') @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') def test_handle_post_request_raises_exception_if_send_to_db_fail(self, mock_extract_specific_relations, mock_parse_data, mock_prompt_llm, mock_send_to_db): - mock_extract_specific_relations.return_value = [] + mock_extract_specific_relations.return_value = ["relation1"] mock_parse_data.return_value = [] - mock_prompt_llm.return_value = {} + mock_prompt_llm.return_value = [] mock_send_to_db.side_effect = Exception() data = dict() @@ -57,7 +57,7 @@ def test_handle_post_request_raises_exception_if_send_to_db_fail(self, mock_extr mock_extract_specific_relations.assert_called_once() mock_parse_data.assert_called_once() - mock_prompt_llm.assert_called_once() + mock_prompt_llm.assert_called() mock_send_to_db.assert_called_once() class TestParseData(unittest.TestCase): diff --git a/test/test_relation_extraction/test_llm_messenger.py b/test/test_relation_extraction/test_llm_messenger.py index 4301025..cdec19b 100644 --- a/test/test_relation_extraction/test_llm_messenger.py +++ b/test/test_relation_extraction/test_llm_messenger.py @@ -16,7 +16,7 @@ def test_process_message(self): { "choices": [ { - "text":"[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: and " + "text":'[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: <"Barack Obama", married, "Michelle Obama"> and <"Michelle Obama", married, "Barack Obama">' } ], "expected": [ @@ -35,7 +35,7 @@ def test_process_message(self): { "choices": [ { - "text":"[INST] Peter and Marianne has the same mother. [/INST] In this sentence the triples are: and " + "text":'[INST] Peter and Marianne has the same mother. [/INST] In this sentence the triples are: <"Peter", sibling, "Marianne"> and <"Marianne", sibling, "Peter">' } ], "expected":[ @@ -136,7 +136,7 @@ def test_prompt_llm(self, mock_check_validity, mock_process_message, mock_send_r "response": { "choices": [ { - "text":"[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: and " + "text":'[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: <"Barack Obama", married, "Michelle Obama"> and <"Michelle Obama", married, "Barack Obama">' } ], }, From 4ce9e514e9f45884bdb26bb47e61ff6cc3dc2b3c Mon Sep 17 00:00:00 2001 From: Jonas Geertsen Lund Date: Thu, 7 Dec 2023 14:51:34 +0100 Subject: [PATCH 13/19] Update docker run command in README (added name to docker container) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d728e1..96c69d9 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Note that the ports map to the ports used in the ssh command give in "your port" Deployment is normally handled by Watchtower on push to main. 
However, if manual deployment is needed, run:
 
-`sudo docker run -p 0.0.0.0:4444: --add-host=host.docker.internal:host-gateway -e API_SECRET=*** -e ACCESS_SECRET=*** -d ghcr.io/knox-aau/preprocessinglayer_tripleconstruction:main`
+`docker run --name tc_api -p 0.0.0.0:4444: --add-host=host.docker.internal:host-gateway -e API_SECRET=*** -e ACCESS_SECRET=*** -d ghcr.io/knox-aau/preprocessinglayer_tripleconstruction:main`
 
 ### Access through access API endpoint

From c35a4a90c7672550b27026eb6c6c54b3c0043488 Mon Sep 17 00:00:00 2001
From: Rasmus
Date: Thu, 7 Dec 2023 14:52:26 +0100
Subject: [PATCH 14/19] Tests are now correct again after changes

---
 .../multilingual/llm_messenger.py                   | 31 +++++++++++++++++--
 relation_extraction/multilingual/main.py            |  2 +-
 .../test_llm_messenger.py                           |  9 ++++--
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py
index 15437a7..38cc2d5 100644
--- a/relation_extraction/multilingual/llm_messenger.py
+++ b/relation_extraction/multilingual/llm_messenger.py
@@ -1,6 +1,7 @@
 from relation_extraction.API_handler import APIHandler
 import requests
 import re
+from llama_cpp import Llama
 
 class LLMMessenger(APIHandler):
 
@@ -8,8 +9,32 @@ def API_endpoint():
         return "http://knox-proxy01.srv.aau.dk/llama-api/llama"
 
     def send_request(request):
-        response = requests.post(url=LLMMessenger.API_endpoint, json=request)
-        return response
+
+        # Put the location of the GGUF model that you've downloaded from HuggingFace here
+        model_path = "./relation_extraction/multilingual/llama-2-7b-chat.Q2_K.gguf"
+
+        # Create a llama model
+        model = Llama(model_path=model_path, n_ctx=4096)
+
+        prompt = f"""[INST] <<SYS>>
+        {request["system_message"]}
+        <</SYS>>
+        {request["user_message"]} [/INST]"""
+
+        # Model parameters
+        max_tokens = 4096
+
+        # Run the model
+        output = model(prompt, max_tokens=max_tokens, echo=True)
+
+        # Print the model output
+        # print(output["choices"][0]["text"])
+        # with open("LlamaResponse.txt", "w") as file:
+        #     # Write content to the file
+        #     file.write(output["choices"][0]["text"])
+
+        #response = requests.post(url=LLMMessenger.API_endpoint, json=request)
+        return output
 
     def process_message(response):
         print("Received response from Llama2...")
@@ -17,7 +42,7 @@ def process_message(response):
         answer = re.split("/INST]", response["choices"][0]["text"])[1]
         llama_triples = re.findall('<["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*>|\[["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*\]', answer)
         for llama_triple in llama_triples:
-            triple = re.split('"."', llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", ""))
+            triple = re.split('"', llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", ""))[1:-1]
             if len(triple) == 3:
                 triple_object = {}
                 for i, entry in enumerate(triple):
diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py
index ca069e4..cd545ab 100644
--- a/relation_extraction/multilingual/main.py
+++ b/relation_extraction/multilingual/main.py
@@ -33,7 +33,7 @@ def begin_relation_extraction(data):
         chunk_size = 650
         split_relations = [relations[i:i + chunk_size] for i in range(0, len(relations), chunk_size)] #Split the relations into lists of size chunk_size
         for split_relation in split_relations:
-            triples.append(LLMMessenger.prompt_llm(parsed_data, split_relation, relations))
+            triples.extend(LLMMessenger.prompt_llm(parsed_data, split_relation, relations))
     except Exception as E:
         print(f"Exception during prompt to Llama 2: {str(E)}")
        raise Exception("Exception during prompt to Llama 2")
 
diff --git a/test/test_relation_extraction/test_llm_messenger.py b/test/test_relation_extraction/test_llm_messenger.py
index cdec19b..3460e9d 100644
--- a/test/test_relation_extraction/test_llm_messenger.py
+++ b/test/test_relation_extraction/test_llm_messenger.py
@@ -131,6 +131,7 @@ def test_check_validity_of_response(self):
     @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.check_validity_of_response")
     def test_prompt_llm(self, mock_check_validity, mock_process_message, mock_send_request):
         relations = ["married", "sibling", "child", "parent"]
+        split_relations = [["married", "sibling", "child", "parent"]]
         testdata = [
             {
                 "response": {
                     "choices": [
                         {
                             "text":'[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: <"Barack Obama", married, "Michelle Obama"> and <"Michelle Obama", married, "Barack Obama">'
                         }
                     ],
                 },
                         "entityMentions": [
                             { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" },
-                            { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" }
+                            { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michelle_Obama" }
                         ]
                     }
                 ]
             }
         ],
+        "split_relations": split_relations,
         "relations": relations
     }
 ]
 
         for td in testdata:
             mock_send_request.return_value = td["response"]
             mock_process_message.return_value = td["process_response"]
             mock_check_validity.return_value = td["validity_response"]
-            res = LLMMessenger.prompt_llm(td["data"], td["relations"])
+            res = []
+            for split_relation in td["split_relations"]:
+                res.extend(LLMMessenger.prompt_llm(td["data"], split_relation, td["relations"]))
             for triple in res:
+                print(triple)
                 self.assertEqual(len(triple), 3) #All must be triples
             self.assertEqual(td["expected"], res)

From 30ed73b0017fc0877bc1ccc99cefae0fdd71efbc Mon Sep 17 00:00:00 2001
From: Rasmus
Date: Thu, 7 Dec 2023 14:59:16 +0100
Subject: [PATCH 15/19] Llama 2 on the server is now accessed through the
 Access API

---
 .../multilingual/llm_messenger.py        | 31 +++----------
 relation_extraction/multilingual/main.py | 21 ++++++++++++++-
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py
index 38cc2d5..eab4f04 100644
--- a/relation_extraction/multilingual/llm_messenger.py
+++ b/relation_extraction/multilingual/llm_messenger.py
@@ -1,7 +1,7 @@
 from relation_extraction.API_handler import APIHandler
 import requests
 import re
-from llama_cpp import Llama
+import os
 
 class LLMMessenger(APIHandler):
 
@@ -9,32 +9,9 @@ def API_endpoint():
         return "http://knox-proxy01.srv.aau.dk/llama-api/llama"
 
     def send_request(request):
-
-        # Put the location of the GGUF model that you've downloaded from HuggingFace here
-        model_path = "./relation_extraction/multilingual/llama-2-7b-chat.Q2_K.gguf"
-
-        # Create a llama model
-        model = Llama(model_path=model_path, n_ctx=4096)
-
-        prompt = f"""[INST] <<SYS>>
-        {request["system_message"]}
-        <</SYS>>
-        {request["user_message"]} [/INST]"""
-
-        # Model parameters
-        max_tokens = 4096
-
-        # Run the model
-        output = model(prompt, max_tokens=max_tokens, echo=True)
-
-        # Print the model output
-        # print(output["choices"][0]["text"])
-        # with open("LlamaResponse.txt", "w") as file:
-        #     # Write content to the file
-        #     file.write(output["choices"][0]["text"])
-
-        #response = requests.post(url=LLMMessenger.API_endpoint, json=request)
-        return output
+        HEADERS = {"Access-Authorization": os.getenv("ACCESS_SECRET")}
+        response = requests.post(url=LLMMessenger.API_endpoint(), json=request, headers=HEADERS)
+        return response
 
     def process_message(response):
         print("Received response from Llama2...")
diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py
index cd545ab..48244e1 100644
--- a/relation_extraction/multilingual/main.py
+++ b/relation_extraction/multilingual/main.py
@@ -43,4 +43,23 @@ def begin_relation_extraction(data):
     except Exception as E:
         print(f"Exception during request to database. {str(E)}")
         raise Exception("Data was not sent to database due to connection error")
-    \ No newline at end of file
+
+begin_relation_extraction([
+    {
+        "filename": "path/to/Artikel.txt",
+        "language": "en",
+        "sentences": [
+            {
+                "sentence": "Barrack Obama is married to Michelle Obama.",
+                "sentenceStartIndex": 20,
+                "sentenceEndIndex": 62,
+                "entityMentions":
+                [
+                    { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" },
+                    { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" }
+                ]
+            }
+        ]
+    }
+]
+)
\ No newline at end of file

From 49d8d8b3d58f6b76d97965adb0cbdff0c121fe2d Mon Sep 17 00:00:00 2001
From: Rasmus
Date: Thu, 7 Dec 2023 15:05:40 +0100
Subject: [PATCH 16/19] Removed function call that was only used to see Llama
 2's response

---
 relation_extraction/multilingual/main.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py
index 48244e1..38c9d8d 100644
--- a/relation_extraction/multilingual/main.py
+++ b/relation_extraction/multilingual/main.py
@@ -43,23 +43,3 @@ def begin_relation_extraction(data):
     except Exception as E:
         print(f"Exception during request to database. {str(E)}")
{str(E)}") raise Exception("Data was not sent to database due to connection error") - -begin_relation_extraction([ - { - "filename": "path/to/Artikel.txt", - "language": "en", - "sentences": [ - { - "sentence": "Barrack Obama is married to Michelle Obama.", - "sentenceStartIndex": 20, - "sentenceEndIndex": 62, - "entityMentions": - [ - { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, - { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } - ] - } - ] - } -] -) \ No newline at end of file From 204b46360fcdb9edc8814875586c2bc35ba57949 Mon Sep 17 00:00:00 2001 From: Xaniah <54134770+Xaniah@users.noreply.github.com> Date: Thu, 7 Dec 2023 15:10:33 +0100 Subject: [PATCH 17/19] Update llm_messenger.py --- .../multilingual/llm_messenger.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index eab4f04..1408f91 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -11,6 +11,24 @@ def API_endpoint(): def send_request(request): HEADERS = {"Access-Authorization": os.getenv("ACCESS_SECRET")} response = requests.post(url=LLMMessenger.API_endpoint(), json=request, headers=HEADERS) + + # # Put the location of to the GGUF model that you've download from HuggingFace here + # model_path = "./relation_extraction/multilingual/llama-2-7b-chat.Q2_K.gguf" + + # # Create a llama model + # model = Llama(model_path=model_path, n_ctx=4096) + + # prompt = f"""[INST] <> + # {request["system_message"]} + # <> + # {request["user_message"]} [/INST]""" + + # # Model parameters + # max_tokens = 4096 + + # # Run the model + # output = model(prompt, max_tokens=request["max_tokens"], echo=True) + return response def process_message(response): From e66b29f8b55489ac4911846ebc81a1a4a23ec4d9 Mon Sep 17 00:00:00 2001 From: Xaniah <54134770+Xaniah@users.noreply.github.com> Date: Thu, 7 Dec 2023 15:19:24 +0100 Subject: [PATCH 18/19] Update llm_messenger.py --- relation_extraction/multilingual/llm_messenger.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 1408f91..9426f9d 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -23,9 +23,6 @@ def send_request(request): # <> # {request["user_message"]} [/INST]""" - # # Model parameters - # max_tokens = 4096 - # # Run the model # output = model(prompt, max_tokens=request["max_tokens"], echo=True) From eaff98c6ff9fb87210cbeeb020877817b30e13cc Mon Sep 17 00:00:00 2001 From: Rasmus Date: Thu, 7 Dec 2023 15:24:43 +0100 Subject: [PATCH 19/19] Relations are now IRIs instead of literals --- relation_extraction/multilingual/llm_messenger.py | 2 +- test/test_relation_extraction/test_llm_messenger.py | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 9426f9d..249c964 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -47,7 +47,7 @@ def check_validity_of_response(sentence, response, relations): valid_entity_mentions = [em["name"] for em in sentence["entityMentions"]] for triple in response: if triple["0"] in 
valid_entity_mentions and triple["1"] in relations and triple["2"] in valid_entity_mentions: # 0 = subject, 1 = predicate, and 2 = object - triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]][0], triple["1"], [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]][0]]) + triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]][0], f'http://dbpedia.org/ontology/{triple["1"]}', [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]][0]]) return triples def prompt_llm(data, split_relations, relations): diff --git a/test/test_relation_extraction/test_llm_messenger.py b/test/test_relation_extraction/test_llm_messenger.py index 3460e9d..d0d59fc 100644 --- a/test/test_relation_extraction/test_llm_messenger.py +++ b/test/test_relation_extraction/test_llm_messenger.py @@ -110,12 +110,12 @@ def test_check_validity_of_response(self): "expected": [ [ "knox-kb01.srv.aau.dk/Barack_Obama", - "married", + "http://dbpedia.org/ontology/married", "knox-kb01.srv.aau.dk/Michelle_Obama" ], [ "knox-kb01.srv.aau.dk/Michelle_Obama", - "married", + "http://dbpedia.org/ontology/married", "knox-kb01.srv.aau.dk/Barack_Obama" ], ] @@ -156,24 +156,24 @@ def test_prompt_llm(self, mock_check_validity, mock_process_message, mock_send_r "validity_response": [ [ "knox-kb01.srv.aau.dk/Barack_Obama", - "married", + "http://dbpedia.org/ontology/married", "knox-kb01.srv.aau.dk/Michelle_Obama" ], [ "knox-kb01.srv.aau.dk/Michelle_Obama", - "married", + "http://dbpedia.org/ontology/married", "knox-kb01.srv.aau.dk/Barack_Obama" ], ], "expected": [ [ "knox-kb01.srv.aau.dk/Barack_Obama", - "married", + "http://dbpedia.org/ontology/married", "knox-kb01.srv.aau.dk/Michelle_Obama" ], [ "knox-kb01.srv.aau.dk/Michelle_Obama", - "married", + "http://dbpedia.org/ontology/married", "knox-kb01.srv.aau.dk/Barack_Obama" ], ], @@ -208,7 +208,6 @@ def test_prompt_llm(self, mock_check_validity, mock_process_message, mock_send_r for split_relation in td["split_relations"]: res.extend(LLMMessenger.prompt_llm(td["data"], split_relation, td["relations"])) for triple in res: - print(triple) self.assertEqual(len(triple), 3) #All must be triples self.assertEqual(td["expected"], res)
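After PATCH 19, the post-processing pipeline is: process_message() pulls <"subject", relation, "object"> spans out of the raw Llama 2 text, and check_validity_of_response() keeps only triples whose subject and object are known entity mentions and whose relation is in the ontology list, replacing literals with IRIs. The following is a minimal, self-contained sketch of that flow. The sample response and entity mentions are made up for illustration; the regex, quote-splitting, and IRI-mapping logic mirror llm_messenger.py as it stands at the end of this series.

    import re

    # Hypothetical Llama 2 response in the llama_cpp "choices" shape.
    response = {
        "choices": [
            {
                "text": '[INST] Barack Obama is married to Michelle Obama. [/INST] '
                        'The triples are: <"Barack Obama", married, "Michelle Obama">'
            }
        ]
    }

    # Hypothetical sentence object in the shape produced by parse_data.
    sentence = {
        "entityMentions": [
            {"name": "Barack Obama", "iri": "knox-kb01.srv.aau.dk/Barack_Obama"},
            {"name": "Michelle Obama", "iri": "knox-kb01.srv.aau.dk/Michelle_Obama"},
        ]
    }
    relations = ["married", "sibling", "child", "parent"]

    # Keep only the text after the echoed prompt, as process_message does.
    answer = re.split("/INST]", response["choices"][0]["text"])[1]

    iri_of = {em["name"]: em["iri"] for em in sentence["entityMentions"]}
    triples = []
    for raw in re.findall(r'<["\s\w\d,]*,[\s\w\d]*,["\s\w\d,]*>', answer):
        # Splitting on the quotes yields ['Barack Obama', ', married, ', 'Michelle Obama'];
        # strip(' ,') then cleans the middle element down to the bare relation.
        parts = [p.strip(' ,') for p in re.split('"', raw.strip('<>[]'))[1:-1]]
        # Keep the triple only if both mentions are known and the relation is allowed,
        # then swap literals for IRIs (the point of PATCH 19).
        if len(parts) == 3 and parts[0] in iri_of and parts[1] in relations and parts[2] in iri_of:
            triples.append([iri_of[parts[0]],
                            f'http://dbpedia.org/ontology/{parts[1]}',
                            iri_of[parts[2]]])

    print(triples)
    # [['knox-kb01.srv.aau.dk/Barack_Obama', 'http://dbpedia.org/ontology/married',
    #   'knox-kb01.srv.aau.dk/Michelle_Obama']]

Note that prefixing the relation with http://dbpedia.org/ontology/ assumes every relation name returned by the ontology endpoint exists in the DBpedia ontology namespace; mentions that the model invents, or relations outside the list, are silently dropped rather than raising an error.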