diff --git a/README.md b/README.md index 5e57244..eac54ed 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Note that the ports map to the ports used in the ssh command give in "your port" Deployment is normally handled by Watchtower on push to main. However, in case of the need of manual deployment, run: -`sudo docker run -p 0.0.0.0:4444: --add-host=host.docker.internal:host-gateway -e API_SECRET=*** -e ACCESS_SECRET=*** -d ghcr.io/knox-aau/preprocessinglayer_tripleconstruction:main` +`docker run --name tc_api -p 0.0.0.0:4444: --add-host=host.docker.internal:host-gateway -e API_SECRET=*** -e ACCESS_SECRET=*** -d ghcr.io/knox-aau/preprocessinglayer_tripleconstruction:main` ### Access through access API endpoint diff --git a/relation_extraction/evaluation/evaluation.py b/relation_extraction/evaluation/evaluation.py index 52e2ec7..ed817b7 100644 --- a/relation_extraction/evaluation/evaluation.py +++ b/relation_extraction/evaluation/evaluation.py @@ -3,13 +3,14 @@ from relation_extraction.ontology_messenger import OntologyMessenger from relation_extraction.LessNaive.lessNaive import do_relation_extraction from relation_extraction.NaiveMVP.main import parse_data +from relation_extraction.multilingual.llm_messenger import LLMMessenger import re import datetime import json -def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3, length = 100, fill = '█', printEnd = "\r"): +def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3, length = 100, fill = '█', printEnd = "\n"): percent = ("{0:." 
+ str(decimals) + "f}").format(100 * (iteration / float(total))) filledLength = int(length * iteration // total) bar = fill * filledLength + '-' * (length - filledLength) @@ -60,7 +61,8 @@ def main(): solutions_to_test = { # "less_naive": do_relation_extraction - "naive": parse_data + # "naive": parse_data + "multilingual": LLMMessenger.prompt_llm } evaluation_results = dict() #dictionary to hold results of tests for name, solution in solutions_to_test.items(): @@ -93,7 +95,11 @@ def main(): ] }] - res = solution(input_obj, ontology_relations) + chunk_size = 650 + split_relations = [ontology_relations[i:i + chunk_size] for i in range(0, len(ontology_relations), chunk_size)] #Split the relations into lists of size chunk_size + res = [] + for split_relation in split_relations: + res.extend(solution(input_obj, split_relation, ontology_relations)) res_hits = 0 for triple in res: if triple in expected_triples: diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index 6c50835..249c964 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -1,88 +1,90 @@ from relation_extraction.API_handler import APIHandler import requests -from llama_cpp import Llama +import re +import os class LLMMessenger(APIHandler): def API_endpoint(): - return "" + return "http://knox-proxy01.srv.aau.dk/llama-api/llama" def send_request(request): + HEADERS = {"Access-Authorization": os.getenv("ACCESS_SECRET")} + response = requests.post(url=LLMMessenger.API_endpoint(), json=request, headers=HEADERS) - # Put the location of to the GGUF model that you've download from HuggingFace here - model_path = "llama-2-7b-chat.Q2_K.gguf" + # # Put the location of to the GGUF model that you've download from HuggingFace here + # model_path = "./relation_extraction/multilingual/llama-2-7b-chat.Q2_K.gguf" - # #
Create a llama model + # model = Llama(model_path=model_path, n_ctx=4096) - # Prompt creation - system_message = """### Instruction ### - When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. + # prompt = f"""[INST] <> + # {request["system_message"]} + # <> + # {request["user_message"]} [/INST]""" - ### Context ### - List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] - - ### Input Data ### - You should perform relation extraction when prompted with input on the following format: - "sentence", [comma_separated_list_of_entity_mentions] - - ### Output Indicator ### - If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". 
In all other cases, your output should be a list of triples on the following format: - - - """ - user_message = '"Casper and Rytter has the same mother", [Casper, Rytter]' - - prompt = f"""[INST] <> - {system_message} - <> - {user_message} [/INST]""" - - # Model parameters - max_tokens = 4092 - - # Run the model - output = model(prompt, max_tokens=max_tokens, echo=True) - - # Print the model output - # print(output["choices"][0]["text"]) - # with open("LlamaResponse.txt", "w") as file: - # # Write content to the file - # file.write(output["choices"][0]["text"]) - - #response = requests.post(url=LLMMessenger.API_endpoint) - return output + # # Run the model + # output = model(prompt, max_tokens=request["max_tokens"], echo=True) + + return response def process_message(response): print("Recieved response from Llama2...") - print(response) - - - def costruct_prompt_message(data): - system_message = """### Instruction ### - When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. - - ### Context ### - List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion] - - ### Input Data ### - You should perform relation extraction when prompted with input on the following format: - "sentence", [comma_separated_list_of_entity_mentions] - - ### Output Indicator ### - If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". 
In all other cases, your output should be a list of triples on the following format: - + triples = [] + answer = re.split("/INST]", response["choices"][0]["text"])[1] + llama_triples = re.findall('<["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*>|\[["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*\]', answer) + for llama_triple in llama_triples: + triple = re.split('"', llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", ""))[1:-1] + if len(triple) == 3: + triple_object = {} + for i, entry in enumerate(triple): + triple_object[i.__str__()] = entry.strip(' ,') + triples.append(triple_object) + return triples + + def check_validity_of_response(sentence, response, relations): + triples = [] + valid_entity_mentions = [em["name"] for em in sentence["entityMentions"]] + for triple in response: + if triple["0"] in valid_entity_mentions and triple["1"] in relations and triple["2"] in valid_entity_mentions: # 0 = subject, 1 = predicate, and 2 = object + triples.append([[em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["0"]][0], f'http://dbpedia.org/ontology/{triple["1"]}', [em["iri"] for em in sentence["entityMentions"] if em["name"] == triple["2"]][0]]) + return triples + + def prompt_llm(data, split_relations, relations): + triples = [] + system_message = f"""### Instruction ### +When given a sentence in either danish or english and the entity mentions in the sentence, you should find triples by performing relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. You should provide reasoning for why each of the triples you find is correct. + +### Context ### +List of relations: [{", ".join(split_relations)}] +Here is a transcript with you. You are called Llama.
+User: Sentence: "Aalborg is in Denmark" Entity mentions: ["Aalborg", "Denmark"] +Llama: The relation "is in" is not in the list of relations but "location" is in the list of relations. "Aalborg is in Denmark" implies that Aalborg is located in Denmark. Therefore, the triple <"Aalborg", location, "Denmark"> is correct. +User: Sentence: "Peter has a subscription to Pure Gym" Entity mentions: ["Peter", "Pure Gym"] +Llama: The relation "subscription" is not in the list of relations, but "member" is in the list of relations. "Peter has a subscription to Pure Gym" implies that Peter is a member of Pure Gym. Therefore, the triple <"Peter", member, "Pure Gym"> is correct. +User: Sentence: "Martin Eberhard and Marc Tarpenning are the original founders of Tesla" Entity mentions: ["Martin Eberhard", "Marc Tarpenning", "Tesla"] +Llama: The sentence states that Tesla was founded by both Martin Eberhard and Marc Tarpenning. The relation "foundedBy" is in the list of relations. Therefore, the two triples <"Tesla", foundedBy, "Martin Eberhard"> and <"Tesla", foundedBy, "Marc Tarpenning"> are correct. +User: Sentence: "Sofie was born in Kolding" Entity mentions: ["Sofie", "Kolding"] +Llama: The relation "born in" is not in the list of relations. But "born in" implies a place of birth, and "birthPlace" is in the list of relations. Therefore, the triple <"Sofie", birthPlace, "Kolding"> is correct. +User: Sentence: "Frederik is the father of Christian" Entity mentions: ["Frederik", "Christian"] +Llama: The relation "father" is not in the list of relations. However, a father is a parent and "parent" is in the list of relations. Therefore, the triple <"Frederik", parent, "Christian"> is correct. + +### Output Indicator ### +Before answering with a triple, you should explain why it is correct. If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with “undefined”. 
In all other cases, your output should be triples on the format and an explanation for each triple. """ - request = {"system_message": system_message, "user_message": ""} + request = {"system_message": system_message, "user_message": "", "max_tokens": 4096} for file in data: for sentence in file["sentences"]: - user_message = f'"{sentence["sentence"]}", [' + user_message = f'Sentence: "{sentence["sentence"]}" Entity mentions: [' for em in sentence["entityMentions"]: - user_message += f"{em['name']}, " - user_message = user_message[:-2] + ']' #Remove comma and space after last entity mention + user_message += f'"{em["name"]}", ' + user_message = user_message[:-2] + ']' #Remove comma and space after last entity mention in message request["user_message"] = user_message response = LLMMessenger.send_request(request) - LLMMessenger.process_message(response) + process_response = LLMMessenger.process_message(response) + triples = LLMMessenger.check_validity_of_response(sentence, process_response, relations) + return triples + diff --git a/relation_extraction/multilingual/main.py b/relation_extraction/multilingual/main.py index 95dff90..38c9d8d 100644 --- a/relation_extraction/multilingual/main.py +++ b/relation_extraction/multilingual/main.py @@ -4,7 +4,7 @@ from relation_extraction.multilingual.llm_messenger import LLMMessenger def parse_data(data): - "Parses JSON data and converts it into a dictionary with information on sentence, tokens, and entity mentions" + "Removes entity mentions with no iri and sentences with less than two entity mentions" for file in data: for i, sentence in enumerate(file["sentences"]): @@ -29,7 +29,11 @@ def begin_relation_extraction(data): raise Exception("Incorrectly formatted input. 
Exception during parsing") try: - triples = LLMMessenger.costruct_prompt_message(parsed_data) + triples = [] + chunk_size = 650 + split_relations = [relations[i:i + chunk_size] for i in range(0, len(relations), chunk_size)] #Split the relations into lists of size chunk_size + for split_relation in split_relations: + triples.extend(LLMMessenger.prompt_llm(parsed_data, split_relation, relations)) except Exception as E: print(f"Exception during prompt to Llama 2: {str(E)}") raise Exception("Exception during prompt to Llama 2") @@ -39,28 +43,3 @@ def begin_relation_extraction(data): except Exception as E: print(f"Exception during request to database. {str(E)}") raise Exception("Data was not sent to database due to connection error") - - -def test(): - begin_relation_extraction(data= - [ - { - "filename": "path/to/Artikel.txt", - "language": "en", - "sentences": [ - { - "sentence": "Barrack Obama is married to Michelle Obama.", - "sentenceStartIndex": 20, - "sentenceEndIndex": 62, - "entityMentions": - [ - { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, - { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama" } - ] - } - ] - } - ] - ) - -test() \ No newline at end of file diff --git a/relation_extraction/relation_extractor.py b/relation_extraction/relation_extractor.py index 3db32ae..7a333fd 100644 --- a/relation_extraction/relation_extractor.py +++ b/relation_extraction/relation_extractor.py @@ -1,7 +1,9 @@ from relation_extraction.NaiveMVP.main import handle_relation_post_request +from relation_extraction.multilingual.main import begin_relation_extraction class RelationExtractor(): @classmethod def begin_extraction(self, data): handle_relation_post_request(data) + begin_relation_extraction(data) diff --git a/test/test_concept_linking/__init__.py b/test/test_concept_linking/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/test/test_relation_extraction/__init__.py b/test/test_relation_extraction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/test_server/test_get_relations.py b/test/test_relation_extraction/test_get_relations.py similarity index 100% rename from test/test_server/test_get_relations.py rename to test/test_relation_extraction/test_get_relations.py diff --git a/test/test_relation_extraction/test_llama_relation_extractor.py b/test/test_relation_extraction/test_llama_relation_extractor.py new file mode 100644 index 0000000..021e004 --- /dev/null +++ b/test/test_relation_extraction/test_llama_relation_extractor.py @@ -0,0 +1,159 @@ +import unittest +from unittest import mock +from relation_extraction.multilingual.main import * + +class TestHandleRelationPostRequest(unittest.TestCase): + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_relations_fail(self, mock_extract_specific_relations): + mock_extract_specific_relations.side_effect = Exception() + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + + @mock.patch('relation_extraction.multilingual.main.parse_data') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_parse_fail(self, mock_extract_specific_relations, mock_parse_data): + mock_extract_specific_relations.return_value = [] + mock_parse_data.side_effect = Exception() + + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + mock_parse_data.assert_called_once() + + @mock.patch('relation_extraction.multilingual.llm_messenger.LLMMessenger.prompt_llm') + @mock.patch('relation_extraction.multilingual.main.parse_data') + 
@mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_prompt_llm_fail(self, mock_extract_specific_relations, mock_parse_data, mock_prompt_llm): + mock_extract_specific_relations.return_value = ["relation1"] + mock_parse_data.return_value = [] + mock_prompt_llm.side_effect = Exception() + + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + mock_parse_data.assert_called_once() + mock_prompt_llm.assert_called() + + @mock.patch('relation_extraction.knowledge_graph_messenger.KnowledgeGraphMessenger.send_request') + @mock.patch('relation_extraction.multilingual.llm_messenger.LLMMessenger.prompt_llm') + @mock.patch('relation_extraction.multilingual.main.parse_data') + @mock.patch('relation_extraction.ontology_messenger.OntologyMessenger.send_request') + def test_handle_post_request_raises_exception_if_send_to_db_fail(self, mock_extract_specific_relations, mock_parse_data, mock_prompt_llm, mock_send_to_db): + mock_extract_specific_relations.return_value = ["relation1"] + mock_parse_data.return_value = [] + mock_prompt_llm.return_value = [] + mock_send_to_db.side_effect = Exception() + + data = dict() + with self.assertRaises(Exception): + begin_relation_extraction(data) + + mock_extract_specific_relations.assert_called_once() + mock_parse_data.assert_called_once() + mock_prompt_llm.assert_called() + mock_send_to_db.assert_called_once() + +class TestParseData(unittest.TestCase): + def test_parse_remove_ems_without_iri(self): + data = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle 
Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + { "name": "Dog", "startIndex": 27, "endIndex": 40, "iri": None} + ] + } + ] + } + ] + res = parse_data(data) + + expected = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + ] + } + ] + } + ] + + self.assertEqual(res, expected) + + def test_parse_remove_sentences_with_lt_two_ems(self): + data = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + ] + }, + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + ] + } + ] + } + ] + res = parse_data(data) + + expected = [ + { + "filename": "path/to/Artikel.txt", + "language": "en", + "sentences": [ + { + "sentence": "Barrack Obama is married to Michelle Obama and they have a dog.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barrack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, 
"endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michele_Obama"}, + ] + } + ] + } + ] + + self.assertEqual(res, expected) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_relation_extraction/test_llm_messenger.py b/test/test_relation_extraction/test_llm_messenger.py new file mode 100644 index 0000000..d0d59fc --- /dev/null +++ b/test/test_relation_extraction/test_llm_messenger.py @@ -0,0 +1,216 @@ +import unittest +from unittest import mock +from relation_extraction.multilingual.llm_messenger import * + +class TestProcessMessage(unittest.TestCase): + #Needs testing + def test_send_request(self): + testdata = [ + { + + } + ] + + def test_process_message(self): + testdata = [ + { + "choices": [ + { + "text":'[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: <"Barack Obama", married, "Michelle Obama"> and <"Michelle Obama", married, "Barack Obama">' + } + ], + "expected": [ + { + "0":"Barack Obama", + "1":"married", + "2":"Michelle Obama" + }, + { + "0":"Michelle Obama", + "1":"married", + "2":"Barack Obama" + } + ] + }, + { + "choices": [ + { + "text":'[INST] Peter and Marianne has the same mother. [/INST] In this sentence the triples are: <"Peter", sibling, "Marianne"> and <"Marianne", sibling, "Peter">' + } + ], + "expected":[ + { + "0":"Peter", + "1":"sibling", + "2":"Marianne" + }, + { + "0":"Marianne", + "1":"sibling", + "2":"Peter" + } + ] + } + ] + + for td in testdata: + res = LLMMessenger.process_message(td) + self.assertEqual(res, td["expected"]) + + def test_process_message_wrong_format(self): + testdata = [ + { + "choices": [ + { + "text":"[INST] Barack Obama is married to Michelle Obama. [/INST] In this sentence the triples are: Subject: Barack Obama\n Relation: married\n Object: Michelle Obama" + } + ], + "expected": [] + }, + { + "choices": [ + { + "text":"[INST] Peter and Marianne has the same mother. 
[/INST] In this sentence the triples are: Subject: Peter\n Relation: sibling\n Object: Marianne" + } + ], + "expected":[] + } + ] + + for td in testdata: + res = LLMMessenger.process_message(td) + self.assertEqual(res, td["expected"]) + + def test_check_validity_of_response(self): + relations = ["married", "sibling", "child", "parent"] + testdata = [ + { + "Sentence": { + "sentence": "Barack Obama is married to Michelle Obama.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michelle_Obama" } + ] + }, + "response": [ + { + "0":"Barack Obama", + "1":"married", + "2":"Michelle Obama" + }, + { + "0":"Michelle Obama", + "1":"married", + "2":"Barack Obama" + } + ], + "expected": [ + [ + "knox-kb01.srv.aau.dk/Barack_Obama", + "http://dbpedia.org/ontology/married", + "knox-kb01.srv.aau.dk/Michelle_Obama" + ], + [ + "knox-kb01.srv.aau.dk/Michelle_Obama", + "http://dbpedia.org/ontology/married", + "knox-kb01.srv.aau.dk/Barack_Obama" + ], + ] + } + ] + + for td in testdata: + res = LLMMessenger.check_validity_of_response(td["Sentence"], td["response"], relations) + self.assertEqual(res, td["expected"]) + + @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.send_request") + @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.process_message") + @mock.patch("relation_extraction.multilingual.llm_messenger.LLMMessenger.check_validity_of_response") + def test_prompt_llm(self, mock_check_validity, mock_process_message, mock_send_request): + relations = ["married", "sibling", "child", "parent"] + split_relations = [["married", "sibling", "child", "parent"]] + testdata = [ + { + "response": { + "choices": [ + { + "text":'[INST] Barack Obama is married to Michelle Obama. 
[/INST] In this sentence the triples are: <"Barack Obama", married, "Michelle Obama"> and <"Michelle Obama", married, "Barack Obama">' + } + ], + }, + "process_response": [ + { + "0":"Barack Obama", + "1":"married", + "2":"Michelle Obama" + }, + { + "0":"Michelle Obama", + "1":"married", + "2":"Barack Obama" + } + ], + "validity_response": [ + [ + "knox-kb01.srv.aau.dk/Barack_Obama", + "http://dbpedia.org/ontology/married", + "knox-kb01.srv.aau.dk/Michelle_Obama" + ], + [ + "knox-kb01.srv.aau.dk/Michelle_Obama", + "http://dbpedia.org/ontology/married", + "knox-kb01.srv.aau.dk/Barack_Obama" + ], + ], + "expected": [ + [ + "knox-kb01.srv.aau.dk/Barack_Obama", + "http://dbpedia.org/ontology/married", + "knox-kb01.srv.aau.dk/Michelle_Obama" + ], + [ + "knox-kb01.srv.aau.dk/Michelle_Obama", + "http://dbpedia.org/ontology/married", + "knox-kb01.srv.aau.dk/Barack_Obama" + ], + ], + "data": [ + { + "language": "en", + "metadataId":"790261e8-b8ec-4801-9cbd-00263bcc666d", + "sentences": [ + { + "sentence": "Barack Obama is married to Michelle Obama.", + "sentenceStartIndex": 20, + "sentenceEndIndex": 62, + "entityMentions": + [ + { "name": "Barack Obama", "startIndex": 0, "endIndex": 12, "iri": "knox-kb01.srv.aau.dk/Barack_Obama" }, + { "name": "Michelle Obama", "startIndex": 27, "endIndex": 40, "iri": "knox-kb01.srv.aau.dk/Michelle_Obama" } + ] + } + ] + } + ], + "split_relations": split_relations, + "relations": relations + } + ] + + for td in testdata: + mock_send_request.return_value = td["response"] + mock_process_message.return_value = td["process_response"] + mock_check_validity.return_value = td["validity_response"] + res = [] + for split_relation in td["split_relations"]: + res.extend(LLMMessenger.prompt_llm(td["data"], split_relation, td["relations"])) + for triple in res: + self.assertEqual(len(triple), 3) #All must be triples + self.assertEqual(td["expected"], res) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git 
a/test/test_server/test_output.py b/test/test_relation_extraction/test_output.py similarity index 100% rename from test/test_server/test_output.py rename to test/test_relation_extraction/test_output.py diff --git a/test/test_server/test_relation_extraction.py b/test/test_relation_extraction/test_relation_extraction.py similarity index 100% rename from test/test_server/test_relation_extraction.py rename to test/test_relation_extraction/test_relation_extraction.py