Skip to content

Commit

Permalink
Relations are now split up before prompting Llama
Browse files Browse the repository at this point in the history
  • Loading branch information
Rasmus authored and Rasmus committed Dec 7, 2023
1 parent cb829da commit d5c38ab
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 27 deletions.
33 changes: 7 additions & 26 deletions relation_extraction/multilingual/llm_messenger.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class LLMMessenger(APIHandler):

def API_endpoint():
    """Return the URL of the Llama endpoint.

    The stale ``return ""`` that preceded this value has been dropped; it
    made the real URL unreachable.

    Returns:
        The endpoint URL as a string.
    """
    return "http://knox-func01.srv.aau.dk:5004/llama"

def send_request(request):

Expand All @@ -16,24 +16,6 @@ def send_request(request):
# Create a llama model
model = Llama(model_path=model_path, n_ctx=4096)

# Prompt creation
# system_message = """### Instruction ###
# When given a sentence and the entity mentions in the sentence, you should perform relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context.

# ### Context ###
# List of relations: [location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion]

# ### Input Data ###
# You should perform relation extraction when prompted with input on the following format:
# "sentence", [comma_separated_list_of_entity_mentions]

# ### Output Indicator ###
# If no relation can be found in the sentence, or the entity mentions have not been specified in the user prompt, you should respond with "undefined". In all other cases, your output should be a list of triples on the following format:
# <subject, relation, object>

# """
# user_message = '"Casper and Rytter has the same mother", [Casper, Rytter]'

prompt = f"""<s>[INST] <<SYS>>
{request["system_message"]}
<</SYS>>
Expand All @@ -51,17 +33,18 @@ def send_request(request):
# # Write content to the file
# file.write(output["choices"][0]["text"])

#response = requests.post(url=LLMMessenger.API_endpoint)
#response = requests.post(url=LLMMessenger.API_endpoint, json=request)
return output

def process_message(response):
print("Recieved response from Llama2...")
triples = []
print(response)
answer = re.split("/INST]", response["choices"][0]["text"])[1]
print(response["choices"][0]["text"])
llama_triples = re.findall("<[\s\w\d]*,[\s\w\d]*,[\s\w\d]*>|\[[\s\w\d]*,[\s\w\d]*,[\s\w\d]*\]", answer)
llama_triples = re.findall('<["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*>|\[["\s\w\d,"]*,[\s\w\d]*,["\s\w\d,"]*\]', answer)
for llama_triple in llama_triples:
triple = re.split(",", llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", ""))
triple = re.split('"."', llama_triple.replace("<", "").replace(">", "").replace("]", "").replace("[", ""))
if len(triple) == 3:
triple_object = {}
for i, entry in enumerate(triple):
Expand All @@ -79,13 +62,11 @@ def check_validity_of_response(sentence, response, relations):

def prompt_llm(data, relations):
triples = []
relations_test = ["spouse", "location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"]
relations_text = "[" + ", ".join(["location", "birthPlace", "deathPlace", "owns", "sibling", "child", "parent", "title", "employer", "age", "residence", "headquarter", "deathCause", "member", "foundedBy", "religion"]) + "]"
system_message = f"""### Instruction ###
When given a sentence in either danish or english and the entity mentions in the sentence, you should find triples by performing relation extraction. This includes marking an entity mention as subject, marking another entity mention as object, and identifying the relation between the subject and object. You should only use entity mentions specified in the prompt. You should only use relations from the list of relations given in the context. You should provide reasoning for why each of the triples you find is correct.
S
### Context ###
List of relations: [spouse, location, birthPlace, deathPlace, owns, sibling, child, parent, title, employer, age, residence, headquarter, deathCause, member, foundedBy, religion]
List of relations: [{", ".join(relations)}]
Here is a transcript with you. You are called Llama.
User: Sentence: "Aalborg is in Denmark" Entity mentions: ["Aalborg", "Denmark"]
Llama: The relation "is in" is not in the list of relations but "location" is in the list of relations. "Aalborg is in Denmark" implies that Aalborg is located in Denmark. Therefore, the triple <"Aalborg", location, "Denmark"> is correct.
Expand All @@ -103,7 +84,7 @@ def prompt_llm(data, relations):
"""

request = {"system_message": system_message, "user_message": ""}
request = {"system_message": system_message, "user_message": "", "max_tokens": 4096}

for file in data:
for sentence in file["sentences"]:
Expand Down
26 changes: 25 additions & 1 deletion relation_extraction/multilingual/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ def begin_relation_extraction(data):
raise Exception("Incorrectly formatted input. Exception during parsing")

try:
triples = LLMMessenger.prompt_llm(parsed_data, relations)
triples = []
chunk_size = 250
split_relations = [relations[i:i + chunk_size] for i in range(0, len(relations), chunk_size)] #Split the relations into lists of size chunk_size
for split_relation in split_relations:
triples.append(LLMMessenger.prompt_llm(parsed_data, split_relation))
except Exception as E:
print(f"Exception during prompt to Llama 2: {str(E)}")
raise Exception("Exception during prompt to Llama 2")
Expand All @@ -39,3 +43,23 @@ def begin_relation_extraction(data):
except Exception as E:
print(f"Exception during request to database. {str(E)}")
raise Exception("Data was not sent to database due to connection error")

# Manual smoke test: push one hand-written sample through the pipeline.
# Guarded so it only runs when this file is executed as a script; without
# the guard, importing this module would start a relation-extraction run
# with the sample data below.
# NOTE(review): the mention names and their IRIs are spelled differently
# ("Barrack"/"Barack", "Michelle"/"Michele") - confirm whether intentional.
if __name__ == "__main__":
    begin_relation_extraction([
        {
            "filename": "path/to/Artikel.txt",
            "language": "en",
            "sentences": [
                {
                    "sentence": "Barrack Obama is married to Michelle Obama.",
                    "sentenceStartIndex": 20,
                    "sentenceEndIndex": 62,
                    "entityMentions": [
                        {
                            "name": "Barrack Obama",
                            "startIndex": 0,
                            "endIndex": 12,
                            "iri": "knox-kb01.srv.aau.dk/Barack_Obama",
                        },
                        {
                            "name": "Michelle Obama",
                            "startIndex": 27,
                            "endIndex": 40,
                            "iri": "knox-kb01.srv.aau.dk/Michele_Obama",
                        },
                    ],
                },
            ],
        },
    ])

0 comments on commit d5c38ab

Please sign in to comment.