-
Notifications
You must be signed in to change notification settings - Fork 0
/
predicate_extractor_MeGaNN.py
56 lines (39 loc) · 1.58 KB
/
predicate_extractor_MeGaNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from argparse import ArgumentParser
import os
from os.path import splitext
def extract_predicates(file, dataset_name, predicates_folder):
    """Collect the distinct predicates of a triples file into a CSV file.

    Each non-blank line of ``file`` is split on whitespace into three fields
    (the third field is the remainder of the line, minus its newline).

    * If the second field is the ``rdf:type`` IRI, the third field is
      recorded as a unary predicate and written as ``<predicate>,1``.
    * Otherwise the second field is recorded as a binary predicate and
      written as ``<predicate>,2``.

    Every predicate is written once, in the order it is first encountered.

    Args:
        file: Path of the input triples file.
        dataset_name: Prefix used to name the output file.
        predicates_folder: Folder in which the output file is created.

    Returns:
        The path of the written file,
        ``predicates_folder + "/" + dataset_name + "_predicates.csv"``.

    Raises:
        ValueError: If a non-blank line has fewer than three fields.
    """
    rdf_type = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>"

    # Define output file
    output_file_name = predicates_folder + "/" + dataset_name + "_predicates.csv"

    # Initialise sets of predicates already written
    unaryPredicates = set()
    binaryPredicates = set()

    # The context manager guarantees both files are flushed and closed,
    # even if a malformed line raises halfway through.
    with open(file, "r") as inputFile, open(output_file_name, "w") as outputFile:
        # Stream the input line by line instead of loading it all in memory.
        for line_number, line in enumerate(inputFile, start=1):
            # Blank lines (e.g. a trailing empty line) carry no triple.
            if not line.strip():
                continue

            # Mine the three entities; the third keeps the rest of the line.
            fields = line.split(None, 2)
            if len(fields) != 3:
                raise ValueError(
                    "%s:%d: expected 3 whitespace-separated fields, got %d"
                    % (file, line_number, len(fields))
                )
            _, ent2, ent3 = fields

            if ent2 == rdf_type:
                # Remove end of line character.
                if ent3.endswith("\n"):
                    ent3 = ent3[:-1]
                # Only write a predicate the first time it is seen.
                if ent3 not in unaryPredicates:
                    unaryPredicates.add(ent3)
                    outputFile.write(ent3 + ",1" + "\n")
            elif ent2 not in binaryPredicates:
                # Only write a predicate the first time it is seen.
                binaryPredicates.add(ent2)
                outputFile.write(ent2 + ",2" + "\n")

    return output_file_name
if __name__ == '__main__':
    # Command line: a single positional argument, the path of the input file.
    cli = ArgumentParser()
    cli.add_argument("input")
    input_path = cli.parse_args().input
    # The dataset is named after the input file; output goes to the current folder.
    extract_predicates(input_path, os.path.basename(input_path), '.')