From 59693124cc3c984b0b94a3da503e3111440c0158 Mon Sep 17 00:00:00 2001
From: eiglesias34
Date: Fri, 19 Jul 2024 10:50:01 +0200
Subject: [PATCH] Fixing parser query and format organization

---
 README.md                    |  2 +-
 VERSION                      |  2 +-
 rdfizer/rdfizer/__init__.py  | 13 +++--
 rdfizer/rdfizer/functions.py | 95 ++++++++++++++++++------------------
 rdfizer/rdfizer/semantify.py | 13 +++--
 5 files changed, 66 insertions(+), 59 deletions(-)

diff --git a/README.md b/README.md
index fd670f8..6a97f40 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ You can easily customize your own configurations from the set of features that S
 
 ## Version
 ```
-4.7.4.6
+4.7.4.7
 ```
 
 ## RML-Test Cases
diff --git a/VERSION b/VERSION
index b61fb71..3458fdc 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-4.7.4.6
\ No newline at end of file
+4.7.4.7
\ No newline at end of file
diff --git a/rdfizer/rdfizer/__init__.py b/rdfizer/rdfizer/__init__.py
index 80fa02c..ad9f0b7 100755
--- a/rdfizer/rdfizer/__init__.py
+++ b/rdfizer/rdfizer/__init__.py
@@ -1630,7 +1630,8 @@ def mapping_parser(mapping_file):
                         ?obj_dump void:dataDump ?object_dump.}
                 }
                 OPTIONAL {
-                    ?_source a d2rq:Database;
+                    ?_source rml:source ?db .
+                    ?db a d2rq:Database;
                         d2rq:jdbcDSN ?jdbcDSN;
                         d2rq:jdbcDriver ?jdbcDriver;
                         d2rq:username ?user;
@@ -1751,7 +1752,7 @@
                 OPTIONAL {
                     ?_object_map rr:joinCondition ?join_condition .
                     ?join_condition rr:child ?child_value;
-                        rr:parent ?parent_value;
+                        rr:parent ?parent_value.
                 }
             }
             OPTIONAL {
@@ -1776,7 +1777,8 @@ def mapping_parser(mapping_file):
                     ?_graph_structure rr:template ?predicate_object_graph . }
                 }
                 OPTIONAL {
-                    ?_source a d2rq:Database;
+                    ?_source rml:source ?db .
+                    ?db a d2rq:Database;
                         d2rq:jdbcDSN ?jdbcDSN;
                         d2rq:jdbcDriver ?jdbcDriver;
                         d2rq:username ?user;
@@ -1840,6 +1842,7 @@ def mapping_parser(mapping_file):
     mapping_query_results = mapping_graph.query(mapping_query)
 
     for result_triples_map in mapping_query_results:
+        print(result_triples_map)
         triples_map_exists = False
         for triples_map in triples_map_list:
             triples_map_exists = triples_map_exists or (
@@ -1995,7 +1998,7 @@ def mapping_parser(mapping_file):
                             result_predicate_object_map.datatype_value, "None")
                 elif result_predicate_object_map.object_parent_triples_map != None:
                     if predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map) not in join_predicate:
-                        if (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
+                        if (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
                             join_predicate[
                                 predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
                                 "predicate":predicate_map,
@@ -2009,7 +2012,7 @@ def mapping_parser(mapping_file):
                                 "childs":[str(result_predicate_object_map.child_function)],
                                 "parents":[str(result_predicate_object_map.parent_value)],
                                 "triples_map":str(result_predicate_object_map.object_parent_triples_map)}
-                        elif (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
+                        elif (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
                             join_predicate[
                                 predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
                                 "predicate":predicate_map,
diff --git a/rdfizer/rdfizer/functions.py b/rdfizer/rdfizer/functions.py
index a7c0f83..19ae72f 100644
--- a/rdfizer/rdfizer/functions.py
+++ b/rdfizer/rdfizer/functions.py
@@ -709,72 +709,73 @@ def files_sort(triples_map_list, ordered, config):
                     else:
                         source_predicate["XPath"][str(tp.data_source)] = {po.predicate_map.value : ""}
         else:
-            if "SPARQL" in tp.file_format:
-                if "csv" not in sorted_list:
-                    if ".nt" in str(tp.data_source):
-                        sorted_list["csv"] = {str(tp.data_source) : {tp.triples_map_id : tp}}
-                    else:
-                        sorted_list["csv"] = {"endpoint:" + str(tp.data_source) : {tp.triples_map_id : tp}}
-                else:
-                    if ".nt" in str(tp.data_source):
-                        if str(tp.data_source) in sorted_list["csv"]:
-                            sorted_list["csv"][str(tp.data_source)][tp.triples_map_id] = tp
-                        else:
-                            sorted_list["csv"][str(tp.data_source)] = {tp.triples_map_id : tp}
-                    else:
-                        if "endpoint:" + str(tp.data_source) in sorted_list["csv"]:
-                            sorted_list["csv"]["endpoint:" + str(tp.data_source)][tp.triples_map_id] = tp
-                        else:
-                            sorted_list["csv"]["endpoint:" + str(tp.data_source)] = {tp.triples_map_id : tp}
-                for po in tp.predicate_object_maps_list:
-                    if po.predicate_map.value in general_predicates:
-                        predicate = po.predicate_map.value + "_" + po.object_map.value
-                        if predicate in predicate_list:
-                            predicate_list[predicate] += 1
-                        else:
-                            predicate_list[predicate] = 1
-                    else:
-                        if po.predicate_map.value in predicate_list:
-                            predicate_list[po.predicate_map.value] += 1
-                        else:
-                            predicate_list[po.predicate_map.value] = 1
-                    if "csv" not in source_predicate:
-                        if po.predicate_map.value in general_predicates:
-                            predicate = po.predicate_map.value + "_" + po.object_map.value
-                            if ".nt" in str(tp.data_source):
-                                source_predicate["csv"] = {str(tp.data_source) : {predicate : ""}}
-                            else:
-                                source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {predicate : ""}}
-                        else:
-                            if ".nt" in str(tp.data_source):
-                                source_predicate["csv"] = {str(tp.data_source) : {po.predicate_map.value : ""}}
-                            else:
-                                source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {po.predicate_map.value : ""}}
-                    else:
-                        if str(tp.data_source) in source_predicate["csv"]:
-                            if po.predicate_map.value in general_predicates:
-                                predicate = po.predicate_map.value + "_" + po.object_map.value
-                                if ".nt" in str(tp.data_source):
-                                    source_predicate["csv"][str(tp.data_source)][predicate] = ""
-                                else:
-                                    source_predicate["csv"]["endpoint:" + str(tp.data_source)][predicate] = ""
-                            else:
-                                if ".nt" in str(tp.data_source):
-                                    source_predicate["csv"][str(tp.data_source)][po.predicate_map.value] = ""
-                                else:
-                                    source_predicate["csv"]["endpoint:" + str(tp.data_source)][po.predicate_map.value] = ""
-                        else:
-                            if po.predicate_map.value in general_predicates:
-                                predicate = po.predicate_map.value + "_" + po.object_map.value
-                                if ".nt" in str(tp.data_source):
-                                    source_predicate["csv"][str(tp.data_source)] = {predicate : ""}
-                                else:
-                                    source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {predicate : ""}
-                            else:
-                                if ".nt" in str(tp.data_source):
-                                    source_predicate["csv"][str(tp.data_source)] = {po.predicate_map.value : ""}
-                                else:
-                                    source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {po.predicate_map.value : ""}
+            if tp.file_format != None:
+                if "SPARQL" in tp.file_format:
+                    if "csv" not in sorted_list:
+                        if ".nt" in str(tp.data_source):
+                            sorted_list["csv"] = {str(tp.data_source) : {tp.triples_map_id : tp}}
+                        else:
+                            sorted_list["csv"] = {"endpoint:" + str(tp.data_source) : {tp.triples_map_id : tp}}
+                    else:
+                        if ".nt" in str(tp.data_source):
+                            if str(tp.data_source) in sorted_list["csv"]:
+                                sorted_list["csv"][str(tp.data_source)][tp.triples_map_id] = tp
+                            else:
+                                sorted_list["csv"][str(tp.data_source)] = {tp.triples_map_id : tp}
+                        else:
+                            if "endpoint:" + str(tp.data_source) in sorted_list["csv"]:
+                                sorted_list["csv"]["endpoint:" + str(tp.data_source)][tp.triples_map_id] = tp
+                            else:
+                                sorted_list["csv"]["endpoint:" + str(tp.data_source)] = {tp.triples_map_id : tp}
+                    for po in tp.predicate_object_maps_list:
+                        if po.predicate_map.value in general_predicates:
+                            predicate = po.predicate_map.value + "_" + po.object_map.value
+                            if predicate in predicate_list:
+                                predicate_list[predicate] += 1
+                            else:
+                                predicate_list[predicate] = 1
+                        else:
+                            if po.predicate_map.value in predicate_list:
+                                predicate_list[po.predicate_map.value] += 1
+                            else:
+                                predicate_list[po.predicate_map.value] = 1
+                        if "csv" not in source_predicate:
+                            if po.predicate_map.value in general_predicates:
+                                predicate = po.predicate_map.value + "_" + po.object_map.value
+                                if ".nt" in str(tp.data_source):
+                                    source_predicate["csv"] = {str(tp.data_source) : {predicate : ""}}
+                                else:
+                                    source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {predicate : ""}}
+                            else:
+                                if ".nt" in str(tp.data_source):
+                                    source_predicate["csv"] = {str(tp.data_source) : {po.predicate_map.value : ""}}
+                                else:
+                                    source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {po.predicate_map.value : ""}}
+                        else:
+                            if str(tp.data_source) in source_predicate["csv"]:
+                                if po.predicate_map.value in general_predicates:
+                                    predicate = po.predicate_map.value + "_" + po.object_map.value
+                                    if ".nt" in str(tp.data_source):
+                                        source_predicate["csv"][str(tp.data_source)][predicate] = ""
+                                    else:
+                                        source_predicate["csv"]["endpoint:" + str(tp.data_source)][predicate] = ""
+                                else:
+                                    if ".nt" in str(tp.data_source):
+                                        source_predicate["csv"][str(tp.data_source)][po.predicate_map.value] = ""
+                                    else:
+                                        source_predicate["csv"]["endpoint:" + str(tp.data_source)][po.predicate_map.value] = ""
+                            else:
+                                if po.predicate_map.value in general_predicates:
+                                    predicate = po.predicate_map.value + "_" + po.object_map.value
+                                    if ".nt" in str(tp.data_source):
+                                        source_predicate["csv"][str(tp.data_source)] = {predicate : ""}
+                                    else:
+                                        source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {predicate : ""}
+                                else:
+                                    if ".nt" in str(tp.data_source):
+                                        source_predicate["csv"][str(tp.data_source)] = {po.predicate_map.value : ""}
+                                    else:
+                                        source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {po.predicate_map.value : ""}
             else:
                 if tp.query == "None":
                     if tp.iterator == "None":
diff --git a/rdfizer/rdfizer/semantify.py b/rdfizer/rdfizer/semantify.py
index ada4ae6..713654a 100755
--- a/rdfizer/rdfizer/semantify.py
+++ b/rdfizer/rdfizer/semantify.py
@@ -1630,7 +1630,8 @@ def mapping_parser(mapping_file):
                         ?obj_dump void:dataDump ?object_dump.}
                 }
                 OPTIONAL {
-                    ?_source a d2rq:Database;
+                    ?_source rml:source ?db .
+                    ?db a d2rq:Database;
                         d2rq:jdbcDSN ?jdbcDSN;
                         d2rq:jdbcDriver ?jdbcDriver;
                         d2rq:username ?user;
@@ -1751,7 +1752,7 @@
                 OPTIONAL {
                     ?_object_map rr:joinCondition ?join_condition .
                     ?join_condition rr:child ?child_value;
-                        rr:parent ?parent_value;
+                        rr:parent ?parent_value.
                 }
             }
             OPTIONAL {
@@ -1776,7 +1777,8 @@ def mapping_parser(mapping_file):
                     ?_graph_structure rr:template ?predicate_object_graph . }
                 }
                 OPTIONAL {
-                    ?_source a d2rq:Database;
+                    ?_source rml:source ?db .
+                    ?db a d2rq:Database;
                         d2rq:jdbcDSN ?jdbcDSN;
                         d2rq:jdbcDriver ?jdbcDriver;
                         d2rq:username ?user;
@@ -1840,6 +1842,7 @@ def mapping_parser(mapping_file):
     mapping_query_results = mapping_graph.query(mapping_query)
 
     for result_triples_map in mapping_query_results:
+        print(result_triples_map)
         triples_map_exists = False
         for triples_map in triples_map_list:
             triples_map_exists = triples_map_exists or (
@@ -1995,7 +1998,7 @@ def mapping_parser(mapping_file):
                             result_predicate_object_map.datatype_value, "None")
                 elif result_predicate_object_map.object_parent_triples_map != None:
                     if predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map) not in join_predicate:
-                        if (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
+                        if (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
                             join_predicate[
                                 predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
                                 "predicate":predicate_map,
@@ -2009,7 +2012,7 @@ def mapping_parser(mapping_file):
                                 "childs":[str(result_predicate_object_map.child_function)],
                                 "parents":[str(result_predicate_object_map.parent_value)],
                                 "triples_map":str(result_predicate_object_map.object_parent_triples_map)}
-                        elif (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
+                        elif (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
                             join_predicate[
                                 predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
                                 "predicate":predicate_map,