Fixing parser query and format organization
eiglesias34 committed Jul 19, 2024
1 parent 9ebc1ac commit 5969312
Showing 5 changed files with 66 additions and 59 deletions.
README.md (2 changes: 1 addition & 1 deletion)
@@ -84,7 +84,7 @@ You can easily customize your own configurations from the set of features that S

## Version
```
4.7.4.6
4.7.4.7
```

## RML-Test Cases
VERSION (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
4.7.4.6
4.7.4.7
rdfizer/rdfizer/__init__.py (13 changes: 8 additions & 5 deletions)
@@ -1630,7 +1630,8 @@ def mapping_parser(mapping_file):
?obj_dump void:dataDump ?object_dump.}
}
OPTIONAL {
?_source a d2rq:Database;
?_source rml:source ?db .
?db a d2rq:Database;
d2rq:jdbcDSN ?jdbcDSN;
d2rq:jdbcDriver ?jdbcDriver;
d2rq:username ?user;
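The hunk above changes the parser query so that the relational source description is reached through `rml:source` instead of being typed directly as `d2rq:Database`. Below is a minimal sketch of that pattern with rdflib; the toy mapping (`ex:DB_source` and its JDBC values), the prefixes, and the reduced SELECT list are assumptions for illustration and not part of this commit, and the real query in `mapping_parser` binds many more variables.

```python
# Minimal sketch (assumed example, not a project test): the revised OPTIONAL
# pattern finds a d2rq:Database that is referenced via rml:source.
from rdflib import Graph

mapping_ttl = """
@prefix rr:   <http://www.w3.org/ns/r2rml#> .
@prefix rml:  <http://semweb.mmlab.be/ns/rml#> .
@prefix d2rq: <http://www.wiwiss.fu-berlin.de/suhl/bizer/D2RQ/0.1#> .
@prefix ex:   <http://example.com/ns#> .

ex:TriplesMap1 rml:logicalSource [ rml:source ex:DB_source ] .

ex:DB_source a d2rq:Database ;
    d2rq:jdbcDSN    "jdbc:mysql://localhost:3306/example" ;
    d2rq:jdbcDriver "com.mysql.cj.jdbc.Driver" ;
    d2rq:username   "root" ;
    d2rq:password   "" .
"""

query = """
PREFIX rml:  <http://semweb.mmlab.be/ns/rml#>
PREFIX d2rq: <http://www.wiwiss.fu-berlin.de/suhl/bizer/D2RQ/0.1#>
SELECT ?db ?jdbcDSN ?jdbcDriver ?user WHERE {
    ?tm rml:logicalSource ?_source .
    ?_source rml:source ?db .
    ?db a d2rq:Database ;
        d2rq:jdbcDSN ?jdbcDSN ;
        d2rq:jdbcDriver ?jdbcDriver ;
        d2rq:username ?user .
}
"""

g = Graph()
g.parse(data=mapping_ttl, format="turtle")
for row in g.query(query):
    print(row.db, row.jdbcDSN, row.jdbcDriver, row.user)
```

With the previous pattern, `?_source a d2rq:Database`, nothing matches here: `?_source` is the logical-source node, not the database description it points to.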
@@ -1751,7 +1752,7 @@ def mapping_parser(mapping_file):
OPTIONAL {
?_object_map rr:joinCondition ?join_condition .
?join_condition rr:child ?child_value;
rr:parent ?parent_value;
rr:parent ?parent_value.
}
}
OPTIONAL {
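For reference, the OPTIONAL block above is the part of the parser query that collects join conditions between triples maps; the commit only changes the terminating punctuation of the `rr:parent` triple. A small assumed example of the kind of referencing object map it matches (hypothetical `ex:` names, rdflib used only for parsing and querying):

```python
# Assumed example: one referencing object map with a single rr:joinCondition,
# matched by the rr:child / rr:parent pattern shown in the hunk above.
from rdflib import Graph

ttl = """
@prefix rr: <http://www.w3.org/ns/r2rml#> .
@prefix ex: <http://example.com/ns#> .

ex:pom1 rr:objectMap [
    rr:parentTriplesMap ex:TriplesMap2 ;
    rr:joinCondition [ rr:child "dept_id" ; rr:parent "id" ]
] .
"""

q = """
PREFIX rr: <http://www.w3.org/ns/r2rml#>
SELECT ?child_value ?parent_value WHERE {
    ?_object_map rr:joinCondition ?join_condition .
    ?join_condition rr:child ?child_value ;
                    rr:parent ?parent_value .
}
"""

g = Graph()
g.parse(data=ttl, format="turtle")
print(list(g.query(q)))  # one row binding child_value="dept_id", parent_value="id"
```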
@@ -1776,7 +1777,8 @@ def mapping_parser(mapping_file):
?_graph_structure rr:template ?predicate_object_graph . }
}
OPTIONAL {
?_source a d2rq:Database;
?_source rml:source ?db .
?db a d2rq:Database;
d2rq:jdbcDSN ?jdbcDSN;
d2rq:jdbcDriver ?jdbcDriver;
d2rq:username ?user;
@@ -1840,6 +1842,7 @@ def mapping_parser(mapping_file):

mapping_query_results = mapping_graph.query(mapping_query)
for result_triples_map in mapping_query_results:
print(result_triples_map)
triples_map_exists = False
for triples_map in triples_map_list:
triples_map_exists = triples_map_exists or (
@@ -1995,7 +1998,7 @@ def mapping_parser(mapping_file):
result_predicate_object_map.datatype_value, "None")
elif result_predicate_object_map.object_parent_triples_map != None:
if predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map) not in join_predicate:
if (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
if (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
join_predicate[
predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
"predicate":predicate_map,
@@ -2009,7 +2012,7 @@ def mapping_parser(mapping_file):
"childs":[str(result_predicate_object_map.child_function)],
"parents":[str(result_predicate_object_map.parent_value)],
"triples_map":str(result_predicate_object_map.object_parent_triples_map)}
elif (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
elif (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
join_predicate[
predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
"predicate":predicate_map,
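The last two hunks of this file swap the `child_function` / `parent_function` checks: the first branch now handles joins in which both the child and the parent reference are produced by functions, and the `elif` keeps the case where only the parent side is a function. The sketch below condenses that branch order; the dictionary keys follow the ones visible in the diff, but the values stored in each branch are simplified assumptions rather than the parser's exact bookkeeping.

```python
# Condensed sketch of the corrected branch order (illustration only; the real
# code in mapping_parser() records more fields per join_predicate entry).
def join_entry(predicate, child_value, parent_value,
               child_function, parent_function, parent_triples_map):
    if child_function is not None and parent_function is not None:
        # Both sides of the join are computed by functions.
        return {"predicate": predicate,
                "childs": [str(child_function)],
                "parents": [str(parent_function)],
                "triples_map": str(parent_triples_map)}
    elif child_function is None and parent_function is not None:
        # Only the parent side is a function; the child is a plain reference.
        return {"predicate": predicate,
                "childs": [str(child_value)],
                "parents": [str(parent_function)],
                "triples_map": str(parent_triples_map)}
    else:
        # Ordinary join on plain child/parent references.
        return {"predicate": predicate,
                "childs": [str(child_value)],
                "parents": [str(parent_value)],
                "triples_map": str(parent_triples_map)}

# Hypothetical call: falls into the elif branch, pairing the plain child
# reference with the parent-side function.
print(join_entry("ex:studiesAt", "dept_id", "id", None,
                 "ex:toUpperCase(id)", "TriplesMap2"))
```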
rdfizer/rdfizer/functions.py (95 changes: 48 additions & 47 deletions)
@@ -709,72 +709,73 @@ def files_sort(triples_map_list, ordered, config):
else:
source_predicate["XPath"][str(tp.data_source)] = {po.predicate_map.value : ""}
else:
if "SPARQL" in tp.file_format:
if "csv" not in sorted_list:
if ".nt" in str(tp.data_source):
sorted_list["csv"] = {str(tp.data_source) : {tp.triples_map_id : tp}}
else:
sorted_list["csv"] = {"endpoint:" + str(tp.data_source) : {tp.triples_map_id : tp}}
else:
if ".nt" in str(tp.data_source):
if str(tp.data_source) in sorted_list["csv"]:
sorted_list["csv"][str(tp.data_source)][tp.triples_map_id] = tp
if tp.file_format != None:
if "SPARQL" in tp.file_format:
if "csv" not in sorted_list:
if ".nt" in str(tp.data_source):
sorted_list["csv"] = {str(tp.data_source) : {tp.triples_map_id : tp}}
else:
sorted_list["csv"][str(tp.data_source)] = {tp.triples_map_id : tp}
sorted_list["csv"] = {"endpoint:" + str(tp.data_source) : {tp.triples_map_id : tp}}
else:
if "endpoint:" + str(tp.data_source) in sorted_list["csv"]:
sorted_list["csv"]["endpoint:" + str(tp.data_source)][tp.triples_map_id] = tp
else:
sorted_list["csv"]["endpoint:" + str(tp.data_source)] = {tp.triples_map_id : tp}
for po in tp.predicate_object_maps_list:
if po.predicate_map.value in general_predicates:
predicate = po.predicate_map.value + "_" + po.object_map.value
if predicate in predicate_list:
predicate_list[predicate] += 1
else:
predicate_list[predicate] = 1
else:
if po.predicate_map.value in predicate_list:
predicate_list[po.predicate_map.value] += 1
if ".nt" in str(tp.data_source):
if str(tp.data_source) in sorted_list["csv"]:
sorted_list["csv"][str(tp.data_source)][tp.triples_map_id] = tp
else:
sorted_list["csv"][str(tp.data_source)] = {tp.triples_map_id : tp}
else:
predicate_list[po.predicate_map.value] = 1
if "csv" not in source_predicate:
if "endpoint:" + str(tp.data_source) in sorted_list["csv"]:
sorted_list["csv"]["endpoint:" + str(tp.data_source)][tp.triples_map_id] = tp
else:
sorted_list["csv"]["endpoint:" + str(tp.data_source)] = {tp.triples_map_id : tp}
for po in tp.predicate_object_maps_list:
if po.predicate_map.value in general_predicates:
predicate = po.predicate_map.value + "_" + po.object_map.value
if ".nt" in str(tp.data_source):
source_predicate["csv"] = {str(tp.data_source) : {predicate : ""}}
if predicate in predicate_list:
predicate_list[predicate] += 1
else:
source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {predicate : ""}}
predicate_list[predicate] = 1
else:
if ".nt" in str(tp.data_source):
source_predicate["csv"] = {str(tp.data_source) : {po.predicate_map.value : ""}}
if po.predicate_map.value in predicate_list:
predicate_list[po.predicate_map.value] += 1
else:
source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {po.predicate_map.value : ""}}
else:
if str(tp.data_source) in source_predicate["csv"]:
predicate_list[po.predicate_map.value] = 1
if "csv" not in source_predicate:
if po.predicate_map.value in general_predicates:
predicate = po.predicate_map.value + "_" + po.object_map.value
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)][predicate] = ""
source_predicate["csv"] = {str(tp.data_source) : {predicate : ""}}
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)][predicate] = ""
source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {predicate : ""}}
else:
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)][po.predicate_map.value] = ""
source_predicate["csv"] = {str(tp.data_source) : {po.predicate_map.value : ""}}
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)][po.predicate_map.value] = ""
source_predicate["csv"] = {"endpoint:" + str(tp.data_source) : {po.predicate_map.value : ""}}
else:
if po.predicate_map.value in general_predicates:
predicate = po.predicate_map.value + "_" + po.object_map.value
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)] = {predicate : ""}
if str(tp.data_source) in source_predicate["csv"]:
if po.predicate_map.value in general_predicates:
predicate = po.predicate_map.value + "_" + po.object_map.value
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)][predicate] = ""
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)][predicate] = ""
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {predicate : ""}
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)][po.predicate_map.value] = ""
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)][po.predicate_map.value] = ""
else:
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)] = {po.predicate_map.value : ""}
if po.predicate_map.value in general_predicates:
predicate = po.predicate_map.value + "_" + po.object_map.value
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)] = {predicate : ""}
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {predicate : ""}
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {po.predicate_map.value : ""}
if ".nt" in str(tp.data_source):
source_predicate["csv"][str(tp.data_source)] = {po.predicate_map.value : ""}
else:
source_predicate["csv"]["endpoint:" + str(tp.data_source)] = {po.predicate_map.value : ""}
else:
if tp.query == "None":
if tp.iterator == "None":
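The `files_sort` changes above mostly re-indent the SPARQL branch under a new `if tp.file_format != None:` guard; without it, the membership test `"SPARQL" in tp.file_format` raises a TypeError for triples maps whose logical source has no file format (relational sources, for instance). The sketch below shows the guard and the per-source grouping into nested dictionaries; the attribute names are taken from the diff, but the helper itself and the toy data are assumptions for illustration, and the real function goes on to handle the other source types.

```python
# Minimal sketch (assumed helper, not the project's files_sort): group
# SPARQL-backed triples maps per source, skipping maps without a file format.
from types import SimpleNamespace

def group_sparql_sources(triples_map_list):
    sorted_list = {}
    for tp in triples_map_list:
        # Guard first: `"SPARQL" in None` would raise
        # "TypeError: argument of type 'NoneType' is not iterable".
        if tp.file_format is not None and "SPARQL" in tp.file_format:
            key = (str(tp.data_source) if ".nt" in str(tp.data_source)
                   else "endpoint:" + str(tp.data_source))
            sorted_list.setdefault("csv", {}).setdefault(key, {})[tp.triples_map_id] = tp
    return sorted_list

# Toy usage: TM1 is grouped under its endpoint key, TM2 (no file format)
# is left for the non-SPARQL branches.
maps = [
    SimpleNamespace(triples_map_id="TM1", file_format="SPARQL",
                    data_source="https://dbpedia.org/sparql"),
    SimpleNamespace(triples_map_id="TM2", file_format=None,
                    data_source="db_source"),
]
print(group_sparql_sources(maps))
```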
rdfizer/rdfizer/semantify.py (13 changes: 8 additions & 5 deletions)
@@ -1630,7 +1630,8 @@ def mapping_parser(mapping_file):
?obj_dump void:dataDump ?object_dump.}
}
OPTIONAL {
?_source a d2rq:Database;
?_source rml:source ?db .
?db a d2rq:Database;
d2rq:jdbcDSN ?jdbcDSN;
d2rq:jdbcDriver ?jdbcDriver;
d2rq:username ?user;
@@ -1751,7 +1752,7 @@ def mapping_parser(mapping_file):
OPTIONAL {
?_object_map rr:joinCondition ?join_condition .
?join_condition rr:child ?child_value;
rr:parent ?parent_value;
rr:parent ?parent_value.
}
}
OPTIONAL {
@@ -1776,7 +1777,8 @@ def mapping_parser(mapping_file):
?_graph_structure rr:template ?predicate_object_graph . }
}
OPTIONAL {
?_source a d2rq:Database;
?_source rml:source ?db .
?db a d2rq:Database;
d2rq:jdbcDSN ?jdbcDSN;
d2rq:jdbcDriver ?jdbcDriver;
d2rq:username ?user;
@@ -1840,6 +1842,7 @@ def mapping_parser(mapping_file):

mapping_query_results = mapping_graph.query(mapping_query)
for result_triples_map in mapping_query_results:
print(result_triples_map)
triples_map_exists = False
for triples_map in triples_map_list:
triples_map_exists = triples_map_exists or (
@@ -1995,7 +1998,7 @@ def mapping_parser(mapping_file):
result_predicate_object_map.datatype_value, "None")
elif result_predicate_object_map.object_parent_triples_map != None:
if predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map) not in join_predicate:
if (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
if (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
join_predicate[
predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
"predicate":predicate_map,
@@ -2009,7 +2012,7 @@ def mapping_parser(mapping_file):
"childs":[str(result_predicate_object_map.child_function)],
"parents":[str(result_predicate_object_map.parent_value)],
"triples_map":str(result_predicate_object_map.object_parent_triples_map)}
elif (result_predicate_object_map.child_function is not None) and (result_predicate_object_map.parent_function is not None):
elif (result_predicate_object_map.child_function is None) and (result_predicate_object_map.parent_function is not None):
join_predicate[
predicate_map.value + " " + str(result_predicate_object_map.object_parent_triples_map)] = {
"predicate":predicate_map,
