Skip to content

Commit

Permalink
deleted unused variable; RDF graph identifier handling and root node …
Browse files Browse the repository at this point in the history
…identification improved, ro-crates should now be properly handled
  • Loading branch information
huberrob committed Oct 13, 2023
1 parent 03ba2ad commit 6a10273
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
23 changes: 17 additions & 6 deletions fuji_server/helper/metadata_collector_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def __init__(self, loggerinst, target_url=None, source=None, json_ld_content=Non
super().__init__(logger=loggerinst)

self.target_url = target_url
self.resolved_url = target_url
self.content_type = None
self.source_name = source
self.metadata_format = MetadataFormats.RDF
Expand Down Expand Up @@ -235,6 +236,7 @@ def parse_metadata(self):
self.logger.info("FsF-F2-01M : Ignoring RDF since content already has been parsed as XML")
if requestHelper.response_content is not None:
self.content_type = requestHelper.content_type
self.resolved_url = requestHelper.redirect_url
else:
self.content_type = "application/ld+json"
rdf_response = self.json_ld_content
Expand All @@ -243,7 +245,7 @@ def parse_metadata(self):
# handle JSON-LD
if self.content_type in ["application/ld+json", "application/json", "application/vnd.schemaorg.ld+json"]:
if self.target_url:
jsonld_source_url = self.target_url
jsonld_source_url = self.resolved_url
else:
jsonld_source_url = "landing page"
if self.json_ld_content:
Expand Down Expand Up @@ -341,8 +343,10 @@ def parse_metadata(self):
% (jsonld_source_url)
)
try:
jsonldgraph = rdflib.ConjunctiveGraph()
rdf_response_graph = jsonldgraph.parse(data=rdf_response, format="json-ld")
jsonldgraph = rdflib.ConjunctiveGraph(identifier=self.resolved_url)
rdf_response_graph = jsonldgraph.parse(
data=rdf_response, format="json-ld", publicID=self.resolved_url
)
# rdf_response_graph = jsonldgraph
self.setLinkedNamespaces(self.getAllURIS(jsonldgraph))
except Exception as e:
Expand Down Expand Up @@ -386,7 +390,7 @@ def parse_metadata(self):
badline = None
while not RDFparsed:
try:
graph = rdflib.Graph(identifier=self.target_url)
graph = rdflib.Graph(identifier=self.resolved_url)
graph.parse(data=rdf_response, format=parse_format)
rdf_response_graph = graph
self.setLinkedNamespaces(self.getAllURIS(rdf_response_graph))
Expand Down Expand Up @@ -901,15 +905,22 @@ def get_schemaorg_metadata_from_graph(self, graph):
root_name = str(root).rsplit("/")[-1].strip()
if root_name.lower() in creative_work_types:
creative_works = list(graph[: RDF.type : root])
# print(root, type(creative_works[0]), list(graph.subjects(object=creative_works[0])))
# Finding the schema.org root
if len(list(graph.subjects(object=creative_works[0]))) == 0:
creative_work_subjects = list(graph.subjects(object=creative_works[0]))
if len(creative_work_subjects) == 0:
cand_creative_work[root_name] = creative_works[0]
if object_types_dict.get(str(creative_works[0])):
object_types_dict[str(creative_works[0])].append(root_name)
else:
object_types_dict[str(creative_works[0])] = [root_name]
# prioritize Dataset type
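# Also accept this node as a root candidate when the graph's own identifier is one of
# the subjects that reference it (graph id == subject id); assumption: such a link means "is about".
# This helps with RO-Crates.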
elif graph.identifier in creative_work_subjects:
cand_creative_work[root_name] = creative_works[0]

if cand_creative_work:
# prioritize Dataset type
if "Dataset" in cand_creative_work:
creative_work = cand_creative_work["Dataset"]
else:
Expand Down
1 change: 0 additions & 1 deletion fuji_server/helper/request_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def __init__(self, url, logInst: object = None):
self.format = None # Guessed Metadata Format
self.request_url = url
self.redirect_url = None
self.resolved_url = None
self.redirect_list = []
self.redirect_status_list = []
self.accept_type = AcceptTypes.default.value
Expand Down

0 comments on commit 6a10273

Please sign in to comment.