@inject
class MappingToYARRRMLFacade(BaseFacade):
    """Facade that renders one mapping of a workspace as a YARRRML document.

    The facade only orchestrates: it resolves the workspace, checks that the
    requested mapping belongs to it, loads the mapping and its source, and
    delegates the actual conversion to the YARRRML service.
    """

    def __init__(
        self,
        workspace_metadata_service: WorkspaceMetadataServiceProtocol,
        workspace_service: WorkspaceServiceProtocol,
        mapping_service: MappingServiceProtocol,
        source_service: SourceServiceProtocol,
        yarrrml_service: MappingToYARRRMLServiceProtocol,
        fs_service: FSServiceProtocol,
    ):
        """Keep references to the collaborating services."""
        super().__init__()
        self.workspace_metadata_service: WorkspaceMetadataServiceProtocol = workspace_metadata_service
        self.workspace_service: WorkspaceServiceProtocol = workspace_service
        self.mapping_service: MappingServiceProtocol = mapping_service
        self.source_service: SourceServiceProtocol = source_service
        self.yarrrml_service: MappingToYARRRMLServiceProtocol = yarrrml_service
        self.fs_service: FSServiceProtocol = fs_service

    @BaseFacade.error_wrapper
    def execute(
        self,
        workspace_id: str,
        mapping_id: str | None = None,
    ) -> FacadeResponse:
        """Build the YARRRML text for ``mapping_id`` inside ``workspace_id``.

        Args:
            workspace_id: Identifier used to look up the workspace metadata.
            mapping_id: Identifier of the mapping to convert.

        Returns:
            FacadeResponse: success response whose ``data`` is the YARRRML
            string produced by the YARRRML service.

        Raises:
            ServerException: with ``ErrCodes.MAPPING_NOT_FOUND`` when the
                mapping id is not listed in the workspace's mappings.
        """
        log = self.logger
        log.info(
            f"Creating YARRRML mapping for mapping {mapping_id} in workspace {workspace_id}"
        )

        log.info("Retrieving workspace metadata")
        metadata = self.workspace_metadata_service.get_workspace_metadata(
            workspace_id,
        )

        log.info("Retrieving workspace")
        workspace = self.workspace_service.get_workspace(metadata.location)

        log.info("Retrieving mapping")
        if mapping_id not in workspace.mappings:
            # Same text goes to the log and to the raised exception.
            not_found = f"Mapping {mapping_id} not found in workspace {workspace_id}"
            log.error(not_found)
            raise ServerException(not_found, ErrCodes.MAPPING_NOT_FOUND)

        mapping = self.mapping_service.get_mapping(mapping_id)

        log.info("Retrieving source")
        source = self.source_service.get_source(mapping.source_id)

        log.info("Converting mapping to YARRRML")
        yarrrml = self.yarrrml_service.convert_mapping_to_yarrrml(
            workspace.prefixes,
            source,
            mapping,
            self.fs_service,
        )

        return self._success_response(
            data=yarrrml,
            message="YARRRML mapping created",
        )
class MappingToYARRRMLServiceProtocol(ABC):
    """Interface for services that serialise a mapping graph as YARRRML."""

    @abstractmethod
    def convert_mapping_to_yarrrml(
        self,
        prefixes: dict[str, str],
        source: Source,
        mapping: MappingGraph,
        fs_service: FSServiceProtocol,
    ) -> str:
        """
        Produce the YARRRML document for a mapping.

        Args:
            prefixes (dict): A dictionary of prefixes
            source (Source): Source data the mapping reads from
            mapping (MappingGraph): Mapping data to convert
            fs_service (FSServiceProtocol): File-system service passed in by
                the caller so the implementation can work with the source's
                stored file

        Returns:
            str: Valid Yaml string representing the YARRRML mapping
        """
        ...
+1,9 @@ from server.services.core.config_service import ( ConfigService, ) +from server.services.core.mapping_to_yarrrml_service import ( + MappingToYARRRMLService, +) from server.services.core.sqlite_db_service import DBService from server.services.core.workspace_metadata_service import ( WorkspaceMetadataService, @@ -30,4 +33,5 @@ "LocalOntologyService", "LocalSourceService", "LocalMappingService", + "MappingToYARRRMLService", ] diff --git a/server/services/core/mapping_to_yarrrml_service.py b/server/services/core/mapping_to_yarrrml_service.py new file mode 100644 index 0000000..c011da3 --- /dev/null +++ b/server/services/core/mapping_to_yarrrml_service.py @@ -0,0 +1,222 @@ +import datetime +from pathlib import Path +from typing import cast + +import yaml +from kink import inject + +from server.exceptions import ErrCodes, ServerException +from server.models.mapping import ( + MappingEdge, + MappingGraph, + MappingLiteral, + MappingNode, + MappingNodeType, + MappingURIRef, +) +from server.models.source import Source, SourceType +from server.service_protocols.mapping_to_yarrrml_service_protocol import ( + FSServiceProtocol, + MappingToYARRRMLServiceProtocol, +) + + +@inject(alias=MappingToYARRRMLServiceProtocol) +class MappingToYARRRMLService( + MappingToYARRRMLServiceProtocol +): + def __init__(self, TEMP_DIR: Path) -> None: + self.temp_dir = TEMP_DIR + + def convert_mapping_to_yarrrml( + self, + prefixes: dict[str, str], + source: Source, + mapping: MappingGraph, + fs_service: FSServiceProtocol, # Implementation might change depending the environment (local, cloud, etc) + ) -> str: + yarrrml_dict: dict = { + "prefixes": prefixes, + } + + source_dict: dict = {} + + match source.type: + case SourceType.CSV: + source_dict["data"] = { + "access": str( + fs_service.provide_file_path_of_uuid( + source.uuid + ).absolute() + ), + "referenceFormulation": "csv", + } + + case SourceType.JSON: + source_dict["data"] = { + "access": str( + fs_service.provide_file_path_of_uuid( 
+ source.uuid + ).absolute() + ), + "referenceFormulation": "json", + "iterator": source.extra["json_path"], + } + + yarrrml_dict["sources"] = source_dict + + # Mappings + + mappings: dict = {} + + entities: list[MappingNode] = [ + cast(MappingNode, node) + for node in mapping.nodes + if node.type == MappingNodeType.ENTITY + ] + + for entity in entities: + if entity.uri_pattern == "": + raise ServerException( + f"Entity {entity.label} has no URI pattern", + code=ErrCodes.ENTITY_URI_PATTERN_NOT_FOUND, + ) + entity_dict: dict = { + "source": "data", + "s": entity.uri_pattern, + } + po: list[dict | list] = [ + { + "predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#label", + "object": entity.label, + } + ] + + for rdf_type in entity.rdf_type: + po.append( + { + "predicate": "a", + "object": rdf_type, + "type": "iri", + } + ) + + outgoing_edges_target_nodes: list[ + tuple[ + MappingEdge, + MappingNode + | MappingLiteral + | MappingURIRef, + ] + ] = self._get_outgoing_edges(entity, mapping) + + for ( + edge, + target_node, + ) in outgoing_edges_target_nodes: + if isinstance(target_node, MappingLiteral): + if target_node.value == "": + raise ServerException( + f"Literal with id {target_node.id} has no value", + code=ErrCodes.LITERAL_VALUE_NOT_FOUND, + ) + po.append( + { + "predicate": edge.source_handle, + "object": { + "value": target_node.value, + "datatype": target_node.literal_type, + }, + } + ) + elif isinstance(target_node, MappingURIRef): + if target_node.uri_pattern == "": + raise ServerException( + f"URIRef with id {target_node.id} has no URI pattern", + code=ErrCodes.URIREF_URI_PATTERN_NOT_FOUND, + ) + po.append( + { + "predicate": edge.source_handle, + "object": { + "value": target_node.uri_pattern, + "type": "iri", + }, + } + ) + elif isinstance(target_node, MappingNode): + if target_node.uri_pattern == "": + raise ServerException( + f"Node with id {target_node.id} has no URI pattern", + code=ErrCodes.ENTITY_URI_PATTERN_NOT_FOUND, + ) + po.append( + { + 
"predicate": edge.source_handle, + "object": { + "value": target_node.uri_pattern, + "type": "iri", + }, + } + ) + + entity_dict["po"] = po + + mappings[entity.id] = entity_dict + + yarrrml_dict["mappings"] = mappings + + yaml_str = yaml.dump( + yarrrml_dict, + sort_keys=False, + default_flow_style=False, + ) + + # Write the YARRRML to a temporary file + + temp_file_path: Path = ( + self.temp_dir + / f"yarrrml-{mapping.name}-{datetime.datetime.now().isoformat()}.yml" + ) + + temp_file_path.touch() + + temp_file_path.write_text(yaml_str) + + return yaml_str + + def _get_outgoing_edges( + self, node: MappingNode, mapping: MappingGraph + ) -> list[ + tuple[ + MappingEdge, + MappingNode | MappingLiteral | MappingURIRef, + ] + ]: + outgoing_edges: list[ + tuple[ + MappingEdge, + MappingNode + | MappingLiteral + | MappingURIRef, + ] + ] = [] + + for edge in mapping.edges: + if edge.source == node.id: + target_node_iter = filter( + lambda n, e=edge: n.id == e.target, + mapping.nodes, + ) + target_node = next(target_node_iter) + if target_node is not None: + outgoing_edges.append( + (edge, target_node) + ) + else: + raise ServerException( + f"Target node with id {edge.target} not found", + code=ErrCodes.MAPPING_EDGE_ID_NOT_FOUND, + ) + + return outgoing_edges diff --git a/server/services/local/local_fs_service.py b/server/services/local/local_fs_service.py index a7c8c3a..da450a3 100644 --- a/server/services/local/local_fs_service.py +++ b/server/services/local/local_fs_service.py @@ -154,5 +154,8 @@ def download_file_with_uuid(self, uuid: str) -> bytes: return file_path.read_bytes() + def provide_file_path_of_uuid(self, uuid: str) -> Path: + return self._FILE_DIR / uuid + __all__ = ["LocalFSService"] diff --git a/test/services/core/mapping_to_yarrrml_service_test.py b/test/services/core/mapping_to_yarrrml_service_test.py new file mode 100644 index 0000000..ebb401f --- /dev/null +++ b/test/services/core/mapping_to_yarrrml_service_test.py @@ -0,0 +1,28 @@ +# import unittest 
+# from unittest.mock import MagicMock + +# from server.models.mapping import MappingGraph +# from server.services.core.mapping_to_yarrrml_service import ( +# MappingToYARRRMLService, +# ) + + +# class TestMappingToYARRRMLService(unittest.TestCase): +# def setUp(self): +# self.service = MappingToYARRRMLService() +# self.prefixes = {"ex": "http://example.com/"} +# self.mapping = MagicMock(spec=MappingGraph) + +# def test_convert_mapping_to_yarrrml(self): +# expected_output = ( +# "prefixes:\n ex: http://example.com/\n" +# ) +# result = self.service.convert_mapping_to_yarrrml( +# self.prefixes, self.mapping +# ) +# print(f"result: {result}") +# self.assertEqual(result, expected_output) + + +# if __name__ == "__main__": +# unittest.main()