Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for upload to authenticated endpoints #2

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 46 additions & 16 deletions prezmanifest/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import argparse
import sys
from getpass import getpass
from pathlib import Path

from kurra.format import make_dataset, export_quads
Expand All @@ -23,6 +24,7 @@
from rdflib import Graph, URIRef, Dataset
from typing import Literal as TLiteral
import logging
import httpx

try:
from prezmanifest import MRR, OLIS, validate, __version__
Expand All @@ -36,10 +38,12 @@


def load(
manifest: Path,
sparql_endpoint: str = None,
destination_file: Path = None,
return_data_type: TLiteral["Graph", "Dataset", None] = None
manifest: Path,
sparql_endpoint: str = None,
sparql_username: str = None,
sparql_password: str = None,
destination_file: Path = None,
return_data_type: TLiteral["Graph", "Dataset", None] = None
) -> None | Graph | Dataset:
"""Loads a catalogue of data from a prezmanifest file, whose content are valid according to the Prez Manifest Model
(https://kurrawong.github.io/prez.dev/manifest/) either into a specified quads file in the Trig format, or into a
Expand All @@ -53,7 +57,24 @@ def load(

return_data_value_error_message = "return_data_type was set to an invalid value. Must be one of Dataset or Graph or None"

def _export(data: Graph | Dataset, iri, sparql_endpoint, destination_file, return_data_type, append=False):
# establish a reusable client for http requests
# also allows for basic authentication to be used.
if sparql_endpoint:
auth = None
if sparql_username:
if not sparql_password:
if not sys.stdin.isatty():
# if not possible to prompt for a password
raise ValueError(
"A password must be given if a sparql username is set")
sparql_password = getpass()
auth = httpx.BasicAuth(sparql_username, sparql_password)
client = httpx.Client(base_url=sparql_endpoint, auth=auth)
else:
client = None

def _export(data: Graph | Dataset, iri, client: httpx.Client | None,
destination_file, return_data_type, append=False):
if type(data) is Dataset:
if iri is not None:
raise ValueError("If the data is a Dataset, the parameter iri must be None")
Expand All @@ -63,7 +84,7 @@ def _export(data: Graph | Dataset, iri, sparql_endpoint, destination_file, retur
elif sparql_endpoint is not None:
for g in data.graphs():
if g.identifier != URIRef("urn:x-rdflib:default"):
_export(g, g.identifier, sparql_endpoint, None, None)
_export(data=g, iri=g.identifier, client=client, destination_file=None, return_data_type=None)
else:
if return_data_type == "Dataset":
return data
Expand All @@ -85,7 +106,8 @@ def _export(data: Graph | Dataset, iri, sparql_endpoint, destination_file, retur
export_quads(make_dataset(data, iri), destination_file)
elif sparql_endpoint is not None:
msg += f"to SPARQL Endpoint {sparql_endpoint}"
upload(sparql_endpoint, data, iri, append)
upload(url=sparql_endpoint, file_or_str_or_graph=data, graph_name=iri, append=append,
http_client=client)
else: # returning data
if return_data_type == "Dataset":
msg += "to Dataset"
Expand Down Expand Up @@ -138,7 +160,8 @@ def _export(data: Graph | Dataset, iri, sparql_endpoint, destination_file, retur
vg.add((vg_iri, SDO.name, vg_name))

# export the Catalogue data
_export(c, catalogue_iri, sparql_endpoint, destination_file, return_data_type)
_export(data=c, iri=catalogue_iri, client=client, destination_file=destination_file,
return_data_type=return_data_type)

# non-catalogue resources
for s, o in manifest_graph.subject_objects(PROF.hasResource):
Expand All @@ -156,7 +179,8 @@ def _export(data: Graph | Dataset, iri, sparql_endpoint, destination_file, retur
# fg.bind("rdf", RDF)

if role == MRR.ResourceData:
resource_iri = fg.value(predicate=RDF.type, object=SKOS.ConceptScheme) or fg.value(predicate=RDF.type, object=OWL.Ontology)
resource_iri = fg.value(predicate=RDF.type, object=SKOS.ConceptScheme) or fg.value(
predicate=RDF.type, object=OWL.Ontology)

if role in [
MRR.CompleteCatalogueAndResourceLabels,
Expand All @@ -170,18 +194,20 @@ def _export(data: Graph | Dataset, iri, sparql_endpoint, destination_file, retur
vg.add((vg_iri, OLIS.isAliasFor, resource_iri))

# export one Resource
_export(fg, resource_iri, sparql_endpoint, destination_file, return_data_type)
_export(data=fg, iri=resource_iri, client=client, destination_file=destination_file,
return_data_type=return_data_type)
elif str(f.name).endswith(".trig"):
d = Dataset()
d.parse(f)
for g in d.graphs():
if g.identifier != URIRef("urn:x-rdflib:default"):
vg.add((vg_iri, OLIS.isAliasFor, g.identifier))
_export(d, None, sparql_endpoint, destination_file, return_data_type)

_export(data=d, iri=None, client=client, destination_file=destination_file,
return_data_type=return_data_type)

# export the System Graph
_export(vg, OLIS.SystemGraph, sparql_endpoint, destination_file, return_data_type, append=True)
_export(data=vg, iri=OLIS.SystemGraph, client=client, destination_file=destination_file,
return_data_type=return_data_type, append=True)

if return_data_type == "Dataset":
return dataset_holder
Expand Down Expand Up @@ -209,6 +235,8 @@ def setup_cli_parser(args=None):
"--endpoint",
help="The SPARQL endpoint you want to load the data into. Cannot be specified when destination is.",
)
parser.add_argument("-u", "--username", help="(optional) SPARQL endpoint username for Basic Auth")
parser.add_argument("-p", "--password", help="(optional) SPARQL endpoint password for Basic Auth")

group.add_argument(
"-d",
Expand All @@ -232,9 +260,11 @@ def cli(args=None):
args = setup_cli_parser(args)

load(
Path(args.manifest),
args.endpoint if args.endpoint is not None else None,
Path(args.destination) if args.destination is not None else None,
manifest=Path(args.manifest),
sparql_endpoint=args.endpoint,
sparql_username=args.username,
sparql_password=args.password,
destination_file=Path(args.destination) if args.destination is not None else None,
)


Expand Down
29 changes: 28 additions & 1 deletion tests/config.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,31 @@ PREFIX text: <http://jena.apache.org/text#>
text:predicate dc:description
]
);
text:uidField "uid" .
text:uidField "uid" .

:service2 rdf:type fuseki:Service ;
fuseki:allowedUsers "admin" ;
rdfs:label "authenticated ds" ;
fuseki:name "authds" ;
fuseki:dataset :dataset2 ;

fuseki:endpoint [ fuseki:operation fuseki:query ] ;
fuseki:endpoint [ fuseki:operation fuseki:update ] ;
fuseki:endpoint [ fuseki:operation fuseki:gsp-rw ] ;

fuseki:endpoint [ fuseki:name "sparql" ;
fuseki:operation fuseki:query ] ;
fuseki:endpoint [ fuseki:name "query" ;
fuseki:operation fuseki:query ] ;
fuseki:endpoint [ fuseki:name "update" ;
fuseki:operation fuseki:update ] ;
fuseki:endpoint [ fuseki:name "data" ;
fuseki:operation fuseki:gsp-rw ] ;
fuseki:endpoint [ fuseki:name "get" ;
fuseki:operation fuseki:gsp-r ] ;

.

:dataset2 rdf:type tdb2:DatasetTDB2 ;
tdb2:location "/fuseki/databases/authds" ;
.
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def fuseki_container(request: pytest.FixtureRequest):
container = DockerContainer(FUSEKI_IMAGE)
container.with_volume_mapping(
str(Path(__file__).parent / "shiro.ini"), "/fuseki/shiro.ini"
str(Path(__file__).parent / "shiro.ini"), "/opt/fuseki/shiro.ini"
)
container.with_volume_mapping(
str(Path(__file__).parent / "config.ttl"), "/fuseki/config.ttl"
Expand Down
5 changes: 4 additions & 1 deletion tests/shiro.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ admin = admin
## and the rest are restricted to admin user
/$/** = authcBasic,user[admin]

# Everything else
# authds service is restricted to admin user
/authds/** = authcBasic,user[admin]

# Everything else is open
/**=anon
25 changes: 25 additions & 0 deletions tests/test_loader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import warnings
from pathlib import Path

import httpx
import pytest

from kurra.fuseki import query, upload
Expand All @@ -9,6 +11,7 @@
from prezmanifest import load
except ImportError:
import sys

sys.path.append(str(Path(__file__).parent.parent.resolve()))
from prezmanifest import load

Expand Down Expand Up @@ -128,3 +131,25 @@ def test_load_to_fuseki(fuseki_container):
count = int(r[0]["count"]["value"])

assert count == 5


def test_load_to_fuseki_basic_auth(fuseki_container):
    """Load the demo-vocabs manifest into a Basic-Auth-protected Fuseki dataset,
    then verify all five named graphs arrived by querying with credentials."""
    port = fuseki_container.get_exposed_port(3030)
    SPARQL_ENDPOINT = f"http://localhost:{port}/authds"

    # The manifest under test lives alongside this test module.
    manifest_path = Path(__file__).parent / "demo-vocabs" / "manifest.ttl"
    load(
        manifest_path,
        sparql_endpoint=SPARQL_ENDPOINT,
        sparql_username="admin",
        sparql_password="admin",
    )

    # Count the distinct named graphs now present in the dataset.
    count_query = """
    SELECT (COUNT(DISTINCT ?g) AS ?count)
    WHERE {
        GRAPH ?g {
            ?s ?p ?o
        }
    }
    """
    # Reads also require Basic Auth on the /authds service (see shiro.ini).
    authed_client = httpx.Client(auth=("admin", "admin"))
    bindings = query(
        SPARQL_ENDPOINT,
        count_query,
        return_python=True,
        return_bindings_only=True,
        http_client=authed_client,
    )

    graph_count = int(bindings[0]["count"]["value"])

    assert graph_count == 5