From 1eb04304043f709074817626a907d4d844bf15c8 Mon Sep 17 00:00:00 2001 From: Benjamin Webb <40066515+webb-ben@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:02:47 -0400 Subject: [PATCH] Fix SPARQL provider and CI tests (#26) * Update sparql.py * Update additional pages from OACommon * Update sparql.py Update sparql.py Update sparql.py * Update test_sparql_provider.py * Update sparql.py Update sparql.py Update sparql.py Update sparql.py Update sparql.py Update sparql.py Update sparql.py * POST sparql queries --- pygeoapi_plugins/provider/sparql.py | 30 ++++++++++++++++++----------- tests/test_sitemap_process.py | 4 ++-- tests/test_sparql_provider.py | 4 +--- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/pygeoapi_plugins/provider/sparql.py b/pygeoapi_plugins/provider/sparql.py index 78aa53c..19e4a3d 100644 --- a/pygeoapi_plugins/provider/sparql.py +++ b/pygeoapi_plugins/provider/sparql.py @@ -91,7 +91,9 @@ def __init__(self, provider_def): # Set SPARQL query parameters query = provider_def.get('sparql_query', {}) self.convert = query.get('convert', True) - self.sparql_endpoint = query.get('endpoint') + self.sparql = SPARQLWrapper(query.get('endpoint')) + self.sparql.setMethod('POST') + self.sparql.setReturnFormat(JSON) select = query.get('select', '*') self.select = _SELECT.format(select=select) @@ -286,15 +288,15 @@ def _clean_result(self, result): if _id not in ret: ret[_id] = {k: [] for k in v.keys()} - # Iterate over each property-value pair for this binding - for k, v in v.items(): + # Iterate over each property-value pair for this binding + for k, v_ in v.items(): # Ensure the property's entry is always a list if not isinstance(ret[_id][k], list): ret[_id][k] = [ret[_id][k]] # If the current value is not already in the list, append it - if v not in [item['value'] for item in ret[_id][k]]: - ret[_id][k].append(v) + if v_ not in [item['value'] for item in ret[_id][k]]: + ret[_id][k].append(v_) return ret @@ -359,12 +361,10 @@ def _sendQuery(self, query): :returns: SPARQL query results """ LOGGER.debug('Sending SPARQL query') - sparql = SPARQLWrapper(self.sparql_endpoint) - sparql.setQuery(query) - sparql.setReturnFormat(JSON) + self.sparql.setQuery(query) try: - results = sparql.query().convert() + results = self.sparql.query().convert() LOGGER.debug('Received SPARQL results') except Exception as err: LOGGER.error(f'Error in SPARQL query: {err}') @@ -407,9 +407,11 @@ def parse(value: str) -> list: otherwise the original string. """ if '|' in value: - return value.split('|') + LOGGER.debug('Splitting value by "|"') + return value.lstrip('|').rstrip('|').split('|') elif ', ' in value: - return value.split(', ') + LOGGER.debug('Splitting value by ", "') + return value.lstrip(', ').rstrip(', ').split(', ') else: return value @@ -424,13 +426,19 @@ def combine_lists(dict_data: dict): """ # Extract keys from the dictionary keys = list(dict_data.keys()) + if len(keys) == 1: + LOGGER.debug('Returning un-mondified data') + return dict_data # Ensure all lists have the same length length = len(dict_data[keys[0]]) + LOGGER.debug(f'Number of keys: {length}') if not all(len(dict_data[key]) == length for key in keys): + LOGGER.debug('Returning un-mondified data') return dict_data # Combine the items into a list of dictionaries + LOGGER.debug(f'Extracting data for: {keys}') combined_list = [ {key: dict_data[key][i] for key in keys} for i in range(length) ] diff --git a/tests/test_sitemap_process.py b/tests/test_sitemap_process.py index 30b1714..13e4690 100644 --- a/tests/test_sitemap_process.py +++ b/tests/test_sitemap_process.py @@ -54,7 +54,7 @@ def test_sitemap_generator(body): assert len(sitemap) == 5 common = sitemap.pop('common.xml') - assert len(common) == 2402 + assert len(common) == 3134 root = xml.etree.ElementTree.fromstring(common) assert all(i.tag == j.tag for (i, j) in zip(root, root.findall('url'))) @@ -79,7 +79,7 @@ def test_sitemap_no_features(body): assert len(sitemap) == 1 common = sitemap.pop('common.xml') - assert len(common) == 2402 + assert len(common) == 3134 def test_sitemap_zip(body): diff --git a/tests/test_sparql_provider.py b/tests/test_sparql_provider.py index a7c9fa7..913a224 100644 --- a/tests/test_sparql_provider.py +++ b/tests/test_sparql_provider.py @@ -72,11 +72,9 @@ def test_query(config): assert base_fields['uri']['type'] == 'string' fields = p.get_fields() - assert len(fields) == 6 + assert len(fields) == 3 for field in base_fields: assert field in fields - assert fields['country']['type'] == 'string' - assert fields['leader']['type'] == 'string' results = p.query() assert len(results['features']) == 8