Merge pull request #98 from mjanez/feature/harvester-private-datasets
Add allow_private_datasets to ckan harvester
mjanez authored Aug 19, 2024
2 parents 28c3d3d + 9e5037a commit dd75b3f
Showing 2 changed files with 28 additions and 6 deletions.
6 changes: 5 additions & 1 deletion ckanext/schemingdcat/harvesters/base.py
@@ -536,6 +536,7 @@ def get_extra_value(extras, key):

def apply_field_mapping(d, mapping):
new_dict = {}
# TODO: If field_mapping is not provided, items() fails.
for local_field, remote_info in mapping.items():
if 'field_name' in remote_info:
remote_field = remote_info['field_name']
@@ -1107,8 +1108,11 @@ def _set_translated_fields(self, package_dict):
translated_fields["dataset_fields"].append(local_field_name)

if isinstance(remote_field_name, dict):
local_field_value = package_dict.get(local_field_name, {})
if not isinstance(local_field_value, dict):
local_field_value = {}
package_dict[local_field_name] = {
lang: package_dict.get(name, package_dict.get(local_field_name, {}).get(lang))
lang: package_dict.get(name, local_field_value.get(lang))
for lang, name in remote_field_name.items()
}
if local_field_name.endswith('_translated'):
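The base.py change above guards _set_translated_fields against a non-dict value already stored under the local field name. A minimal sketch of the resolution logic with hypothetical package data (field names and values are illustrative, not taken from the harvester):

    # Illustrative only: how a dict-valued remote_field_name resolves per-language values.
    package_dict = {"title": "Dataset", "title_es": "Conjunto de datos"}   # hypothetical remote metadata
    remote_field_name = {"en": "title", "es": "title_es"}                  # per-language field mapping
    local_field_name = "title_translated"

    # Guard added by this commit: fall back to {} if the existing value is not a dict.
    local_field_value = package_dict.get(local_field_name, {})
    if not isinstance(local_field_value, dict):
        local_field_value = {}

    package_dict[local_field_name] = {
        lang: package_dict.get(name, local_field_value.get(lang))
        for lang, name in remote_field_name.items()
    }
    # package_dict["title_translated"] == {"en": "Dataset", "es": "Conjunto de datos"}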
28 changes: 23 additions & 5 deletions ckanext/schemingdcat/harvesters/ckan.py
@@ -106,11 +106,12 @@ def validate_config(self, config):
f'schema should be one of: {", ".join(self._supported_schemas)}. Current dataset schema: {self._local_schema_name}'
)
else:
raise ValueError(
f"Config schema should match the local schema: '{self._local_schema_name}'. "
f"Check the remote schema with CKAN API: {{ckan_site_url}}/api/3/action/scheming_dataset_schema_show?type=dataset, "
f"or specify the local schema, and the harvester will try to map the fields."
)
if self._local_schema_name.lower().strip() != schema.lower().strip():
raise ValueError(
f"Config 'schema': {schema} should match the local schema: '{self._local_schema_name}'. "
f"Check the remote schema with CKAN API: {{ckan_site_url}}/api/3/action/scheming_dataset_schema_show?type=dataset, "
f"or specify the local schema '{self._local_schema_name}', and the harvester will try to map the fields."
)

config = json.dumps({**config_obj, "schema": schema.lower().strip()})

@@ -124,6 +125,18 @@ def validate_config(self, config):
):
config = json.dumps({**config_obj, "allow_harvest_datasets": False})

# 'allow_private_datasets' requires 'api_key'; default to False if it is missing or not a boolean
if "allow_private_datasets" in config_obj:
if "api_key" in config_obj:
if not isinstance(config_obj["allow_private_datasets"], bool):
config = json.dumps({**config_obj, "allow_private_datasets": False})
else:
raise ValueError(
"'api_key' is needed to using 'allow_private_datasets'"
)
else:
config = json.dumps({**config_obj, "allow_private_datasets": False})

# Check remote_orgs and remote_groups == only_local, if not, put
# remote_orgs and remote_groups to only_local
if (
@@ -376,7 +389,12 @@ def _search_for_datasets(self, remote_ckan_base_url, fq_terms=None):
# they will be harvested the next time anyway. When datasets are added,
# we are at risk of seeing datasets twice in the paging, so we detect
# and remove any duplicates.
params["include_private"] = False
params["sort"] = "id asc"

if self.config.get("allow_private_datasets", False):
params["include_private"] = True

if fq_terms:
params["fq"] = " ".join(fq_terms)

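For reference, a harvester source configuration exercising the new option might look like the sketch below. Only 'schema', 'api_key', 'allow_private_datasets', 'allow_harvest_datasets', 'remote_orgs' and 'remote_groups' appear in this diff; the concrete values are placeholders, not defaults documented by the extension:

    # Sketch of a harvester source config using the new option (values are placeholders).
    import json

    config = json.dumps({
        "schema": "dataset",             # must match the local scheming dataset schema
        "api_key": "<ckan-api-key>",     # required: validate_config rejects 'allow_private_datasets' without it
        "allow_private_datasets": True,  # adds include_private=True to the package_search params
        "allow_harvest_datasets": False,
        "remote_orgs": "only_local",
        "remote_groups": "only_local",
    })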
