From 6f98b16ffe4a14e48626f2c6854bd35bb507f2b9 Mon Sep 17 00:00:00 2001 From: Lenzma Date: Fri, 15 Jul 2022 12:23:54 +0200 Subject: [PATCH 1/6] added about section. --- src/oaipmh/common.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/oaipmh/common.py b/src/oaipmh/common.py index c602ada..f82cd50 100644 --- a/src/oaipmh/common.py +++ b/src/oaipmh/common.py @@ -46,6 +46,38 @@ def getField(self, name): __getitem__ = getField +class About(object): + def __init__(self, element, baseURL, identifier, datestamp, metadataNamespace, harvestDate): + self._element = element + # force identifier to be a string, it might be + # an lxml.etree._ElementStringResult... + try: + self._identifier = str(identifier) + except UnicodeEncodeError: + self._identifier = unicode(identifier) + self._datestamp = datestamp + self._baseURL = baseURL + self._metadataNamespace = metadataNamespace + self._harvestDate = harvestDate + + def element(self): + return self._element + + def identifier(self): + return self._identifier + + def datestamp(self): + return self._datestamp + + def baseURL(self): + return self._baseURL + + def metadataNamespace(self): + return self._metadataNamespace + + def harvestDate(self): + return self._harvestDate + class Identify(object): def __init__(self, repositoryName, baseURL, protocolVersion, adminEmails, earliestDatestamp, deletedRecord, granularity, compression, From 750c1f0ee88c3f0ef3dc566adc6ae5e623b3dd1c Mon Sep 17 00:00:00 2001 From: Lenzma Date: Fri, 15 Jul 2022 13:03:41 +0200 Subject: [PATCH 2/6] enable about section. --- src/oaipmh/server.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/oaipmh/server.py b/src/oaipmh/server.py index ae843e8..b3f174f 100644 --- a/src/oaipmh/server.py +++ b/src/oaipmh/server.py @@ -1,6 +1,8 @@ from lxml.etree import ElementTree, Element, SubElement from lxml import etree from datetime import datetime +import base64 + try: from urllib.parse import urlencode, quote, unquote except ImportError: @@ -48,6 +50,8 @@ def getRecord(self, **kw): self._outputHeader(e_record, header) if not header.isDeleted(): self._outputMetadata(e_record, kw['metadataPrefix'], metadata) + if about: + self._outputAbout(e_record, about) return envelope def getMetadata(self, **kw): @@ -131,7 +135,8 @@ def outputFunc(element, records, token_kw): self._outputHeader(e_record, header) if not header.isDeleted(): self._outputMetadata(e_record, metadataPrefix, metadata) - # XXX about + if about: + self._outputAbout(e_record, about) self._outputResuming( e_listRecords, self._server.listRecords, @@ -243,6 +248,25 @@ def _outputMetadata(self, element, metadata_prefix, metadata): self._metadata_registry.writeMetadata( metadata_prefix, e_metadata, metadata) + def _outputAbout(self, element, about): + if about.baseURL(): + e_about = SubElement(element, nsoai('about')) + e_provenance = SubElement(e_about, nsoai('provenance')) + e_provenance.set ('xmlns', 'http://www.openarchives.org/OAI/2.0/provenance') + e_provenance.set ('{%s}schemaLocation' % NS_XSI, 'http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd') + e_originDescription = SubElement(e_provenance, nsoai('originDescription')) + e_originDescription.set ('harvestDate', datetime_to_datestamp(about.harvestDate())) + e_originDescription.set ('altered', "true") + e_baseURL = SubElement(e_originDescription, nsoai('baseURL')) + e_baseURL.text = about.baseURL() + e_identifier = SubElement(e_originDescription, nsoai('identifier')) + e_identifier.text = about.identifier() + e_datestamp = SubElement(e_originDescription, nsoai('datestamp')) +# e_datestamp.text = datetime_to_datestamp(about.datestamp()) + e_datestamp.text = about.datestamp() + e_metadataNamespace = SubElement(e_originDescription, nsoai('metadataNamespace')) + e_metadataNamespace.text = about.metadataNamespace() + class ServerBase(common.ResumptionOAIPMH): """A server that responds to messages by returning OAI-PMH compliant XML. @@ -453,10 +477,12 @@ def encodeResumptionToken(kw, cursor): if until is not None: kw['until'] = datetime_to_datestamp(until) return quote(urlencode(kw)) + # return base64.b64encode(str(urlencode(kw))) def decodeResumptionToken(token): token = str(unquote(token)) - + # token = base64.b64decode(token).decode('utf-8') + try: kw = parse_qs(token, True, True) except ValueError: @@ -468,6 +494,7 @@ def decodeResumptionToken(token): if key == 'from_' or key == 'until': value = datestamp_to_datetime(value) result[key] = value + try: cursor = int(result.pop('cursor')) except (KeyError, ValueError): From 788196a2db0669c24a2ea3fb7fdcb0f4a41450d9 Mon Sep 17 00:00:00 2001 From: Lenzma Date: Fri, 15 Jul 2022 13:10:46 +0200 Subject: [PATCH 3/6] fixed resumption token. --- src/oaipmh/server.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/oaipmh/server.py b/src/oaipmh/server.py index b3f174f..9fdcdaf 100644 --- a/src/oaipmh/server.py +++ b/src/oaipmh/server.py @@ -476,12 +476,12 @@ def encodeResumptionToken(kw, cursor): until = kw.get('until') if until is not None: kw['until'] = datetime_to_datestamp(until) - return quote(urlencode(kw)) - # return base64.b64encode(str(urlencode(kw))) +# return quote(urlencode(kw)) + return base64.b64encode(str(urlencode(kw))) def decodeResumptionToken(token): - token = str(unquote(token)) - # token = base64.b64decode(token).decode('utf-8') +# token = str(unquote(token)) + token = base64.b64decode(token).decode('utf-8') try: kw = parse_qs(token, True, True) From 9dc622394071593ed06742f4d9f9d2d4a2ad5bad Mon Sep 17 00:00:00 2001 From: Lenzma Date: Fri, 15 Jul 2022 13:18:26 +0200 Subject: [PATCH 4/6] fix resumption token. --- src/oaipmh/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oaipmh/server.py b/src/oaipmh/server.py index 9fdcdaf..7831431 100644 --- a/src/oaipmh/server.py +++ b/src/oaipmh/server.py @@ -477,7 +477,7 @@ def encodeResumptionToken(kw, cursor): if until is not None: kw['until'] = datetime_to_datestamp(until) # return quote(urlencode(kw)) - return base64.b64encode(str(urlencode(kw))) + return base64.b64encode(bytes(urlencode(kw), encoding="utf-8")) def decodeResumptionToken(token): # token = str(unquote(token)) From 0729af05603807c9864c193613bab36049e1af05 Mon Sep 17 00:00:00 2001 From: Lenzma Date: Mon, 23 Jan 2023 14:13:41 +0100 Subject: [PATCH 5/6] added repositoryID and repositoryName to about section. --- src/oaipmh/common.py | 10 +++++++++- src/oaipmh/server.py | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/oaipmh/common.py b/src/oaipmh/common.py index f82cd50..907f6ac 100644 --- a/src/oaipmh/common.py +++ b/src/oaipmh/common.py @@ -47,7 +47,7 @@ def getField(self, name): __getitem__ = getField class About(object): - def __init__(self, element, baseURL, identifier, datestamp, metadataNamespace, harvestDate): + def __init__(self, element, baseURL, identifier, datestamp, metadataNamespace, harvestDate, repositoryID=None, reposioryName=None): self._element = element # force identifier to be a string, it might be # an lxml.etree._ElementStringResult... @@ -59,6 +59,8 @@ def __init__(self, element, baseURL, identifier, datestamp, metadataNamespace, h self._baseURL = baseURL self._metadataNamespace = metadataNamespace self._harvestDate = harvestDate + self._repositoryID = repositoryID + self._repositoryName = repositoryName def element(self): return self._element @@ -78,6 +80,12 @@ def metadataNamespace(self): def harvestDate(self): return self._harvestDate + def repositoryID(self): + return self._repositoryID + + def repositoryName(self): + return self._repositoryName + class Identify(object): def __init__(self, repositoryName, baseURL, protocolVersion, adminEmails, earliestDatestamp, deletedRecord, granularity, compression, diff --git a/src/oaipmh/server.py b/src/oaipmh/server.py index 7831431..07804e4 100644 --- a/src/oaipmh/server.py +++ b/src/oaipmh/server.py @@ -266,6 +266,10 @@ def _outputAbout(self, element, about): e_datestamp.text = about.datestamp() e_metadataNamespace = SubElement(e_originDescription, nsoai('metadataNamespace')) e_metadataNamespace.text = about.metadataNamespace() + e_repositoryID = SubElement(e_originDescription, nsoai('repositoryID')) + e_repositoryID.text = about.repositoryID() + e_repositoryName = SubElement(e_originDescription, nsoai('repositoryName')) + e_repositoryName.text = about.repositoryName() class ServerBase(common.ResumptionOAIPMH): """A server that responds to messages by returning OAI-PMH compliant XML. From c820990bad0eb1f1267532041f458bc716461ec8 Mon Sep 17 00:00:00 2001 From: Lenzma Date: Mon, 23 Jan 2023 14:43:47 +0100 Subject: [PATCH 6/6] fixed typo. --- src/oaipmh/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/oaipmh/common.py b/src/oaipmh/common.py index 907f6ac..387d086 100644 --- a/src/oaipmh/common.py +++ b/src/oaipmh/common.py @@ -47,7 +47,7 @@ def getField(self, name): __getitem__ = getField class About(object): - def __init__(self, element, baseURL, identifier, datestamp, metadataNamespace, harvestDate, repositoryID=None, reposioryName=None): + def __init__(self, element, baseURL, identifier, datestamp, metadataNamespace, harvestDate, repositoryID=None, repositoryName=None): self._element = element # force identifier to be a string, it might be # an lxml.etree._ElementStringResult...