From 02d5605e6140e725259f838a5113a864122385b4 Mon Sep 17 00:00:00 2001 From: <> Date: Tue, 7 May 2024 09:35:16 +0000 Subject: [PATCH] Deployed 6a03620 with MkDocs version: 1.6.0 --- .nojekyll | 0 404.html | 667 ++ api/client/index.html | 3525 +++++++++ api/exceptions/index.html | 1491 ++++ api/iterator/index.html | 1783 +++++ api/models/index.html | 2755 +++++++ api/response/index.html | 1074 +++ api/utils/index.html | 1617 ++++ assets/_mkdocstrings.css | 119 + assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.a7c05c9e.min.js | 29 + assets/javascripts/bundle.a7c05c9e.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 
+ assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++++++++++++ .../workers/search.b8dbb3d2.min.js | 42 + .../workers/search.b8dbb3d2.min.js.map | 7 + assets/stylesheets/main.66ac8b77.min.css | 1 + assets/stylesheets/main.66ac8b77.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + changelog/index.html | 1858 +++++ contributing/index.html | 1046 +++ customizing/index.html | 797 ++ index.html | 1013 +++ license/index.html | 767 ++ oaipmh/index.html | 912 +++ objects.inv | Bin 0 -> 913 bytes search/search_index.json | 1 + sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes tutorial/index.html | 1427 ++++ 63 files changed, 28145 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 api/client/index.html create mode 100644 api/exceptions/index.html create mode 100644 api/iterator/index.html create mode 100644 api/models/index.html create mode 100644 api/response/index.html create mode 100644 api/utils/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/images/favicon.png create mode 100644 assets/javascripts/bundle.a7c05c9e.min.js create mode 100644 assets/javascripts/bundle.a7c05c9e.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js.map create mode 100644 assets/stylesheets/main.66ac8b77.min.css create mode 100644 assets/stylesheets/main.66ac8b77.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create 
mode 100644 changelog/index.html create mode 100644 contributing/index.html create mode 100644 customizing/index.html create mode 100644 index.html create mode 100644 license/index.html create mode 100644 oaipmh/index.html create mode 100644 objects.inv create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 tutorial/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..5603882 --- /dev/null +++ b/404.html @@ -0,0 +1,667 @@ + + + +
+ + + + + + + + + + + + + + + + +The client module provides a client interface for interacting with OAI-PMH services.
+This module defines the Scythe class, which facilitates the harvesting of records, identifiers, and sets +from OAI-PMH compliant repositories. It handles various OAI-PMH requests, manages pagination with resumption tokens, +and supports customizable error handling and retry logic.
+ + + +Scythe
+
+
+A client for interacting with OAI-PMH interfaces, facilitating the harvesting of records, identifiers, and sets.
+The Scythe class is designed to simplify the process of making OAI-PMH requests and processing the responses. +It supports various OAI-PMH verbs and handles pagination through resumption tokens, error handling, and retry logic.
+ + + +Attributes:
+Name | +Type | +Description | +
---|---|---|
endpoint |
+ + | +
+
+
+ The base URL of the OAI-PMH service. + |
+
http_method |
+ + | +
+
+
+ The HTTP method to use for requests (either 'GET' or 'POST'). + |
+
iterator |
+ + | +
+
+
+ The iterator class to be used for iterating over responses. + |
+
max_retries |
+ + | +
+
+
+ The maximum number of retries for a request in case of failures. + |
+
retry_status_codes |
+ + | +
+
+
+ The HTTP status codes on which to retry the request. + |
+
default_retry_after |
+ + | +
+
+
+ The default wait time (in seconds) between retries if no 'retry-after' header is present. + |
+
class_mapping |
+ + | +
+
+
+ A mapping from OAI verbs to classes representing OAI items. + |
+
encoding |
+ + | +
+
+
+ The character encoding for decoding responses. Defaults to the server's specified encoding. + |
+
auth |
+ + | +
+
+
+ Optional authentication credentials for accessing the OAI-PMH interface. + |
+
timeout |
+ + | +
+
+
+ The timeout (in seconds) for HTTP requests. + |
+
Examples:
+>>> with Scythe("https://zenodo.org/oai2d") as scythe:
+...     records = scythe.list_records()
+...     for record in records:
+...         print(record)
+
src/oaipmh_scythe/client.py
51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 |
|
client: httpx.Client
+
+
+ property
+
+
+Provide a reusable HTTP client instance for making requests.
+This property ensures that an httpx.Client
instance is created and maintained for
+the lifecycle of the Scythe
instance. It handles the creation of the client and
+ensures that a new client is created if the existing one is closed.
Returns:
+Type | +Description | +
---|---|
+ Client
+ |
+
+
+
+ A reusable HTTP client instance for making HTTP requests. + |
+
close()
+
+Close the internal HTTP client if it exists and is open.
+This method is responsible for explicitly closing the httpx.Client
instance used
+by the Scythe
class. It should be called when the client is no longer needed, to
+ensure proper cleanup and release of resources.
It's recommended to call this method at the end of operations or when the Scythe
+instance is no longer in use, especially if it's not being used as a context manager.
src/oaipmh_scythe/client.py
get_record(identifier, metadata_prefix='oai_dc')
+
+Issue a GetRecord request to the OAI server.
+Send a request to the OAI server to retrieve a specific record. The request is constructed with the provided +identifier and metadata prefix. The method then processes and returns the relevant OAIResponse or Record object +using an iterator.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#GetRecord
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
identifier |
+
+ str
+ |
+
+
+
+ A unique identifier for the record to be retrieved from the OAI server. + |
+ + required + | +
metadata_prefix |
+
+ str
+ |
+
+
+
+ The metadata format to be returned for the record. Defaults to "oai_dc". + |
+
+ 'oai_dc'
+ |
+
Returns:
+Type | +Description | +
---|---|
+ OAIResponse | Record
+ |
+
+
+
+ An OAIResponse or Record object representing the requested record. + |
+
Raises:
+Type | +Description | +
---|---|
+ CannotDisseminateFormat
+ |
+
+
+
+ If the specified metadata_prefix is not supported by the OAI server for +the requested record. + |
+
+ IdDoesNotExist
+ |
+
+
+
+ If the specified identifier does not correspond to any record in the OAI server. + |
+
src/oaipmh_scythe/client.py
get_retry_after(http_response)
+
+Determine the appropriate time to wait before retrying a request, based on the server's response.
+Check the status code of the provided HTTP response. If it's 503 (Service Unavailable), +attempt to parse the 'retry-after' header to find the suggested wait time. If parsing fails +or a different status code is received, use the default retry time.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
http_response |
+
+ Response
+ |
+
+
+
+ The HTTP response received from the server. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ int | float
+ |
+
+
+
+ The number of seconds to wait before retrying the request. + |
+
src/oaipmh_scythe/client.py
harvest(query)
+
+Perform an HTTP request to the OAI server with the given parameters.
+Send an OAI-PMH request to the server using the specified parameters. Handle retry logic +for failed requests based on the configured retry settings and response status codes.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
query |
+
+ dict[str, str]
+ |
+
+
+
+ A dictionary containing the request parameters. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ OAIResponse
+ |
+
+
+
+ An OAIResponse object encapsulating the server's response. + |
+
Raises:
+Type | +Description | +
---|---|
+ HTTPError
+ |
+
+
+
+ If the HTTP request fails after the maximum number of retries. + |
+
src/oaipmh_scythe/client.py
identify()
+
+Issue an Identify request to the OAI server.
+Send a request to identify the OAI server and retrieve its information. This includes details such as the repository name, +the base URL, the protocol version, and other relevant data about the OAI server. It's useful for understanding the +capabilities and configuration of the server.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#Identify
+ + + +Returns:
+Type | +Description | +
---|---|
+ Identify
+ |
+
+
+
+ An object encapsulating the server's identify response, which contains various pieces of information about +the OAI server. + |
+
src/oaipmh_scythe/client.py
list_identifiers(from_=None, until=None, metadata_prefix='oai_dc', set_=None, resumption_token=None, ignore_deleted=False)
+
+Issue a ListIdentifiers request to the OAI server.
+Send a request to list record identifiers from the OAI server. This method allows filtering records based on +date range, set membership, and metadata format. It also supports pagination through resumption tokens and has +an option to ignore deleted records.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#ListIdentifiers
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
from_ |
+
+ str | None
+ |
+
+
+
+ An optional date string specifying the start of a date range for harvesting records. + |
+
+ None
+ |
+
until |
+
+ str | None
+ |
+
+
+
+ An optional date string specifying the end of a date range for harvesting records. + |
+
+ None
+ |
+
metadata_prefix |
+
+ str
+ |
+
+
+
+ The metadata format for the records to be harvested. Defaults to "oai_dc". + |
+
+ 'oai_dc'
+ |
+
set_ |
+
+ str | None
+ |
+
+
+
+ An optional set identifier to restrict the harvest to records within a specific set. + |
+
+ None
+ |
+
resumption_token |
+
+ str | None
+ |
+
+
+
+ An optional token for pagination, used to continue a request for the next page of +identifiers. + |
+
+ None
+ |
+
ignore_deleted |
+
+ bool
+ |
+
+
+
+ If True, skip records flagged as deleted in the response. + |
+
+ False
+ |
+
Yields:
+Type | +Description | +
---|---|
+ OAIResponse | Header
+ |
+
+
+
+ An iterator over OAIResponse or Header objects, each representing an individual record identifier +or response from the server. + |
+
Raises:
+Type | +Description | +
---|---|
+ BadResumptionToken
+ |
+
+
+
+ If the provided resumption token is invalid or expired. + |
+
+ CannotDisseminateFormat
+ |
+
+
+
+ If the specified metadata_prefix is not supported by the OAI server. + |
+
+ NoRecordsMatch
+ |
+
+
+
+ If no records match the provided criteria. + |
+
+ NoSetHierarchy
+ |
+
+
+
+ If set-based harvesting is requested but the OAI server does not support sets. + |
+
src/oaipmh_scythe/client.py
list_metadata_formats(identifier=None)
+
+Issue a ListMetadataFormats request to the OAI server.
+Send a request to list the metadata formats available from the OAI server. This can be done for the entire +repository or for a specific record if an identifier is provided. The method constructs a query and yields an +iterator over OAIResponse or MetadataFormat objects, each representing a different metadata format or response +from the server.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
identifier |
+
+ str | None
+ |
+
+
+
+ An optional unique identifier for a specific record to query available metadata formats. + If None, all metadata formats available in the repository are listed. + |
+
+ None
+ |
+
Yields:
+Type | +Description | +
---|---|
+ OAIResponse | MetadataFormat
+ |
+
+
+
+ An iterator over OAIResponse or MetadataFormat objects, each representing an individual metadata format +or response from the server. + |
+
Raises:
+Type | +Description | +
---|---|
+ IdDoesNotExist
+ |
+
+
+
+ If the specified identifier does not correspond to any record in the OAI server. + |
+
+ NoMetadataFormats
+ |
+
+
+
+ If there are no metadata formats available for the requested record or repository. + |
+
src/oaipmh_scythe/client.py
list_records(from_=None, until=None, metadata_prefix='oai_dc', set_=None, resumption_token=None, ignore_deleted=False)
+
+Issue a ListRecords request to the OAI server.
+Send a request to list records from the OAI server, allowing for selective harvesting based on date range, +set membership, and metadata format. This method supports pagination via resumption tokens and can optionally +ignore records marked as deleted.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#ListRecords
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
from_ |
+
+ str | None
+ |
+
+
+
+ An optional date string specifying the start of a date range for harvesting records. + |
+
+ None
+ |
+
until |
+
+ str | None
+ |
+
+
+
+ An optional date string specifying the end of a date range for harvesting records. + |
+
+ None
+ |
+
metadata_prefix |
+
+ str
+ |
+
+
+
+ The metadata format for the records to be harvested. Defaults to "oai_dc". + |
+
+ 'oai_dc'
+ |
+
set_ |
+
+ str | None
+ |
+
+
+
+ An optional set identifier to restrict the harvest to records within a specific set. + |
+
+ None
+ |
+
resumption_token |
+
+ str | None
+ |
+
+
+
+ An optional token for pagination, used to continue a request for the next page of records. + |
+
+ None
+ |
+
ignore_deleted |
+
+ bool
+ |
+
+
+
+ If True, skip records flagged as deleted in the response. + |
+
+ False
+ |
+
Yields:
+Type | +Description | +
---|---|
+ OAIResponse | Record
+ |
+
+
+
+ An iterator over OAIResponse or Record objects, each representing an individual record or response +from the server. + |
+
Raises:
+Type | +Description | +
---|---|
+ BadArgument
+ |
+
+
+
+ If the arguments provided do not conform to the expectations of the OAI server. + |
+
+ BadResumptionToken
+ |
+
+
+
+ If the provided resumption token is invalid or expired. + |
+
+ CannotDisseminateFormat
+ |
+
+
+
+ If the specified metadata_prefix is not supported by the OAI server. + |
+
+ NoRecordsMatch
+ |
+
+
+
+ If no records match the provided criteria. + |
+
+ NoSetHierarchy
+ |
+
+
+
+ If set-based harvesting is requested but the OAI server does not support sets. + |
+
src/oaipmh_scythe/client.py
list_sets(resumption_token=None)
+
+Issue a ListSets request to the OAI server.
+Send a request to list all sets defined in the OAI server. Sets are used to categorize records in the OAI +repository. This method allows for the retrieval of these sets, optionally using a resumption token to handle +pagination.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#ListSets
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
resumption_token |
+
+ str | None
+ |
+
+
+
+ An optional token for pagination, used to continue a request for the next batch of sets. + |
+
+ None
+ |
+
Yields:
+Type | +Description | +
---|---|
+ OAIResponse | Set
+ |
+
+
+
+ An iterator over OAIResponse or Set objects, each representing an individual set or response from the server. + |
+
Raises:
+Type | +Description | +
---|---|
+ BadResumptionToken
+ |
+
+
+
+ If the provided resumption token is invalid or expired. + |
+
+ NoSetHierarchy
+ |
+
+
+
+ If the OAI server does not support sets or has no set hierarchy available. + |
+
src/oaipmh_scythe/client.py
The exceptions module defines exception classes for handling error scenarios encountered in OAI-PMH operations.
+These exception classes provide a structured way to capture and communicate specific errors that may occur +while interacting with OAI-PMH services. Each class corresponds to a particular type of error defined +in the OAI-PMH protocol, facilitating precise error handling and meaningful feedback in client applications.
+ + + +Classes:
+Name | +Description | +
---|---|
OAIPMHException |
+
+
+
+ The base exception class for all OAI-PMH related errors. + |
+
GeneralOAIPMHError |
+
+
+
+ A general exception class for OAI-PMH errors not specifically covered by other classes. + |
+
BadArgument |
+
+
+
+ Raised when a request contains illegal, missing, or improperly formatted arguments. + |
+
BadVerb |
+
+
+
+ Raised when the verb argument in a request is invalid or improperly used. + |
+
BadResumptionToken |
+
+
+
+ Raised when a resumption token is invalid or expired. + |
+
CannotDisseminateFormat |
+
+
+
+ Raised when a requested metadata format is not supported. + |
+
IdDoesNotExist |
+
+
+
+ Raised when an identifier does not exist or is illegal in a repository. + |
+
NoSetHierarchy |
+
+
+
+ Raised when a repository does not support set hierarchies. + |
+
NoMetadataFormats |
+
+
+
+ Raised when no metadata formats are available for an item. + |
+
NoRecordsMatch |
+
+
+
+ Raised when a query yields no results due to specific argument combinations. + |
+
These custom exceptions enhance the robustness and clarity of error handling in OAI-PMH client implementations, +aligning closely with the protocol's standard error conditions.
+Ref: https://openarchives.org/OAI/openarchivesprotocol.html#ErrorConditions
+ + + +BadArgument
+
+
+
+ Bases: OAIPMHException
Exception raised when the OAI-PMH request contains illegal or missing arguments or arguments with illegal syntax.
+This includes scenarios where arguments are repeated, missing, have illegal values, +or their syntax is not compliant with the OAI-PMH specifications.
+ +src/oaipmh_scythe/exceptions.py
BadResumptionToken
+
+
+
+ Bases: OAIPMHException
Exception raised when the resumptionToken argument in the OAI-PMH request is invalid or expired.
+Indicates issues with the value of the resumptionToken, such as expiration or incorrect formatting.
+ +src/oaipmh_scythe/exceptions.py
BadVerb
+
+
+
+ Bases: OAIPMHException
Exception raised when the verb argument in the OAI-PMH request is invalid.
+This occurs if the verb value is not a legal OAI-PMH verb, the verb argument is missing, +or if the verb argument is repeated in the request.
+ +src/oaipmh_scythe/exceptions.py
CannotDisseminateFormat
+
+
+
+ Bases: OAIPMHException
Exception raised when the requested metadata format is not supported.
+This error occurs if the metadata format identified by the metadataPrefix argument is not +supported by either the requested item or the repository as a whole.
+ +src/oaipmh_scythe/exceptions.py
GeneralOAIPMHError
+
+
+
+ Bases: OAIPMHException
General exception for context-specific OAI-PMH errors not covered by the other specific classes.
+This class is used for OAI-PMH errors that do not fall into the predefined categories +of the other exception classes in this module.
+ +src/oaipmh_scythe/exceptions.py
IdDoesNotExist
+
+
+
+ Bases: OAIPMHException
Exception raised when the specified identifier is unknown or illegal in the repository.
+Indicates that the value of the identifier argument does not correspond to any item +in the repository or is not formulated correctly.
+ +src/oaipmh_scythe/exceptions.py
NoMetadataFormats
+
+
+
+ Bases: OAIPMHException
Exception raised when there are no available metadata formats for the specified item.
+Indicates a lack of metadata formats that can be disseminated for the requested item.
+ +src/oaipmh_scythe/exceptions.py
NoRecordsMatch
+
+
+
+ Bases: OAIPMHException
Exception raised when a query does not yield any results.
+This error occurs when the combination of the 'from', 'until', 'set', and 'metadataPrefix' +arguments in a request results in an empty list, indicating no matching records.
+ +src/oaipmh_scythe/exceptions.py
NoSetHierarchy
+
+
+
+ Bases: OAIPMHException
Exception raised when sets are not supported by the repository.
+This error indicates that the repository does not support the concept of set hierarchies.
+ + + +The iterator module provides classes for iterating over data retrieved from OAI-PMH services.
+This module includes the BaseOAIIterator, an abstract base class that defines a standard interface +for OAI-PMH data iteration, along with its specialized subclasses. Each subclass is tailored +to handle specific types of data such as records, identifiers, or sets, +ensuring efficient and structured access to OAI-PMH responses.
+ + + +Classes:
+Name | +Description | +
---|---|
BaseOAIIterator |
+
+
+
+ An abstract base class for creating iterators over OAI-PMH data. + |
+
OAIResponseIterator |
+
+
+
+ Iterates over OAI responses, handling pagination and resumption tokens. + |
+
OAIItemIterator |
+
+
+
+ Provides iteration over specific OAI items like records, identifiers, and sets. + |
+
BaseOAIIterator
+
+
+
+ Bases: ABC
An abstract base class for iterators over various types of data aggregated through the OAI-PMH protocol.
+This class provides a common interface and implementation for iterating over records, identifiers, +and sets obtained via OAI-PMH. It handles OAI-PMH's resumption token mechanism, allowing seamless +iteration over potentially large sets of data.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
scythe |
+
+ Scythe
+ |
+
+
+
+ The Scythe instance used to perform OAI-PMH requests. + |
+ + required + | +
query |
+
+ dict[str, str]
+ |
+
+
+
+ A dictionary of parameters specifying the details of the OAI-PMH request. + |
+ + required + | +
ignore_deleted |
+
+ bool
+ |
+
+
+
+ A boolean flag indicating whether to ignore deleted records in the iteration. + |
+
+ False
+ |
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
scythe |
+ + | +
+
+
+ The Scythe instance handling OAI-PMH requests. + |
+
query |
+ + | +
+
+
+ The parameters for OAI-PMH requests. + |
+
ignore_deleted |
+ + | +
+
+
+ Indicates whether deleted records should be ignored. + |
+
verb |
+
+ str
+ |
+
+
+
+ The OAI-PMH verb (e.g., 'ListRecords', 'ListIdentifiers') used in the request. + |
+
oai_response |
+
+ OAIResponse | None
+ |
+
+
+
+ The most recent OAIResponse received from the OAI server. + |
+
resumption_token |
+
+ ResumptionToken | None
+ |
+
+
+
+ The current resumption token, if any, for paginated results. + |
+
src/oaipmh_scythe/iterator.py
44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 |
|
OAIItemIterator
+
+
+
+ Bases: BaseOAIIterator
An iterator class for iterating over various types of OAI items aggregated via OAI-PMH.
+This iterator is designed to handle the iteration of specific OAI items, such as records or sets, from a repository. +It extends the functionality of the BaseOAIIterator to parse and yield individual items from the OAI-PMH responses.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
scythe |
+
+ Scythe
+ |
+
+
+
+ The Scythe instance used for making OAI-PMH requests. + |
+ + required + | +
query |
+
+ dict[str, str]
+ |
+
+
+
+ A dictionary of OAI-PMH request parameters. + |
+ + required + | +
ignore_deleted |
+
+ bool
+ |
+
+
+
+ A boolean indicating whether to ignore deleted records in the response. + |
+
+ False
+ |
+
src/oaipmh_scythe/iterator.py
__iter__()
+
+Iterate over individual OAI items from the response.
+Go through the items in the OAI-PMH response, applying any necessary mapping and handling +the exclusion of deleted records if specified. Automatically handle pagination through resumption tokens.
+ + + +Yields:
+Name | Type | +Description | +
---|---|---|
OAIItem |
+ OAIItem
+ |
+
+
+
+ The next OAI item (e.g., record, identifier, set) from the response. + |
+
src/oaipmh_scythe/iterator.py
OAIResponseIterator
+
+
+
+ Bases: BaseOAIIterator
An iterator class for iterating over OAI responses obtained via the OAI-PMH protocol.
+This iterator specifically handles the iteration of OAIResponse objects, allowing for seamless +navigation through a sequence of responses returned by an OAI-PMH request. It utilizes the +underlying mechanisms of the BaseOAIIterator, including handling of resumption tokens for paginated data.
+ +src/oaipmh_scythe/iterator.py
__iter__()
+
+Yield the next OAIResponse object from the server response sequence.
+Enable the OAIResponseIterator to iterate over a series of OAIResponse objects, managing pagination +with resumption tokens. Continue yielding responses until no more data is available from the server.
+ + + +Yields:
+Name | Type | +Description | +
---|---|---|
OAIResponse |
+ OAIResponse
+ |
+
+
+
+ The next available OAIResponse object in the sequence. + |
+
src/oaipmh_scythe/iterator.py
The models module defines data structures for representing various components of the OAI-PMH protocol.
+This module includes classes that encapsulate different entities in OAI-PMH, such as resumption tokens and +various types of OAI items. These classes provide structured representations of OAI-PMH elements, +facilitating their manipulation and processing in client applications.
+ + + +Classes:
+Name | +Description | +
---|---|
ResumptionToken |
+
+
+
+ Represents a resumption token used in OAI-PMH for paginated data retrieval. + |
+
OAIItem |
+
+
+
+ A base class for generic OAI items. + |
+
Identify |
+
+
+
+ Represents an Identify response in OAI-PMH. + |
+
Header |
+
+
+
+ Represents an OAI Header element. + |
+
Record |
+
+
+
+ Represents an OAI Record element. + |
+
Set |
+
+
+
+ Represents an OAI Set element. + |
+
MetadataFormat |
+
+
+
+ Represents an OAI MetadataFormat element. + |
+
Header
+
+
+
+ Bases: OAIItem
A class representing an OAI Header in the OAI-PMH protocol.
+The header contains essential information about a record, such as its identifier, datestamp, +and set specifications. This class parses these details from the provided XML header element +and makes them easily accessible as attributes.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
header_element |
+
+ _Element
+ |
+
+
+
+ The XML element representing the OAI header. + |
+ + required + | +
Attributes:
+Name | +Type | +Description | +
---|---|---|
deleted |
+ + | +
+
+
+ Indicates whether the record is marked as deleted in the OAI-PMH repository. + |
+
identifier |
+ + | +
+
+
+ The unique identifier of the record in the OAI-PMH repository. + |
+
datestamp |
+ + | +
+
+
+ The datestamp of the record, indicating when it was last updated. + |
+
setSpecs |
+ + | +
+
+
+ A list of set specifications that the record belongs to. + |
+
src/oaipmh_scythe/models.py
__iter__()
+
+Iterate over the header information, yielding key-value pairs.
+ +src/oaipmh_scythe/models.py
Identify
+
+
+
+ Bases: OAIItem
A class representing an Identify container in the OAI-PMH protocol.
+This class is specifically used for handling the response of an Identify request in OAI-PMH. +It differs from other OAI entities in that it is initialized with an OAIResponse object +rather than a direct XML element. The class parses the Identify information from the +response and provides access to its individual components.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
identify_response |
+
+ OAIResponse
+ |
+
+
+
+ The response object from an Identify request. +It should contain the XML representation of the Identify response. + |
+ + required + | +
Attributes:
+Name | +Type | +Description | +
---|---|---|
xml |
+ + | +
+
+
+ The XML element representing the Identify response. + |
+
_identify_dict |
+ + | +
+
+
+ A dictionary containing the parsed Identify information. + |
+
Dynamic Attributes |
+ + | +
+
+
+
+ Based on the content of the Identify response, additional attributes + are dynamically set on this object. These can include attributes like + repository name, base URL, protocol version, etc. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the Identify element is not found in the provided XML. + |
+
src/oaipmh_scythe/models.py
__iter__()
+
+MetadataFormat
+
+
+
+ Bases: OAIItem
A class representing a metadata format in the OAI-PMH protocol.
+This class handles the representation of a metadata format, which is an essential part of the OAI-PMH protocol. +It parses the provided XML element to extract and store metadata format details such as the metadata prefix.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
mdf_element |
+
+ _Element
+ |
+
+
+
+ The XML element representing the metadata format. This element is parsed +to extract metadata format details. + |
+ + required + | +
Attributes:
+Name | +Type | +Description | +
---|---|---|
metadataPrefix |
+
+ str | None
+ |
+
+
+
+ The prefix of the metadata format, extracted from the XML element. + |
+
_mdf_dict |
+ + | +
+
+
+ A dictionary containing the parsed metadata format details. + |
+
src/oaipmh_scythe/models.py
__iter__()
+
+OAIItem
+
+
+A base class representing a generic item in the OAI-PMH protocol.
+This class provides a common structure for handling and manipulating XML data +associated with different types of OAI-PMH items, such as records, headers, or sets.
+ + + +Attributes:
+Name | +Type | +Description | +
---|---|---|
xml |
+ + | +
+
+
+ The parsed XML element representing the OAI item. + |
+
_strip_ns |
+ + | +
+
+
+ A flag indicating whether to remove the namespaces from the element names +in the dictionary representation. + |
+
_oai_namespace |
+ + | +
+
+
+ The namespace URI extracted from the XML element. + |
+
src/oaipmh_scythe/models.py
raw: str
+
+
+ property
+
+
+Return the original XML as a unicode string.
+Record
+
+
+
+ Bases: OAIItem
A class representing an OAI record in the OAI-PMH protocol.
+This class encapsulates a record element from an OAI-PMH response, handling its parsing, and providing +structured access to its details, such as header information and metadata. It checks for the presence of +the header and metadata elements and raises an error if the header is not found.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
record_element |
+
+ _Element
+ |
+
+
+
+ The XML element representing the OAI record. + |
+ + required + | +
strip_ns |
+
+ bool
+ |
+
+
+
+ If True, namespaces are removed from the element names in the parsed metadata. Defaults to True. + |
+
+ True
+ |
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
header |
+ + | +
+
+
+ An instance of the Header class representing the header information of the record. + |
+
deleted |
+ + | +
+
+
+ Indicates whether the record is marked as deleted. + |
+
metadata |
+ + | +
+
+
+ A dictionary representation of the record's metadata, if available and not deleted. + |
+
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the header element is not found in the provided XML. + |
+
src/oaipmh_scythe/models.py
__iter__()
+
+get_metadata()
+
+Extract and return the record's metadata as a dictionary.
+ +src/oaipmh_scythe/models.py
ResumptionToken
+
+
+
+ dataclass
+
+
+A data class representing a resumption token in the OAI-PMH protocol.
+Resumption tokens are used for iterating over multiple sets of results in OAI-PMH +harvest requests. This class encapsulates the typical components of a resumption token, +including the token itself, cursor, complete list size, and an expiration date.
+ + + +Attributes:
+Name | +Type | +Description | +
---|---|---|
token |
+
+ str | None
+ |
+
+
+
+ The actual resumption token used for continuing the iteration in subsequent OAI-PMH requests. +Default is None. + |
+
cursor |
+
+ str | None
+ |
+
+
+
+ A marker indicating the current position in the list of results. Default is None. + |
+
complete_list_size |
+
+ str | None
+ |
+
+
+
+ The total number of records in the complete list of results. Default is None. + |
+
expiration_date |
+
+ str | None
+ |
+
+
+
+ The date and time when the resumption token expires. Default is None. + |
+
src/oaipmh_scythe/models.py
Set
+
+
+
+ Bases: OAIItem
A class representing a set in the OAI-PMH protocol.
+This class encapsulates a set element from an OAI-PMH response and provides structured access to its details. +It parses the set information from the provided XML element and dynamically sets attributes +based on the parsed content.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
set_element |
+
+ _Element
+ |
+
+
+
+ The XML element representing the OAI set. The element is parsed to extract set details. + |
+ + required + | +
Attributes:
+Name | +Type | +Description | +
---|---|---|
setName |
+
+ str | None
+ |
+
+
+
+ The name of the set, extracted from the set's XML element. + |
+
_set_dict |
+ + | +
+
+
+ A dictionary containing the parsed set information. + |
+
src/oaipmh_scythe/models.py
__iter__()
+
+The response module offers a structured representation of responses from OAI-PMH services.
+This module defines the OAIResponse class, which encapsulates the HTTP response from an OAI-PMH server, +providing easy access to its content both as raw text and as parsed XML. It is designed to work seamlessly +with various components of an OAI-PMH client, handling the nuances of OAI-PMH responses.
+ + + +OAIResponse
+
+
+
+ dataclass
+
+
+Represents a response received from an OAI server, encapsulating the raw HTTP response and parsed XML content.
+This class provides a structured way to access various aspects of an OAI server's response. +It offers methods to retrieve the raw text of the response, parse it as XML, +and obtain a string representation of the response that includes the OAI verb.
+ + + +Attributes:
+Name | +Type | +Description | +
---|---|---|
http_response |
+
+ Response
+ |
+
+
+
+ The original HTTP response object from the OAI server. + |
+
params |
+
+ dict[str, str]
+ |
+
+
+
+ A dictionary of the OAI parameters used in the request that led to this response. + |
+
src/oaipmh_scythe/response.py
raw: str
+
+
+ property
+
+
+Return the raw text of the server's response as a unicode string.
+xml: etree._Element
+
+
+ property
+
+
+Parse the server's response content and return it as an etree._Element object.
The utils module provides utility functions for handling XML data in the context of OAI-PMH services.
+This module includes functions essential for parsing and transforming XML data obtained from OAI-PMH responses. +These utilities facilitate the extraction of namespaces and conversion of XML elements into +more accessible data structures.
+ + + +Functions:
+Name | +Description | +
---|---|
log_response |
+
+
+
+ Log the details of an HTTP response. + |
+
remove_none_values |
+
+
+
+ Remove keys from the dictionary where the value is None. + |
+
filter_dict_except_resumption_token |
+
+
+
+ Filter keys from the dictionary, if resumption token is not None. + |
+
get_namespace |
+
+
+
+ Extracts the namespace from an XML element. + |
+
xml_to_dict |
+
+
+
+ Converts an XML tree or element into a dictionary representation. + |
+
filter_dict_except_resumption_token(d)
+
+Filter out keys with None values from a dictionary, with special handling for 'resumptionToken'.
+If 'resumptionToken' is present and not None, and there are other non-None keys, log a warning and +retain only 'resumptionToken' and 'verb' keys. Otherwise, return a dictionary excluding any keys +with None values.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+
+ dict[str, Any | None]
+ |
+
+
+
+ The dictionary to filter. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ dict[str, Any]
+ |
+
+
+
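+ A filtered dictionary: only the 'resumptionToken' and 'verb' keys when a resumption token is set alongside other non-None keys, otherwise every key whose value is not None. + |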
+
src/oaipmh_scythe/utils.py
get_namespace(element)
+
+Return the namespace URI of an XML element.
+Extracts and returns the namespace URI from the tag of the given XML element.
+The namespace URI is enclosed in curly braces at the start of the tag.
+If the element does not have a namespace, None is returned.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
element |
+
+ _Element
+ |
+
+
+
+ The XML element from which to extract the namespace. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ str | None
+ |
+
+
+
+ The namespace URI as a string if the element has a namespace, otherwise None. + |
+
src/oaipmh_scythe/utils.py
log_response(response)
+
+Log the details of an HTTP response.
+This function logs the HTTP method, URL, and status code of the response for debugging purposes. +It uses the 'debug' logging level to provide detailed diagnostic information.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
response |
+
+ Response
+ |
+
+
+
+ The response object received from an HTTP request. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ None
+ |
+
+
+
+ None + |
+
src/oaipmh_scythe/utils.py
remove_none_values(d)
+
+Remove keys from the dictionary where the value is None.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
d |
+
+ dict[str, Any | None]
+ |
+
+
+
+ The input dictionary. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ dict[str, Any]
+ |
+
+
+
+ A new dictionary containing only the entries of the input dictionary whose values are not None. + |
+
src/oaipmh_scythe/utils.py
xml_to_dict(tree, paths=None, nsmap=None, strip_ns=False)
+
+Convert an XML tree to a dictionary, with options for custom XPath and namespace handling.
+This function takes an XML element tree and converts it into a dictionary. The keys of the +dictionary are the tags of the XML elements, and the values are lists of the text contents +of these elements. It offers options to apply specific XPath expressions, handle namespaces, +and optionally strip namespaces from the tags in the resulting dictionary.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
tree |
+
+ _Element
+ |
+
+
+
+ The root element of the XML tree to be converted. + |
+ + required + | +
paths |
+
+ list[str] | None
+ |
+
+
+
+ An optional list of XPath expressions to apply on the XML tree. If None or not +provided, the function will consider all elements in the tree. + |
+
+ None
+ |
+
nsmap |
+
+ dict[str, str] | None
+ |
+
+
+
+ An optional dictionary for namespace mapping, used to provide shorter, more +readable paths in XPath expressions. If None or not provided, no namespace +mapping is applied. + |
+
+ None
+ |
+
strip_ns |
+
+ bool
+ |
+
+
+
+ A boolean flag indicating whether to remove namespaces from the element tags +in the resulting dictionary. Defaults to False. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ dict[str, list[str | None]]
+ |
+
+
+
+ A dictionary where each key is an element tag (with or without namespace, based on + |
+
+ dict[str, list[str | None]]
+ |
+
+
+
+ strip_ns) and each value is a list of the text contents of + |
+
+ dict[str, list[str | None]]
+ |
+
+
+
+ each element with that tag. + |
+
src/oaipmh_scythe/utils.py