-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
607 additions
and
147 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,216 +1,140 @@ | ||
/** | ||
* Implementation of the RDF Canonicalization Algorithm, published by the W3C RCH Working Group. | ||
* Implementation of the RDF Canonicalization Algorithm, published by the W3C RCH Working Group. | ||
* See [specification](https://www.w3.org/TR/rdf-canon/) for the latest official version. | ||
* | ||
* | ||
* @copyright Ivan Herman 2023 | ||
* | ||
* | ||
* @packageDocumentation | ||
*/ | ||
import * as rdf from '@rdfjs/types'; | ||
|
||
export type Quads = rdf.DatasetCore; | ||
export type InputQuads = Iterable<rdf.Quad>; | ||
export type InputDataset = InputQuads | string; | ||
export type BNodeId = string; | ||
export type Hash = string; | ||
|
||
declare interface ConfigData { | ||
/** Number must be positive */ | ||
c14n_complexity?: number, | ||
|
||
/** The value must be one of the algorithms listed in the built-in list of available hash functions */ | ||
c14n_hash?: string, | ||
} | ||
|
||
export type GetConfigData = () => ConfigData; | ||
|
||
declare interface C14nResult { | ||
/** N-Quads serialization of the dataset */ | ||
canonical_form : string; | ||
|
||
/** Dataset as Set or Array of rdf Quads */ | ||
canonicalized_dataset : Quads; | ||
|
||
/** Mapping of a blank node to its identifier */ | ||
bnode_identifier_map : ReadonlyMap<rdf.BlankNode,BNodeId>; | ||
|
||
/** Mapping of an (original) blank node id to its canonical equivalent */ | ||
issued_identifier_map : ReadonlyMap<BNodeId,BNodeId>; | ||
} | ||
|
||
declare enum LogLevels { | ||
error, | ||
warn, | ||
info, | ||
debug | ||
} | ||
|
||
/********************************************************* | ||
The main class encapsulating the library's functionalities | ||
**********************************************************/ | ||
|
||
declare class RDFC10 { | ||
import { Hash, InputDataset, C14nResult } from './lib/common'; | ||
import { GetConfigData } from './lib/config'; | ||
import { LogLevels, Logger } from './lib/logging'; | ||
export { Quads, InputDataset, C14nResult, InputQuads } from './lib/common'; | ||
export { Hash, BNodeId } from './lib/common'; | ||
export { LogLevels, Logger } from './lib/logging'; | ||
export { ConfigData, GetConfigData } from './lib/config'; | ||
/** | ||
* Just a shell around the algorithm, consisting of a state, and the calls to the real implementation. | ||
* | ||
* The variable parts of the state, as [defined in the spec](https://www.w3.org/TR/rdf-canon/#dfn-canonicalization-state), | ||
* are re-initialized at the canonicalize call. I.e., the same class instance can be reused to | ||
* {@link RDFC10#canonicalize} different graphs. | ||
*/ | ||
export declare class RDFC10 { | ||
private state; | ||
/** | ||
* @constructor | ||
* @param data_factory An implementation of the generic RDF DataFactory interface, see [the specification](http://rdf.js.org/data-model-spec/#datafactory-interface). If undefined, the DataFactory of the [n3 package](https://www.npmjs.com/package/n3) is used. | ||
* @param getConfigData A function returning the configuration data, see {@link ConfigData}. By default, this returns the constant values set in the code; the caller may provide a more complex function to handle environment variables and/or configuration files. | ||
*/ | ||
constructor(data_factory?: rdf.DataFactory, getConfigData?: GetConfigData); | ||
|
||
/** | ||
* Set a logger instance. By default it is an "empty" logger, ie, no logging happens | ||
* @param logger | ||
* Create and set a logger instance. By default it is an "empty" logger, i.e., no logging happens. | ||
* | ||
* @param id - identifier of the logger to create | ||
* @param level - logging level of the new logger | ||
*/ | ||
setLogger(id: string, level: LogLevels): Logger | undefined ; | ||
|
||
setLogger(id?: string, level?: LogLevels): Logger | undefined; | ||
/** | ||
* Current logger type. | ||
*/ | ||
get logger_type(): string; | ||
|
||
/** | ||
* List of available logger types. | ||
*/ | ||
get available_logger_types(): string[]; | ||
|
||
|
||
/** | ||
* Set the Hash algorithm (default is "sha256"). | ||
* If the algorithm isn't available the value is ignored (and an exception is thrown). | ||
* | ||
* | ||
* The name is considered to be case insensitive. Also, both the format including a '-' dash character or not | ||
* are accepted (i.e., "sha256" and "sha-256" are both fine). | ||
* | ||
* @param algorithm_in: the (case insensitive) name of the algorithm, | ||
*/ | ||
set hash_algorithm(algorithm: string); | ||
* | ||
* @param algorithm_in: the (case insensitive) name of the algorithm. | ||
*/ | ||
set hash_algorithm(algorithm_in: string); | ||
get hash_algorithm(): string; | ||
get available_hash_algorithms(): string[] | ||
|
||
/** | ||
* List of available hash algorithm names. | ||
*/ | ||
get available_hash_algorithms(): string[]; | ||
/** | ||
* Set the maximal complexity number. This number, multiplied with the number of blank nodes in the dataset, | ||
* sets a maximum number of calls the algorithm can do for the so called "hash n degree quads" function. | ||
* Setting this number to a reasonably low number (say, 30), ensures that some "poison graphs" would not result in | ||
* an unreasonably long canonicalization process. | ||
* See the [security consideration section](https://www.w3.org/TR/rdf-canon/#security-considerations) in the specification. | ||
* | ||
* | ||
* The default value set by this implementation is 50; any number _greater_ than this number is ignored (and an exception is thrown). | ||
*/ | ||
set maximum_complexity_number(level: number); | ||
get maximum_complexity_number(): number; | ||
get maximum_allowed_complexity_number(): number | ||
|
||
/** | ||
* The system-wide maximum value for the complexity level. The current maximum complexity level cannot exceed this value. | ||
*/ | ||
get maximum_allowed_complexity_number(): number; | ||
/** | ||
* Canonicalize a Dataset into an N-Quads document. | ||
* | ||
* | ||
* Implementation of the main algorithm, see the | ||
* [separate overview in the spec](https://www.w3.org/TR/rdf-canon/#canon-algo-overview). | ||
* | ||
* [separate overview in the spec](https://www.w3.org/TR/rdf-canon/#canon-algo-overview). | ||
* | ||
* (The real work is done in the [separate function](../functions/lib_canonicalization.computeCanonicalDataset.html)). | ||
* | ||
* | ||
* @remarks | ||
* Note that the N-Quads parser throws an exception in case of syntax error. | ||
* @throws - RangeError, if the complexity of the graph goes beyond the set complexity number. See {@link maximum_complexity_number} | ||
* | ||
* | ||
* @param input_dataset | ||
* | ||
* @throws - RangeError, if the complexity of the graph goes beyond the set complexity number. See {@link maximum_complexity_number}. | ||
* | ||
* @param input_dataset | ||
* @param deduplicate - whether duplicate quads should be removed from the input (optional, defaults to `false`) | ||
* @returns - N-Quads document using the canonical ID-s. | ||
* | ||
* @async | ||
* | ||
*/ | ||
canonicalize(input_dataset: InputDataset, deduplicate?: boolean): Promise<string>; | ||
|
||
/** | ||
* Canonicalize a Dataset producing the full set of information. | ||
* | ||
* | ||
* Implementation of the main algorithmic steps, see | ||
* [separate overview in the spec](https://www.w3.org/TR/rdf-canon/#canon-algo-overview). | ||
* | ||
* [separate overview in the spec](https://www.w3.org/TR/rdf-canon/#canon-algo-overview). | ||
* | ||
* (The real work is done in the [separate function](../functions/lib_canonicalization.computeCanonicalDataset.html)). | ||
* | ||
* The result is an Object containing the serialized version and the Quads version of the canonicalization result, | ||
* | ||
* The result is an Object containing the serialized version and the Quads version of the canonicalization result, | ||
* as well as a bnode mapping from the original to the canonical equivalents. | ||
* | ||
* | ||
* @remarks | ||
* Note that the N-Quads parser throws an exception in case of syntax error. | ||
* @throws - RangeError, if the complexity of the graph goes beyond the set complexity number. See {@link maximum_complexity_number} | ||
* | ||
* @param input_dataset | ||
* | ||
* @throws - RangeError, if the complexity of the graph goes beyond the set complexity number. See {@link maximum_complexity_number}. | ||
* | ||
* @param input_dataset | ||
* @param deduplicate - whether duplicate quads should be removed from the input (optional, defaults to `false`) | ||
* @returns - Detailed results of the canonicalization | ||
* | ||
* @async | ||
*/ | ||
c14n(input_dataset: InputDataset, deduplicate?: boolean): Promise<C14nResult> ; | ||
|
||
c14n(input_dataset: InputDataset, deduplicate?: boolean): Promise<C14nResult>; | ||
/** | ||
* Serialize the dataset into a (possibly sorted) Array of nquads. | ||
* | ||
* @param input_dataset | ||
* Serialize a dataset into a (possibly sorted) Array of nquads. | ||
* | ||
* @param input_dataset | ||
* @param sort If `true` (the default) the array is lexicographically sorted | ||
* @returns | ||
* @returns | ||
*/ | ||
toNquads(input_dataset: InputQuads, sort?: boolean): string[]; | ||
|
||
toNquads(input_dataset: Iterable<rdf.Quad>, sort?: boolean): string[]; | ||
/** | ||
* Hash a dataset: | ||
* | ||
* | ||
* 1. serialize the dataset into nquads and sort the result (unless the input is an N-Quads document); | ||
* 2. compute the hash of the concatenated nquads. | ||
* | ||
* | ||
* This method is typically used on the result of the canonicalization to compute the canonical hash of a dataset. | ||
* | ||
* @param input_dataset | ||
* | ||
* @param input_dataset | ||
* @returns | ||
* @async | ||
*/ | ||
hash(input_dataset: InputDataset): Promise<Hash>; | ||
} | ||
|
||
/***************************************************************************** | ||
Type and class declarations for logging; can be ignored if no logging is used | ||
******************************************************************************/ | ||
|
||
declare interface LogItem { | ||
[index: string]: string|string[]|Map<string,string>|boolean|LogItem|LogItem[]; | ||
} | ||
|
||
/** | ||
* Very simple Logger interface. | ||
* | ||
* Implementations should follow the usual interpretation of log severity levels. E.g., if | ||
* the Logger is set up with severity level of, say, `LogLevels.info`, then the messages to `debug` should be ignored. If the | ||
* level is set to `LogLevels.warn`, then only warning and error messages should be recorded/displayed, etc. | ||
* | ||
* For each call the arguments are: | ||
* - log_point: the identification of the log point, related to the spec (in practice, this should be identical to the `id` value of the respective HTML element) | ||
* - position: short description of the position of the log. The string may be empty (i.e., ""), in which case it will be ignored. | ||
* - otherData: the 'real' log information | ||
* | ||
*/ | ||
declare interface Logger { | ||
level: LogLevels; | ||
|
||
debug(log_point: string, position: string, ...otherData: LogItem[]): void; | ||
warn(log_point: string, position: string, ...otherData: LogItem[]): void; | ||
error(log_point: string, position: string, ...otherData: LogItem[]): void; | ||
info(log_point: string, position: string, ...otherData: LogItem[]): void; | ||
/** | ||
* Entry point for an increase in stack level. This is issued at each function entry except the top level, and at some, more complex, cycles. | ||
* Needed if the logger instance intends to create recursive logs or if the structure is complex. | ||
* @param label - identification of the position in the code | ||
* @param extra_info - possible extra information on the level increase | ||
* @param otherData - the 'real' log information | ||
*/ | ||
push(label: string, extra_info ?: string, ...otherData: LogItem[]): void; | ||
|
||
/** | ||
* Counterpart of the {@link push} method. | ||
*/ | ||
pop(): void; | ||
|
||
/** | ||
* Accessor to the (read-only) log. | ||
*/ | ||
get log(): string; | ||
} | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
/** | ||
* Top level entry point for the canonicalization algorithm. | ||
* | ||
* @copyright Ivan Herman 2023 | ||
* | ||
* @packageDocumentation | ||
*/ | ||
import { GlobalState, InputDataset, C14nResult } from './common'; | ||
/** | ||
* Implementation of the main [steps on the top level](https://www.w3.org/TR/rdf-canon/#canon-algo-algo) of the algorithm specification. | ||
* | ||
* @param state - the overall canonicalization state + interface to the underlying RDF environment | ||
* @param input | ||
* @param deduplicate - whether duplicate quads should be removed from the input | ||
* @returns - A semantically identical set of Quads, with canonical BNode labels, plus other information. | ||
* | ||
* @async | ||
*/ | ||
export declare function computeCanonicalDataset(state: GlobalState, input: InputDataset, deduplicate?: boolean): Promise<C14nResult>; |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.