-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.ts
228 lines (210 loc) · 9.83 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
/**
* Implementation of the RDF Canonicalization Algorithm, published by the W3C RCH Working Group.
* See [specification](https://www.w3.org/TR/rdf-canon/) for the latest official version.
*
* @copyright Ivan Herman 2023
*
* @packageDocumentation
*/
import * as rdf from '@rdfjs/types';
import * as n3 from 'n3';
import { GlobalState, hashDataset, Hash, quadsToNquads, InputDataset, computeHash, C14nResult } from './lib/common';
import { AVAILABLE_HASH_ALGORITHMS, DEFAULT_MAXIMUM_COMPLEXITY, ConfigData, GetConfigData, defaultConfigData } from './lib/config';
import { IDIssuer } from './lib/issueIdentifier';
import { computeCanonicalDataset } from './lib/canonicalization';
import { LoggerFactory, LogLevels, Logger } from './lib/logging';
export { Quads, InputDataset, C14nResult, InputQuads } from './lib/common';
export { Hash, BNodeId } from './lib/common';
export { LogLevels, Logger } from './lib/logging';
export { ConfigData, GetConfigData } from './lib/config';
/**
* Just a shell around the algorithm, consisting of a state, and the calls to the real implementation.
*
* The variable parts of the state, as [defined in the spec](https://www.w3.org/TR/rdf-canon/#dfn-canonicalization-state),
* are re-initialized at the canonicalize call. Ie, the same class instance can therefore be reused to
* {@link RDFC10#canonicalize} for different graphs.
*/
export class RDFC10 {
    private state: GlobalState;
    /**
     * @constructor
     * @param data_factory An implementation of the generic RDF DataFactory interface, see [the specification](http://rdf.js.org/data-model-spec/#datafactory-interface). If undefined, the DataFactory of the [n3 package](https://www.npmjs.com/package/n3) is used.
     * @param getConfigData A function returning the configuration data, see {@link ConfigData}. By default, this returns the constant values set in the code; the caller may provide a more complex function to handle environment variables and/or configuration files.
     */
    constructor(data_factory?: rdf.DataFactory, getConfigData?: GetConfigData) {
        // Fall back to the built-in configuration when the caller supplies none
        // ('??' covers both undefined and null, matching the previous explicit checks).
        const localGetConfigData: GetConfigData = getConfigData ?? defaultConfigData;
        const { c14n_complexity, c14n_hash } = localGetConfigData();
        this.state = {
            bnode_to_quads: {},
            hash_to_bnodes: {},
            canonical_issuer: new IDIssuer(),
            hash_algorithm: c14n_hash,
            // The n3 DataFactory is the default RDF term/quad factory.
            dataFactory: data_factory ?? n3.DataFactory,
            logger: LoggerFactory.createLogger(LoggerFactory.DEFAULT_LOGGER),
            logger_id: LoggerFactory.DEFAULT_LOGGER,
            complexity_number: c14n_complexity,
            maximum_n_degree_call: 0,
            current_n_degree_call: 0
        };
    }
    /**
     * Create and set a logger instance. By default it is an "empty" logger, ie, no logging happens.
     *
     * @param id - identifier of the logger type, see {@link available_logger_types}.
     * @param level - minimum log level the new logger reports.
     * @returns the newly created logger, or `undefined` if the identifier does not
     * correspond to a known logger type (in which case the current logger is kept).
     */
    setLogger(id: string = LoggerFactory.DEFAULT_LOGGER, level: LogLevels = LogLevels.debug): Logger | undefined {
        const new_logger = LoggerFactory.createLogger(id, level);
        if (new_logger !== undefined) {
            this.state.logger_id = id;
            this.state.logger = new_logger;
            return new_logger;
        } else {
            return undefined;
        }
    }
    /**
     * Current logger type.
     */
    get logger_type(): string {
        return this.state.logger_id;
    }
    /**
     * List of available logger types.
     */
    get available_logger_types(): string[] {
        return LoggerFactory.loggerTypes();
    }
    /**
     * Set the Hash algorithm (default is "sha256").
     * If the algorithm isn't available, a `TypeError` is thrown and the current setting is left unchanged.
     *
     * The name is considered to be case insensitive. Also, both the format including a '-' dash character or not
     * are accepted (i.e., "sha256" and "sha-256" are both fine).
     *
     * @param algorithm_in - the (case insensitive) name of the algorithm.
     * @throws TypeError if the name is not among {@link available_hash_algorithms}.
     */
    set hash_algorithm(algorithm_in: string) {
        // To avoid stupid case dependent misspellings...
        const algorithm = algorithm_in.toLowerCase();
        if (Object.keys(AVAILABLE_HASH_ALGORITHMS).includes(algorithm)) {
            this.state.hash_algorithm = algorithm;
        } else {
            const error_message = `"${algorithm_in}" is not a valid Hash Algorithm name`;
            throw new TypeError(error_message);
        }
    }
    get hash_algorithm(): string {
        return this.state.hash_algorithm;
    }
    /**
     * List of available hash algorithm names.
     */
    get available_hash_algorithms(): string[] {
        return Object.keys(AVAILABLE_HASH_ALGORITHMS);
    }
    /**
     * Set the maximal complexity number. This number, multiplied with the number of blank nodes in the dataset,
     * sets a maximum number of calls the algorithm can do for the so called "hash n degree quads" function.
     * Setting this number to a reasonably low number (say, 30), ensures that some "poison graphs" would not result in
     * an unreasonably long canonicalization process.
     * See the [security consideration section](https://www.w3.org/TR/rdf-canon/#security-considerations) in the specification.
     *
     * The default value set by this implementation is 50; any number _greater_ than this number is ignored (and an exception is thrown).
     *
     * @throws RangeError if the value is not a positive integer not exceeding {@link maximum_allowed_complexity_number}.
     */
    set maximum_complexity_number(level: number) {
        // Number.isInteger already returns false for NaN, so no separate NaN check is needed.
        // The system-wide maximum itself is an allowed value (it "cannot exceed" the
        // maximum, see maximum_allowed_complexity_number), hence '<=' rather than '<'.
        if (Number.isInteger(level) && level > 0 && level <= DEFAULT_MAXIMUM_COMPLEXITY) {
            this.state.complexity_number = level;
        } else {
            const error_message = `Required complexity must be between 0 and ${DEFAULT_MAXIMUM_COMPLEXITY}`;
            throw new RangeError(error_message);
        }
    }
    get maximum_complexity_number(): number {
        return this.state.complexity_number;
    }
    /**
     * The system-wide maximum value for the complexity level. The current maximum complexity level cannot exceed this value.
     */
    get maximum_allowed_complexity_number(): number {
        return DEFAULT_MAXIMUM_COMPLEXITY;
    }
    /**
     * Canonicalize a Dataset into an N-Quads document.
     *
     * Implementation of the main algorithm, see the
     * [separate overview in the spec](https://www.w3.org/TR/rdf-canon/#canon-algo-overview).
     *
     * (The real work is done in the [separate function](../functions/lib_canonicalization.computeCanonicalDataset.html)).
     *
     * @remarks
     * Note that the N-Quads parser throws an exception in case of syntax error.
     *
     * @throws - RangeError, if the complexity of the graph goes beyond the set complexity number. See {@link maximum_complexity_number}.
     *
     * @param input_dataset
     * @param copy - whether the input should be copied to a local store (e.g., if the input is a generator, or the uniqueness of quads are not guaranteed). If this
     * parameter is not used (i.e., value is `undefined`) the copy is always done _unless_ the input is an `rdf.DatasetCore` instance.
     * @returns - N-Quads document using the canonical ID-s.
     *
     * @async
     *
     */
    async canonicalize(input_dataset: InputDataset, copy?: boolean): Promise<string> {
        // Delegate to the full algorithm and keep only the serialized form.
        return (await this.c14n(input_dataset, copy)).canonical_form;
    }
    /**
     * Canonicalize a Dataset producing the full set of information.
     *
     * Implementation of the main algorithmic steps, see
     * [separate overview in the spec](https://www.w3.org/TR/rdf-canon/#canon-algo-overview).
     *
     * (The real work is done in the [separate function](../functions/lib_canonicalization.computeCanonicalDataset.html)).
     *
     * The result is an Object containing the serialized version and the Quads version of the canonicalization result,
     * as well as a bnode mapping from the original to the canonical equivalents.
     *
     * @remarks
     * Note that the N-Quads parser throws an exception in case of syntax error.
     *
     * @throws - RangeError, if the complexity of the graph goes beyond the set complexity number. See {@link maximum_complexity_number}.
     *
     * @param input_dataset
     * @param copy - whether the input should be copied to a local store (e.g., if the input is a generator, or the uniqueness of quads are not guaranteed). If this
     * parameter is not used (i.e., value is `undefined`) the copy is always done _unless_ the input is an `rdf.DatasetCore` instance.
     * @returns - Detailed results of the canonicalization
     *
     * @async
     */
    async c14n(input_dataset: InputDataset, copy?: boolean): Promise<C14nResult> {
        return computeCanonicalDataset(this.state, input_dataset, copy);
    }
    /**
     * Serialize a dataset into a (possibly sorted) Array of nquads.
     *
     * @param input_dataset
     * @param sort If `true` (the default) the array is lexicographically sorted
     * @returns array of N-Quads strings, one per quad.
     */
    toNquads(input_dataset: Iterable<rdf.Quad>, sort: boolean = true): string[] {
        return quadsToNquads(input_dataset, sort);
    }
    /**
     * Hash a dataset:
     *
     * 1. serialize the dataset into nquads and sort the result (unless the input is an N-Quads document);
     * 2. compute the hash of the concatenated nquads.
     *
     * This method is typically used on the result of the canonicalization to compute the canonical hash of a dataset.
     *
     * @param input_dataset
     * @returns the hash value, computed with the currently set {@link hash_algorithm}.
     */
    async hash(input_dataset: InputDataset): Promise<Hash> {
        // A string input is treated as an already-serialized N-Quads document
        // and hashed directly; anything else is serialized (and sorted) first.
        if (typeof input_dataset === 'string') {
            return computeHash(this.state, input_dataset);
        } else {
            return hashDataset(this.state, input_dataset, true);
        }
    }
}