diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 7d0bc24..68b4daf 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -14,10 +14,10 @@ jobs: uses: cross-org/workflows/.github/workflows/bun-ci.yml@main with: jsr_dependencies: "@cross/test @cross/fs @cross/runtime @std/assert@0.224.0 @std/path@0.224.0" - npm_dependencies: "cbor-x ohash" + npm_dependencies: "cbor-x" node_ci: uses: cross-org/workflows/.github/workflows/node-ci.yml@main with: jsr_dependencies: "@cross/test @cross/fs @cross/runtime @std/assert@0.224.0 @std/path@0.224.0" - npm_dependencies: "cbor-x ohash" + npm_dependencies: "cbor-x" test_target: "test/*.test.ts" \ No newline at end of file diff --git a/.gitignore b/.gitignore index a6349eb..bc3813d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ node_modules/ .npmrc package-lock.json package.json +scripts # Deno / JSR deno.lock diff --git a/deno.json b/deno.json index e4f487e..824dd42 100644 --- a/deno.json +++ b/deno.json @@ -19,8 +19,7 @@ "@cross/utils": "jsr:@cross/utils@^0.16.0", "@std/assert": "jsr:@std/assert@^1.0.4", "@std/path": "jsr:@std/path@^1.0.4", - "cbor-x": "npm:cbor-x@^1.5.9", - "ohash": "npm:ohash@^1.1.3" + "cbor-x": "npm:cbor-x@^1.5.9" }, "publish": { "exclude": [".github", "test/*"] diff --git a/src/lib/ledger.ts b/src/lib/ledger.ts index d350c42..b19e532 100644 --- a/src/lib/ledger.ts +++ b/src/lib/ledger.ts @@ -21,7 +21,7 @@ import { SUPPORTED_LEDGER_VERSIONS, UNLOCKED_BYTES, } from "./constants.ts"; -import { KVOperation, KVTransaction } from "./transaction.ts"; +import { KVHashAlgorithm, KVOperation, KVTransaction } from "./transaction.ts"; import { rename, unlink } from "@cross/fs"; import type { FileHandle } from "node:fs/promises"; import type { KVQuery } from "./key.ts"; @@ -150,6 +150,8 @@ export class KVLedger { if (result) { newTransactions.push(result); currentOffset += result.length + result.errorCorrectionOffset; // Advance the offset + } else if 
(!ignoreReadErrors) { + throw new Error("Unexpected end of file"); } else { break; } @@ -253,6 +255,8 @@ export class KVLedger { } } currentOffset += result.length + result.errorCorrectionOffset; // Advance the offset + } else if (!ignoreReadErrors) { + throw new Error("Unexpected end of file"); } else { break; } @@ -411,7 +415,13 @@ export class KVLedger { dataLength, baseOffset + errorCorrectionOffset + headerOffset + headerLength, ); - transaction.dataFromUint8Array(transactionData); + /* Ledger version B016 stored hashes from the faulty MurmurHash3, so verify its data with that algorithm */ + transaction.dataFromUint8Array( + transactionData, + this.header.ledgerVersion === "B016" + ? KVHashAlgorithm.FAULTY_MURMURHASH3 + : KVHashAlgorithm.MURMURHASH3, + ); } // Get transaction result const result = { @@ -466,8 +476,8 @@ export class KVLedger { // Update the header after each read, to make sure we catch any new transactions this.readHeader(); - } else { - break; + } else if (!ignoreReadErrors) { + throw new Error("Unexpected end of file"); } } @@ -497,6 +507,8 @@ export class KVLedger { addedKeys.add(result.transaction.key!.stringify()); validTransactions.push(result); } + } else if (!ignoreReadErrors) { + throw new Error("Unexpected end of file"); } } @@ -525,6 +537,8 @@ export class KVLedger { await tempLedger.add([{ transactionData: transaction.transaction.toUint8Array(), }]); + } else if (!ignoreReadErrors) { + throw new Error("Unexpected end of file"); } } this.header.currentOffset = tempLedger.header.currentOffset; diff --git a/src/lib/transaction.ts b/src/lib/transaction.ts index 94af8c2..34bc9e6 100644 --- a/src/lib/transaction.ts +++ b/src/lib/transaction.ts @@ -2,7 +2,7 @@ import { type KVKey, KVKeyInstance } from "./key.ts"; import { decode, encode } from "cbor-x"; import { ENCODED_TRANSACTION_SIGNATURE } from "./constants.ts"; -import { murmurHash } from "ohash"; +import { faultyMurmurHash, murmurHash } from "./utils/murmur.ts"; /** * Data structure of a Cross/kv transaction: @@ -40,6 +40,21 @@ export enum 
KVOperation { DELETE = 2, } +/** + * Enumerates the possible algorithms supported by the ledger + */ +export enum KVHashAlgorithm { + /** + * Correct custom MurmurHash3 implementation + */ + MURMURHASH3 = 1, + + /** + * Faulty MurmurHash3 implementation (ohash 1.1.3) used by ledger version B016 + */ + FAULTY_MURMURHASH3 = 2, +} + /** * Represents content of a transaction */ @@ -112,6 +127,7 @@ export class KVTransaction { public timestamp?: number; public data?: Uint8Array; public hash?: number; + public hashIsFresh?: boolean; constructor() { } @@ -132,6 +148,7 @@ export class KVTransaction { const valueData = new Uint8Array(encode(value)); this.data = valueData; this.hash = murmurHash(valueData); + this.hashIsFresh = true; } } @@ -192,9 +209,26 @@ export class KVTransaction { } } - public dataFromUint8Array(data: Uint8Array) { - if (murmurHash(data) !== this.hash!) { - throw new Error("Invalid data: Read data not matching hash"); + public dataFromUint8Array( + data: Uint8Array, + expectedAlgorithm: KVHashAlgorithm = KVHashAlgorithm.MURMURHASH3, + ) { + switch (expectedAlgorithm) { + case KVHashAlgorithm.MURMURHASH3: { + if (murmurHash(data) !== this.hash!) { + throw new Error("Invalid data: Read data not matching hash"); + } + break; + } + case KVHashAlgorithm.FAULTY_MURMURHASH3: { + if (faultyMurmurHash(data) !== this.hash!) 
{ + throw new Error("Invalid data: Read data not matching hash"); + } + break; + } + default: { + throw new Error("Incorrect hash algorithm requested"); + } } this.data = data; } @@ -204,7 +238,17 @@ export class KVTransaction { */ public toUint8Array(): Uint8Array { const keyBytes = this.key!.toUint8Array(); - const hashBytes = this.hash; + + // Make sure hash is fresh + let hashBytes: number | undefined; + if (this.hashIsFresh) { + hashBytes = this.hash; + } else { + if (this.data) { + hashBytes = murmurHash(this.data); + } + } + const pendingTransactionData = this.data; // Calculate total sizes diff --git a/src/lib/utils/murmur.ts b/src/lib/utils/murmur.ts new file mode 100644 index 0000000..4dd0a25 --- /dev/null +++ b/src/lib/utils/murmur.ts @@ -0,0 +1,168 @@ +/** + * JS Implementation of MurmurHash3 (r136) (as of May 20, 2011) + * + * Based on main branch of https://github.com/unjs/ohash 2024-09-19, slightly modified + * + * @param {Uint8Array | string} key + * @param {number} seed Positive integer only + * @return {number} 32-bit positive integer hash + */ +export function murmurHash(key: Uint8Array | string, seed = 0) { + if (typeof key === "string") { + key = createBuffer(key); + } + + let i = 0; + let h1 = seed; + let k1; + let h1b; + + const remainder = key.length & 3; // key.length % 4 + const bytes = key.length - remainder; + const c1 = 0xcc_9e_2d_51; + const c2 = 0x1b_87_35_93; + + while (i < bytes) { + k1 = (key[i] & 0xff) | + ((key[++i] & 0xff) << 8) | + ((key[++i] & 0xff) << 16) | + ((key[++i] & 0xff) << 24); + ++i; + + k1 = ((k1 & 0xff_ff) * c1 + ((((k1 >>> 16) * c1) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + k1 = (k1 << 15) | (k1 >>> 17); + k1 = ((k1 & 0xff_ff) * c2 + ((((k1 >>> 16) * c2) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + + h1 ^= k1; + h1 = (h1 << 13) | (h1 >>> 19); + h1b = ((h1 & 0xff_ff) * 5 + ((((h1 >>> 16) * 5) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 = (h1b & 0xff_ff) + 0x6b_64 + + ((((h1b >>> 16) + 0xe6_54) & 0xff_ff) << 16); + } + + k1 
= 0; + + switch (remainder) { + case 3: { + k1 ^= (key[i + 2] & 0xff) << 16; + /* falls through */ + } + // deno-lint-ignore no-fallthrough + case 2: { + k1 ^= (key[i + 1] & 0xff) << 8; + /* falls through */ + } + case 1: { + k1 ^= key[i] & 0xff; + k1 = ((k1 & 0xff_ff) * c1 + ((((k1 >>> 16) * c1) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + k1 = (k1 << 15) | (k1 >>> 17); + k1 = ((k1 & 0xff_ff) * c2 + ((((k1 >>> 16) * c2) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 ^= k1; + } + } + + h1 ^= key.length; + + h1 ^= h1 >>> 16; + h1 = ((h1 & 0xff_ff) * 0x85_eb_ca_6b + + ((((h1 >>> 16) * 0x85_eb_ca_6b) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 ^= h1 >>> 13; + h1 = ((h1 & 0xff_ff) * 0xc2_b2_ae_35 + + ((((h1 >>> 16) * 0xc2_b2_ae_35) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 ^= h1 >>> 16; + + return h1 >>> 0; +} + +/** + * Faulty JS Implementation of MurmurHash3 (r136) (as of May 20, 2011), kept to verify hashes written by ledger version B016 + * + * Based on version 1.1.3 of https://github.com/unjs/ohash, whose tail switch breaks instead of falling through, so trailing bytes are not mixed into the hash when key.length % 4 is 2 or 3 + * + * @param {Uint8Array | string} key + * @param {number} seed Positive integer only + * @return {number} 32-bit unsigned integer hash + */ +export function faultyMurmurHash(key: Uint8Array | string, seed = 0) { + if (typeof key === "string") { + key = createBuffer(key); + } + + let i = 0; + let h1 = seed; + let k1; + let h1b; + + const remainder = key.length & 3; // key.length % 4 + const bytes = key.length - remainder; + const c1 = 0xcc_9e_2d_51; + const c2 = 0x1b_87_35_93; + + while (i < bytes) { + k1 = (key[i] & 0xff) | + ((key[++i] & 0xff) << 8) | + ((key[++i] & 0xff) << 16) | + ((key[++i] & 0xff) << 24); + ++i; + + k1 = ((k1 & 0xff_ff) * c1 + ((((k1 >>> 16) * c1) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + k1 = (k1 << 15) | (k1 >>> 17); + k1 = ((k1 & 0xff_ff) * c2 + ((((k1 >>> 16) * c2) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + + h1 ^= k1; + h1 = (h1 << 13) | (h1 >>> 19); + h1b = ((h1 & 0xff_ff) * 5 + ((((h1 >>> 16) * 5) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 = (h1b & 0xff_ff) + 0x6b_64 + + ((((h1b >>> 16) + 0xe6_54) & 0xff_ff) << 
16); + } + + k1 = 0; + + switch (remainder) { + case 3: { + k1 ^= (key[i + 2] & 0xff) << 16; + break; + } + case 2: { + k1 ^= (key[i + 1] & 0xff) << 8; + break; + } + case 1: { + k1 ^= key[i] & 0xff; + k1 = ((k1 & 0xff_ff) * c1 + ((((k1 >>> 16) * c1) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + k1 = (k1 << 15) | (k1 >>> 17); + k1 = ((k1 & 0xff_ff) * c2 + ((((k1 >>> 16) * c2) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 ^= k1; + } + } + + h1 ^= key.length; + + h1 ^= h1 >>> 16; + h1 = ((h1 & 0xff_ff) * 0x85_eb_ca_6b + + ((((h1 >>> 16) * 0x85_eb_ca_6b) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 ^= h1 >>> 13; + h1 = ((h1 & 0xff_ff) * 0xc2_b2_ae_35 + + ((((h1 >>> 16) * 0xc2_b2_ae_35) & 0xff_ff) << 16)) & + 0xff_ff_ff_ff; + h1 ^= h1 >>> 16; + + return h1 >>> 0; +} + +function createBuffer(val: string) { + return new TextEncoder().encode(val); +}