Skip to content

Commit

Permalink
fix: out of bounds lookbehind
Browse files Browse the repository at this point in the history
  • Loading branch information
gurgunday committed Aug 16, 2024
1 parent 8505186 commit 3180eea
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 75 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ The constructor may throw:
- `lookbehindBuffer` (Uint8Array | null): Buffer containing data from previous chunks that might be part of a match.
- `currentBuffer` (Uint8Array | null): The current buffer being processed.

Note:
**Note:**

- The callback will contain EITHER the `lookbehindBuffer` OR the `currentBuffer`, not both at the same time.
The callback will contain **either** the `lookbehindBuffer` or the `currentBuffer`, not both at the same time.

## Usage

Expand Down
2 changes: 1 addition & 1 deletion bench/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import StreamSearch from "streamsearch";
import { Bench } from "tinybench";
import { Buffer } from "node:buffer";

const bench = new Bench({ time: 1000 });
const bench = new Bench({ time: 5000 });
const pattern = "exampleexampleexampleexampleexampleexample";
const longText =
`This is a long text with multiple occurrences of the word example. ` +
Expand Down
110 changes: 51 additions & 59 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ const bufferFrom = (string) => {
return buffer;
};

const bufferCompare = (buffer1, index1, buffer2, index2, length) => {
const bufferCompare = (buffer1, offset1, buffer2, offset2, length) => {
for (let i = 0; i !== length; ++i) {
if (buffer1[index1 + i] !== buffer2[index2 + i]) {
if (buffer1[offset1 + i] !== buffer2[offset2 + i]) {
return false;
}
}
Expand All @@ -20,7 +20,6 @@ const bufferCompare = (buffer1, index1, buffer2, index2, length) => {

const Match = class {
#matches = 0;
#bufferIndex = 0;
#lookbehindSize = 0;
#lookbehind;
#skip;
Expand Down Expand Up @@ -57,7 +56,6 @@ const Match = class {

reset() {
this.#lookbehindSize = 0;
this.#bufferIndex = 0;
this.#matches = 0;
}

Expand All @@ -72,124 +70,117 @@ const Match = class {
write(chunk) {
const buffer =
chunk instanceof Uint8Array ? chunk : this.#from(String(chunk));
this.#bufferIndex = 0;
let offset = 0;

while (this.#bufferIndex !== buffer.length) {
this.#bufferIndex = this.#search(buffer);
while (offset !== buffer.length) {
offset = this.#search(buffer, offset);
}

return this.#bufferIndex;
}

#search(buffer) {
#search(buffer, offset) {
const patternLastCharIndex = this.#pattern.length - 1;
const patternLastChar = this.#pattern[patternLastCharIndex];
const end = buffer.length - this.#pattern.length;
let index = -this.#lookbehindSize;
let position = -this.#lookbehindSize;

if (index < 0) {
while (index < 0 && index <= end) {
const char = buffer[index + patternLastCharIndex];
if (position < 0) {
while (position < 0 && position <= end) {
const char = buffer[position + patternLastCharIndex];

if (
char === patternLastChar &&
this.#matchPattern(buffer, index, patternLastCharIndex)
this.#matchPattern(buffer, position, patternLastCharIndex)
) {
if (-index < this.#lookbehindSize) {
++this.#matches;

if (-position === this.#lookbehindSize) {
this.#callback(true, 0, 0, null, null);
} else {
this.#callback(
false,
true,
0,
index + this.#lookbehindSize,
position + this.#lookbehindSize,
this.#lookbehind,
null,
);
}

++this.#matches;
this.#callback(true, 0, 0, null, null);
this.#lookbehindSize = 0;

this.#bufferIndex = index + this.#pattern.length;

return this.#bufferIndex;
return position + this.#pattern.length;
}

index += this.#skip[char];
position += this.#skip[char];
}

if (index < 0) {
const bytesToCutOff = this.#lookbehindSize + index;
if (position < 0) {
const bytesToCutOff = position + this.#lookbehindSize;

if (bytesToCutOff) {
this.#callback(false, 0, bytesToCutOff, this.#lookbehind, null);
this.#lookbehindSize -= bytesToCutOff;
this.#lookbehind.set(
this.#lookbehind.subarray(bytesToCutOff, this.#lookbehindSize),
);
}

this.#lookbehind.set(this.#lookbehind.subarray(bytesToCutOff));
this.#lookbehind.set(buffer, this.#lookbehindSize - bytesToCutOff);
this.#lookbehind.set(buffer, this.#lookbehindSize);
this.#lookbehindSize += buffer.length;

this.#bufferIndex = buffer.length;

return this.#bufferIndex;
return buffer.length;
}

this.#callback(false, 0, this.#lookbehindSize, this.#lookbehind, null);
this.#lookbehindSize = 0;
}

index += this.#bufferIndex;
position += offset;

while (index <= end) {
const char = buffer[index + patternLastCharIndex];
while (position <= end) {
const char = buffer[position + patternLastCharIndex];

if (
char === patternLastChar &&
bufferCompare(this.#pattern, 0, buffer, index, patternLastCharIndex)
bufferCompare(this.#pattern, 0, buffer, position, patternLastCharIndex)
) {
++this.#matches;

if (index) {
this.#callback(true, this.#bufferIndex, index, null, buffer);
} else {
if (!position) {
this.#callback(true, 0, 0, null, null);
} else {
this.#callback(true, offset, position, null, buffer);
}

this.#bufferIndex = index + this.#pattern.length;

return this.#bufferIndex;
return position + this.#pattern.length;
}

index += this.#skip[char];
position += this.#skip[char];
}

if (index < buffer.length) {
this.#lookbehind.set(buffer.subarray(index));
this.#lookbehindSize = buffer.length - index;

if (index !== this.#bufferIndex) {
this.#callback(false, this.#bufferIndex, index, null, buffer);
}
} else {
this.#callback(false, this.#bufferIndex, buffer.length, null, buffer);
if (position !== offset) {
this.#callback(false, offset, position, null, buffer);
}

this.#bufferIndex = buffer.length;
if (position !== buffer.length) {
this.#lookbehind.set(buffer.subarray(position));
this.#lookbehindSize = buffer.length - position;
}

return this.#bufferIndex;
return buffer.length;
}

#matchPattern(buffer, index, length) {
#matchPattern(buffer, position, length) {
for (let i = 0; i !== length; ++i) {
const char =
index < 0
? this.#lookbehind[this.#lookbehindSize + index]
: buffer[index];
position < 0
? this.#lookbehind[position + this.#lookbehindSize]
: buffer[position];

if (char !== this.#pattern[i]) {
return false;
}

++index;
++position;
}

return true;
Expand All @@ -201,8 +192,9 @@ const Match = class {

static #table(buffer) {
const table = new Uint8Array(256).fill(buffer.length);
const length = buffer.length - 1;

for (let i = 0, length = buffer.length - 1; i !== length; ++i) {
for (let i = 0; i !== length; ++i) {
table[buffer[i]] = length - i;
}

Expand Down
77 changes: 64 additions & 13 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,30 @@ test("lookbehind match", (t, done) => {
test("lookbehind append", (t, done) => {
let call = 0;

const match = new Match("thisisalongpattern", () => {
const match = new Match("thisisalongpattern", (a) => {
++call;
if (call === 1) {
assert.strictEqual(a, false);
}

if (call === 2) {
assert.strictEqual(a, false);
}

if (call === 3) {
assert.strictEqual(a, true);
}

if (call === 3) {
assert.strictEqual(a, true);
done();
}
});

match.write("thisisalong");
match.write("longpatlig");
match.write("patterthis");
match.write("isalongpatternt");
match.write("hisisalongpattern");
});

test("lookbehind matchPattern pass", (t, done) => {
Expand Down Expand Up @@ -223,7 +239,20 @@ test("lookbehind bufferCompare with null", (t, done) => {
match.write("great");
});

test("lookbehind test", (t, done) => {
// Test: writes a prefix of the pattern ("thisisalong") followed by a chunk
// ("patterl") whose final byte differs from the pattern's final byte, so the
// concatenated input is not a match. Passes once the callback fires once.
// NOTE(review): this title duplicates the earlier "lookbehind append" test in
// this file — consider a distinct name so test reports are unambiguous.
test("lookbehind append", (t, done) => {
// Number of callback invocations observed so far.
let call = 0;

const match = new Match("thisisalongpattern", () => {
++call;
// Finish on the first callback; its arguments are not inspected here.
if (call === 1) {
done();
}
});
match.write("thisisalong");
match.write("patterl");
});

test("lookbehind test /2", (t, done) => {
let count = 0;

const match = new Match(
Expand Down Expand Up @@ -260,19 +289,11 @@ test("pattern test", (t, done) => {
++count;

if (count === 1) {
assert.strictEqual(isMatch, false);
assert.strictEqual(isMatch, true);
assert.strictEqual(start, 0);
assert.strictEqual(end, 1);
assert.strictEqual(String.fromCharCode(l[0]), "s");
assert.strictEqual(b, null);
}

if (count === 2) {
assert.strictEqual(isMatch, true);
assert.strictEqual(start, 0);
assert.strictEqual(end, 0);
assert.strictEqual(l, null);
assert.strictEqual(b, null);
done();
}
});
Expand All @@ -285,7 +306,7 @@ test("pattern test /2", (t, done) => {
let buffer = Buffer.from([]);

const m = new Match("Hello, World!", (isMatch, start, end, l, b) => {
if (!isMatch) {
if (l ?? b) {
buffer = Buffer.concat([buffer, (l ?? b).subarray(start, end)]);
}
});
Expand All @@ -301,3 +322,33 @@ test("pattern test /2", (t, done) => {
assert.deepEqual(buffer.toString("utf8"), `"".........Hello, Gurgun`);
done();
});

// Test: reassembles everything the callback reports and checks it against
// the expected emitted text. The input contains "Hello, World." (period)
// twice, never the full pattern "Hello, World!".
test("pattern test /3", (t, done) => {
// Accumulates every byte range handed to the callback.
let buffer = Buffer.from([]);

const m = new Match("Hello, World!", (isMatch, start, end, l, b) => {
// Callbacks where both buffers are null carry no data and are ignored;
// otherwise append the [start, end) slice of whichever buffer is non-null.
if (l ?? b) {
buffer = Buffer.concat([buffer, (l ?? b).subarray(start, end)]);
}
});

// The first chunk is a proper prefix of the pattern.
m.write("Hello");
m.write(", World.Hello, World.asd");

// The trailing "asd" is not part of the expected output — presumably it is
// still held back by the matcher pending more input; confirm against Match.
assert.deepEqual(buffer.toString("utf8"), `Hello, World.Hello, World.`);
done();
});

// Test: the pattern "Hello, World!" arrives split across two writes; the
// callback must be invoked exactly once in total.
test("pattern test /4", (t, done) => {
// Counts callback invocations.
let c = 0;

const m = new Match("Hello, World!", () => {
c++;
});

// Together the two chunks spell the pattern exactly, with nothing before
// or after it.
m.write("Hello");
m.write(", World!");

assert.strictEqual(c, 1);
done();
});

0 comments on commit 3180eea

Please sign in to comment.