-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
338 additions
and
353 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,4 @@ | ||
/** | ||
* Encoding data into different representations. | ||
* @module | ||
*/ | ||
|
||
export * from "./categorical.ts"; | ||
export { MultiHotEncoder } from "./multihot.ts"; | ||
export { OneHotEncoder } from "./onehot.ts"; | ||
export { TfEncoder } from "./termfrequency.ts"; | ||
export { transformSoftmaxMut } from "./softmax.ts"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import { Matrix } from "../mod.ts"; | ||
import type { DataType } from "../utils/common_types.ts"; | ||
|
||
/**
 * Convert 2D array of indices into multi-hot encoded vectors.
 *
 * Every sample (row) of the input is treated as a list of column indices;
 * for each usable index the corresponding cell of the output row is set to 1.
 */
export class MultiHotEncoder {
  /** Size of encoded vectors. */
  mappingSize: number;
  constructor(mappingSize: number) {
    this.mappingSize = mappingSize;
  }
  /**
   * Encoding values into multi-hot vectors.
   *
   * @param targets Samples to encode, either as nested arrays of indices or
   * as a `Matrix` whose rows hold the indices.
   * @param dType Data type of the resulting matrix. Required when `targets`
   * is a plain array; when omitted for a `Matrix`, the matrix's own `dType`
   * is used.
   * @returns A matrix created with dimensions
   * `[targets.length, mappingSize]`.
   * @throws {Error} If `dType` is missing and `targets` is not a `Matrix`.
   */
  transform<DT extends DataType>(targets: Matrix<DT>): Matrix<DT>;
  transform<DT extends DataType>(targets: number[][], dType: DT): Matrix<DT>;
  transform<DT extends DataType>(
    targets: number[][] | Matrix<DT>,
    dType?: DT
  ): Matrix<DT> {
    if (!dType && !(targets instanceof Matrix))
      throw new Error("dType required when not dealing with matrices.");
    const dataType = dType || (targets as Matrix<DT>).dType;
    const res = new Matrix<DT>(dataType, [targets.length, this.mappingSize]);
    for (let i = 0; i < targets.length; i += 1) {
      const row = targets instanceof Matrix ? targets.row(i) : targets[i];
      for (let j = 0; j < row.length; j += 1) {
        // Number() also normalises bigint entries coming from a Matrix row.
        const index = Number(row[j]);
        // An index is usable only if it addresses a column of the encoded
        // vector, so it is validated against mappingSize (not against the
        // number of entries in the sample). Negative or fractional values
        // are skipped just like too-large ones.
        if (
          !Number.isInteger(index) ||
          index < 0 ||
          index >= this.mappingSize
        ) {
          continue;
        }
        res.setCell(i, index, 1);
      }
    }
    return res;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import { Matrix, type MatrixLike } from "../mod.ts"; | ||
import type { DataType } from "../utils/common_types.ts"; | ||
|
||
/**
 * Convert an array of indices into one-hot encoded vectors.
 */
export class OneHotEncoder {
  /** Size of one-hot encoded vectors. */
  mappingSize: number;
  constructor(mappingSize: number) {
    this.mappingSize = mappingSize;
  }
  /**
   * One-hot encoding of values.
   *
   * @param targets One index per sample.
   * @param dType Data type of the resulting matrix.
   * @returns A matrix created with dimensions
   * `[targets.length, mappingSize]`. Rows whose index is negative,
   * fractional or `>= mappingSize` are left untouched.
   */
  transform<DT extends DataType>(targets: number[], dType: DT): Matrix<DT> {
    const res = new Matrix<DT>(dType, [targets.length, this.mappingSize]);
    for (let i = 0; i < targets.length; i += 1) {
      const index = targets[i];
      // Skip anything that does not address a column of the encoded vector.
      // Negative values matter in practice: mappers may emit -1 for
      // categories they have never seen.
      if (
        !Number.isInteger(index) ||
        index < 0 ||
        index >= this.mappingSize
      ) {
        continue;
      }
      res.setCell(i, index, 1);
    }
    return res;
  }
  /**
   * Recover indices from one-hot encoded rows.
   *
   * @param data Matrix-like value holding one encoded vector per row.
   * @returns For every row, the position of the first entry equal to one,
   * or -1 when the row has no such entry.
   */
  untransform<DT extends DataType>(data: MatrixLike<DT>): number[] {
    const matrix = new Matrix(data);
    const res = new Array<number>(matrix.nRows);
    for (let i = 0; i < res.length; i += 1) {
      const row = matrix.row(i);
      let hot = -1;
      for (let j = 0; j < row.length; j += 1) {
        // Compare through Number() so that bigint rows (where the hot
        // entry is 1n) are recognised as well as numeric ones.
        if (Number(row[j]) === 1) {
          hot = j;
          break;
        }
      }
      res[i] = hot;
    }
    return res;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import { Matrix, type MatrixLike } from "../mod.ts"; | ||
import type { DataType, DType, DTypeValue } from "../utils/common_types.ts"; | ||
|
||
/**
 * Turn every row of a softmax output into a one-hot vector: the winning
 * position of the row becomes one and every other position becomes zero.
 * When several entries tie for the largest value, the last of them wins.
 * The rows are rewritten through a `Matrix` built from `targets`.
 * Mutates the input.
 */
export function transformSoftmaxMut<DT extends DataType>(
  targets: MatrixLike<DT>
): Matrix<DT> {
  const matrix = new Matrix(targets);
  // BigInt-backed storage needs bigint literals for the replacement row.
  const usesBigInt =
    targets.data instanceof BigInt64Array ||
    targets.data instanceof BigUint64Array;
  for (let r = 0; r < matrix.nRows; r += 1) {
    const row = matrix.row(r);
    // @ts-ignore It can reduce.
    const hot = row.reduce(
      (best: number, value: DTypeValue<DT>, col: number, all: DType<DT>) =>
        all[best] > value ? best : col,
      0
    );
    const oneHot = new Array(matrix.nCols).fill(usesBigInt ? 0n : 0);
    oneHot[hot] = usesBigInt ? 1n : 1;
    matrix.setRow(r, oneHot);
  }
  return matrix;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import { Matrix } from "../mod.ts"; | ||
import type { DataType } from "../utils/common_types.ts"; | ||
|
||
/**
 * Convert 2D array of indices into multi-hot encoded vectors
 * where each index contains the number of times the respective
 * value appears in a sample (term frequency encoder).
 */
export class TfEncoder {
  /** Size of encoded vectors. */
  mappingSize: number;
  constructor(mappingSize: number) {
    this.mappingSize = mappingSize;
  }
  /**
   * Encoding values into count vectors.
   *
   * @param targets Samples to encode, either as nested arrays of indices or
   * as a `Matrix` whose rows hold the indices.
   * @param dType Data type of the resulting matrix. Required when `targets`
   * is a plain array; when omitted for a `Matrix`, the matrix's own `dType`
   * is used.
   * @returns A matrix created with dimensions
   * `[targets.length, mappingSize]`; each usable index adds 1 to its cell.
   * @throws {Error} If `dType` is missing and `targets` is not a `Matrix`.
   */
  transform<DT extends DataType>(targets: Matrix<DT>): Matrix<DT>;
  transform<DT extends DataType>(targets: number[][], dType: DT): Matrix<DT>;
  transform<DT extends DataType>(
    targets: number[][] | Matrix<DT>,
    dType?: DT
  ): Matrix<DT> {
    if (!dType && !(targets instanceof Matrix))
      throw new Error("dType required when not dealing with matrices.");
    const dataType = dType || (targets as Matrix<DT>).dType;
    const res = new Matrix<DT>(dataType, [targets.length, this.mappingSize]);
    for (let i = 0; i < targets.length; i += 1) {
      const row = targets instanceof Matrix ? targets.row(i) : targets[i];
      for (let j = 0; j < row.length; j += 1) {
        // Number() also normalises bigint entries coming from a Matrix row.
        const index = Number(row[j]);
        // An index is usable only if it addresses a column of the encoded
        // vector, so it is validated against mappingSize (not against the
        // number of entries in the sample). Negative or fractional values
        // are skipped just like too-large ones.
        if (
          !Number.isInteger(index) ||
          index < 0 ||
          index >= this.mappingSize
        ) {
          continue;
        }
        res.setAdd(i, index, 1);
      }
    }
    return res;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/**
 * Map discrete values into numbers.
 *
 * Each distinct value seen by `fit` receives the next token from an internal
 * counter that starts at 0, in order of first appearance.
 */
export class DiscreteMapper<T> {
  /** Map categories to indices */
  mapping: Map<T, number>;
  /** An internal counter for remembering the last index in mapping. */
  #lastToken: Uint32Array;
  constructor() {
    this.mapping = new Map();
    this.#lastToken = new Uint32Array(1);
  }
  /**
   * Construct a mapping from a given set of values.
   * Values that already have a token keep it; new values get fresh tokens.
   *
   * @returns This mapper, to allow chaining.
   */
  fit(targets: T[]): this {
    for (const target of targets) {
      if (!this.mapping.has(target)) {
        this.mapping.set(target, this.#incrementToken());
      }
    }
    return this;
  }
  /**
   * Encode values into their respective mappings.
   * Returns -1 in case of missing mapping.
   */
  transform(targets: T[]): number[] {
    return Array.from(targets, (target) => this.mapping.get(target) ?? -1);
  }
  /**
   * Convert mapped numbers into actual values.
   *
   * Tokens without a mapping yield the string `"__unknown__"` (whatever `T`
   * is). Mapped values are returned as-is, including falsy ones such as `0`,
   * `""` or `false`.
   */
  untransform(data: number[]): T[] {
    // Build the token -> value lookup once per call instead of scanning the
    // whole mapping for every element.
    const reverse = this.#reverseMapping();
    const res: (T | "__unknown__")[] = Array.from(data, (token) =>
      // has() distinguishes "no mapping" from a mapped falsy/undefined value.
      reverse.has(token) ? (reverse.get(token) as T) : "__unknown__"
    );
    return res as T[];
  }
  /**
   * Look up the original value of a single token.
   *
   * @returns The first value in `mapping` whose token equals `data`, or
   * `undefined` if there is none.
   */
  getOg(data: number): T | undefined {
    for (const [value, token] of this.mapping.entries()) {
      if (token === data) {
        return value;
      }
    }
    return undefined;
  }
  /** Return the current counter value and advance the counter by one. */
  #incrementToken(): number {
    return Atomics.add(this.#lastToken, 0, 1);
  }
  /**
   * Invert `mapping` into a token -> value lookup. If several values share
   * a token, the first one in iteration order wins, matching `getOg`.
   */
  #reverseMapping(): Map<number, T> {
    const reverse = new Map<number, T>();
    for (const [value, token] of this.mapping.entries()) {
      if (!reverse.has(token)) {
        reverse.set(token, value);
      }
    }
    return reverse;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
export { DiscreteMapper } from "./discrete.ts"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,4 +4,5 @@ | |
* @module | ||
*/ | ||
|
||
export * from "./preprocess/mod.ts"; | ||
export * from "./cleaner.ts"; | ||
export * from "./vectorizer.ts" |
Oops, something went wrong.