
feat: Revamp utilities + sentiment analysis example #62

Merged (12 commits) on Sep 7, 2024
deno.jsonc (6 changes: 5 additions & 1 deletion)
@@ -22,6 +22,8 @@
"./utilities/image": "./packages/utilities/src/image/mod.ts",
"./utilities/metrics": "./packages/utilities/src/metrics/mod.ts",
"./utilities/encoding": "./packages/utilities/src/encoding/mod.ts",
"./utilities/mapper": "./packages/utilities/src/mapper/mod.ts",
"./utilities/transformer": "./packages/utilities/src/transformer/mod.ts",
"./utilities/misc": "./packages/utilities/src/utils/mod.ts",
// Tokenizers
"./tokenizers": "./packages/tokenizers/mod.ts",
@@ -40,7 +42,9 @@
"example:multiple-linear": "deno -A ./examples/multiple-linear/student.ts",
"example:binary": "deno -A ./examples/classification/binary_iris.ts",
"example:multiclass": "deno -A ./examples/classification/iris.ts",
"example:text-sentiment": "deno -A ./examples/sentiment-analysis/classifier.ts",
"example:sentiment-train": "deno -A ./examples/sentiment-analysis/classifier.ts",
"example:sentiment-test": "deno -A ./examples/sentiment-analysis/tester.ts",
"example:sentiment-try": "deno -A ./examples/sentiment-analysis/analyzer.ts",
"example:text-spam": "deno -A ./examples/classification/spam.ts",
"example:filters": "deno -A examples/filters/conv.ts ",
"example:train": "deno -A examples/model/train.ts ",
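With these tasks in place, the three stages of the sentiment example can be run independently. A usage sketch (deno task resolves these names from deno.jsonc):

deno task example:sentiment-train   # classifier.ts: fit the model and write sentiment.st, vocab.json, mappings.json, tfidf.json
deno task example:sentiment-test    # tester.ts: score the saved model on a held-out split
deno task example:sentiment-try     # analyzer.ts: predict the sentiment of a prompted line of text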
examples/classification/spam.ts (2 changes: 1 addition & 1 deletion)
@@ -94,7 +94,7 @@ net.train(
   // Train for 20 epochs
   20,
   2,
-  0.01
+  0.001
 );
 
 console.log(`training time: ${performance.now() - time}ms`);
examples/sentiment-analysis/analyzer.ts (50 changes: 17 additions & 33 deletions)
@@ -1,70 +1,54 @@
 import { CPU, setupBackend, tensor } from "jsr:@denosaurs/netsaur@0.4.0";
 import { Sequential } from "jsr:@denosaurs/netsaur@0.4.0/core";
 
-import {
-  useSplit,
-  ClassificationReport,
-  MatrixLike,
-} from "jsr:@denosaurs/netsaur@0.4.0/utilities";
+import type { MatrixLike } from "jsr:@denosaurs/netsaur@0.4.0/utilities";
 
 import { CategoricalEncoder } from "jsr:@denosaurs/netsaur@0.4.0/utilities/encoding";
 import {
-  CountVectorizer,
-  SplitTokenizer,
+  CountVectorizer,
+  SplitTokenizer,
+  TfIdfTransformer,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities/text";
 
-import Mappings from "./mappings.json" with {type: "json"}
-import Vocab from "./vocab.json" with {type: "json"}
-
-console.time("Time Elapsed");
-
-console.log("\nImports loaded.");
-
+import Mappings from "./mappings.json" with { type: "json" };
+import Vocab from "./vocab.json" with { type: "json" };
+import Idf from "./tfidf.json" with { type: "json" };
 
 const vocab = new Map();
 
 for (const entry of Vocab) {
-  vocab.set(entry[0], entry[1])
+  vocab.set(entry[0], entry[1]);
 }
 
 const tokenizer = new SplitTokenizer({
-  skipWords: "english",
-  vocabulary: vocab,
-  standardize: { lowercase: true, stripNewlines: true },
+  skipWords: "english",
+  vocabulary: vocab,
+  standardize: { lowercase: true, stripNewlines: true },
 });
 
 const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);
 
-console.log("\nX vectorized");
-console.timeLog("Time Elapsed");
+const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });
 
 const encoder = new CategoricalEncoder<string>();
 const mappings = new Map();
 
 for (const entry of Mappings) {
-  mappings.set(entry[0], entry[1])
+  mappings.set(entry[0], entry[1]);
 }
 
 encoder.mapping = mappings;
 
-console.log("\nCPU Backend Loading");
-console.timeLog("Time Elapsed");
-
 await setupBackend(CPU);
 
-console.log("\nCPU Backend Loaded");
-console.timeLog("Time Elapsed");
-
-const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st")
+const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");
 
-const text = prompt("Text to analyze?") || "hello world"
+const text = prompt("Text to analyze?") || "hello world";
 
 const predYSoftmax = await net.predict(
-  tensor(vectorizer.transform(tokenizer.transform([text]), "f32"))
+  tensor(transformer.transform<"f32">(vectorizer.transform(tokenizer.transform([text]), "f32"))),
 );
 
 CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
 const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);
 
-console.log(`The sentiment predicted is ${predY[0]}`)
+console.log(`The sentiment predicted is ${predY[0]}`);
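Stripped of the removed logging, the new analyzer is a single pipeline. A condensed sketch of the flow above, using the same netsaur 0.4.0 objects built earlier in the file, and assuming (as the usage above implies) that CategoricalEncoder.fromSoftmax converts each probability row to a one-hot argmax row in place:

// text -> token ids -> counts (1 x |vocab|) -> tf-idf weights -> softmax over the 13 labels
const counts = vectorizer.transform(tokenizer.transform([text]), "f32");
const weighted = transformer.transform<"f32">(counts);
const probs = await net.predict(tensor(weighted));
CategoricalEncoder.fromSoftmax<"f32">(probs as MatrixLike<"f32">);
const [label] = encoder.untransform(probs as MatrixLike<"f32">); // e.g. "happiness"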
examples/sentiment-analysis/classifier.ts (9 changes: 3 additions & 6 deletions)
@@ -2,13 +2,11 @@ import {
   AdamOptimizer,
   Cost,
   CPU,
-  Dropout1DLayer,
   Init,
   setupBackend,
   tensor,
 } from "jsr:@denosaurs/netsaur@0.4.0";
 import { Sequential } from "jsr:@denosaurs/netsaur@0.4.0/core";
-import { NadamOptimizer } from "jsr:@denosaurs/netsaur@0.4.0/core/optimizers";
 import {
   DenseLayer,
   ReluLayer,
@@ -18,7 +16,7 @@ import {
   useSplit,
   ClassificationReport,
-  MatrixLike,
+  type MatrixLike,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities";
 
 import { CategoricalEncoder } from "jsr:@denosaurs/netsaur@0.4.0/utilities/encoding";
@@ -103,7 +101,7 @@ Deno.writeTextFileSync(
 );
 Deno.writeTextFileSync(
   "examples/sentiment-analysis/tfidf.json",
-  JSON.stringify(transformer.idf)
+  JSON.stringify(Array.from(transformer.idf as Float64Array))
 );
 
 console.log("\nCPU Backend Loading");
@@ -115,7 +113,7 @@ console.log("\nCPU Backend Loaded");
 console.timeLog("Time Elapsed");
 
 const net = new Sequential({
-  size: [4, vecX.nCols],
+  size: [4, tfidfX.nCols],
   layers: [
     DenseLayer({ size: [256], init: Init.Kaiming }),
     ReluLayer(),
@@ -127,7 +125,6 @@ const net = new Sequential({
     ReluLayer(),
     DenseLayer({ size: [16], init: Init.Kaiming }),
     ReluLayer(),
-    Dropout1DLayer({ probability: 0.5 }),
     DenseLayer({ size: [encoder.mapping.size], init: Init.Kaiming }),
     SoftmaxLayer(),
   ],
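The Array.from change when writing tfidf.json is the load-bearing fix here: JSON.stringify serializes a Float64Array as an index-keyed object, not an array, which would break the Float64Array.from(Idf) calls in analyzer.ts and tester.ts. A minimal illustration in plain TypeScript:

const idf = new Float64Array([1.5, 2]);
JSON.stringify(idf);             // '{"0":1.5,"1":2}', an object keyed by index
JSON.stringify(Array.from(idf)); // '[1.5,2]', a real array, restorable with Float64Array.from()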
examples/sentiment-analysis/mappings.json (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
-[["empty",0],["sadness",1],["neutral",2],["worry",3],["surprise",4],["fun",5],["hate",6],["happiness",7],["enthusiasm",8],["love",9],["relief",10],["boredom",11],["anger",12]]
+[["empty",0],["sadness",1],["enthusiasm",2],["neutral",3],["worry",4],["surprise",5],["love",6],["fun",7],["hate",8],["happiness",9],["boredom",10],["relief",11],["anger",12]]
examples/sentiment-analysis/sentiment.st (binary file modified, not shown)
examples/sentiment-analysis/tester.ts (79 changes: 31 additions & 48 deletions)
@@ -2,96 +2,79 @@ import { CPU, setupBackend, tensor } from "jsr:@denosaurs/netsaur@0.4.0";
 import { Sequential } from "jsr:@denosaurs/netsaur@0.4.0/core";
 
 import {
-  useSplit,
-  ClassificationReport,
-  MatrixLike,
+  ClassificationReport,
+  type MatrixLike,
+  useSplit,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities";
 
 import { CategoricalEncoder } from "jsr:@denosaurs/netsaur@0.4.0/utilities/encoding";
 import {
-  CountVectorizer,
-  SplitTokenizer,
+  CountVectorizer,
+  SplitTokenizer,
+  TfIdfTransformer,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities/text";
 
-import Mappings from "./mappings.json" with {type: "json"}
-import Vocab from "./vocab.json" with {type: "json"}
+import Mappings from "./mappings.json" with { type: "json" };
+import Vocab from "./vocab.json" with { type: "json" };
+import Idf from "./tfidf.json" with { type: "json" };
 
 import { parse as parseCsv } from "jsr:@std/csv@1.0.3/parse";
 
-
 console.time("Time Elapsed");
 
 console.log("\nImports loaded.");
 
 const file = Deno.readTextFileSync(
-  "examples/sentiment-analysis/text_emotion.csv"
-);
-
-console.log("\nData file loaded.");
-console.timeLog("Time Elapsed");
-
-const data = parseCsv(file, { skipFirstRow: true }) as {
+  "examples/sentiment-analysis/text_emotion.csv",
+);
+
+const data = parseCsv(file, { skipFirstRow: true }) as {
   sentiment: string;
   content: string;
-}[];
-const text = data.map((x) => x.content);
+}[];
+const text = data.map((x) => x.content);
 const labels = data.map((x) => x.sentiment);
 
 console.log("\nCSV Parsed");
 console.timeLog("Time Elapsed");
 
-const [[trainX, trainY], [testX, testY]] = useSplit(
-  { shuffle: true, ratio: [7, 3] },
-  text,
-  labels
+const [[_trainX, _trainY], [testX, testY]] = useSplit(
+  { shuffle: true, ratio: [7, 3] },
+  text,
+  labels,
 );
 
 console.log("Data Split");
 console.timeLog("Time Elapsed");
 
 const vocab = new Map();
 
 for (const entry of Vocab) {
-  vocab.set(entry[0], entry[1])
+  vocab.set(entry[0], entry[1]);
 }
 
 const tokenizer = new SplitTokenizer({
-  skipWords: "english",
-  vocabulary: vocab,
-  standardize: { lowercase: true, stripNewlines: true },
+  skipWords: "english",
+  vocabulary: vocab,
+  standardize: { lowercase: true, stripNewlines: true },
 });
 
 const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);
 
-console.log("\nX vectorized");
-console.timeLog("Time Elapsed");
+const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });
 
 const encoder = new CategoricalEncoder<string>();
 const mappings = new Map();
 
 for (const entry of Mappings) {
-  mappings.set(entry[0], entry[1])
+  mappings.set(entry[0], entry[1]);
 }
 
 encoder.mapping = mappings;
 
 console.log("\nCPU Backend Loading");
 console.timeLog("Time Elapsed");
 
 await setupBackend(CPU);
 
 console.log("\nCPU Backend Loaded");
 console.timeLog("Time Elapsed");
 
-const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st")
+const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");
 
 const predYSoftmax = await net.predict(
-  tensor(vectorizer.transform(tokenizer.transform(testX), "f32"))
+  tensor(
+    transformer.transform<"f32">(
+      vectorizer.transform(tokenizer.transform(testX), "f32"),
+    ),
+  ),
 );
 
 CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
 const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);
 
 console.log(new ClassificationReport(testY, predY));
 
-console.log(testY, predY)
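One caveat with the tester: it re-splits the full CSV with shuffle: true, so unless useSplit is internally seeded, the held-out rows differ from the split used during training and some test samples were likely seen by the model, flattering the ClassificationReport. A workaround sketch in plain TypeScript (not a netsaur API), holding out a fixed tail instead:

// Deterministic 70/30 split: the same rows land in the test set on every
// run, so a training script applying the same rule never sees them.
const cut = Math.floor(text.length * 0.7);
const testX = text.slice(cut);
const testY = labels.slice(cut);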
examples/sentiment-analysis/tfidf.json (1 change: 1 addition & 0 deletions; large diff not rendered)

examples/sentiment-analysis/vocab.json (2 changes: 1 addition & 1 deletion; large diff not rendered)

packages/utilities/src/encoding/categorical.ts (92 changes: 0 additions & 92 deletions; file deleted)
