Merge branch 'sentiment-analysis' into main

load1n9 authored Sep 7, 2024
2 parents a2129d4 + 7cc0b82 commit e905984
Showing 9 changed files with 40,350 additions and 9 deletions.
3 changes: 2 additions & 1 deletion deno.jsonc
@@ -40,7 +40,8 @@
 "example:multiple-linear": "deno -A ./examples/multiple-linear/student.ts",
 "example:binary": "deno -A ./examples/classification/binary_iris.ts",
 "example:multiclass": "deno -A ./examples/classification/iris.ts",
-"example:text": "deno -A ./examples/classification/spam.ts",
+"example:text-sentiment": "deno -A ./examples/sentiment-analysis/classifier.ts",
+"example:text-spam": "deno -A ./examples/classification/spam.ts",
 "example:filters": "deno -A examples/filters/conv.ts ",
 "example:train": "deno -A examples/model/train.ts ",
 "example:run": "deno -A examples/model/run.ts ",
70 changes: 70 additions & 0 deletions examples/sentiment-analysis/analyzer.ts
@@ -0,0 +1,70 @@
import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";

import { MatrixLike } from "jsr:@denosaurs/[email protected]/utilities";

import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
CountVectorizer,
SplitTokenizer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import Mappings from "./mappings.json" with { type: "json" };
import Vocab from "./vocab.json" with { type: "json" };


console.time("Time Elapsed");

console.log("\nImports loaded.");


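// Rebuild the tokenizer vocabulary from the vocab.json entries written by
// classifier.ts, so the same token-to-index mapping is used at inference time.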
const vocab = new Map();

for (const entry of Vocab) {
vocab.set(entry[0], entry[1]);
}

const tokenizer = new SplitTokenizer({
skipWords: "english",
vocabulary: vocab,
standardize: { lowercase: true, stripNewlines: true },
});

const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

console.log("\nX vectorized");
console.timeLog("Time Elapsed");

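// Restore the label encoder's class-to-index mapping from mappings.json.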
const encoder = new CategoricalEncoder<string>();
const mappings = new Map();

for (const entry of Mappings) {
mappings.set(entry[0], entry[1]);
}

encoder.mapping = mappings;

console.log("\nCPU Backend Loading");
console.timeLog("Time Elapsed");

await setupBackend(CPU);

console.log("\nCPU Backend Loaded");
console.timeLog("Time Elapsed");

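// Load the trained network and classify a single user-supplied text.
// Note: the input here is count-vectorized only; the TF-IDF weighting applied
// during training in classifier.ts is not reapplied.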
const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

const text = prompt("Text to analyze?") || "hello world";

const predYSoftmax = await net.predict(
tensor(vectorizer.transform(tokenizer.transform([text]), "f32"))
);

CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

console.log(`The sentiment predicted is ${predY[0]}`);
170 changes: 170 additions & 0 deletions examples/sentiment-analysis/classifier.ts
@@ -0,0 +1,170 @@
import {
AdamOptimizer,
Cost,
CPU,
Dropout1DLayer,
Init,
setupBackend,
tensor,
} from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";
import {
DenseLayer,
ReluLayer,
SoftmaxLayer,
} from "jsr:@denosaurs/[email protected]/core/layers";

import {
useSplit,
ClassificationReport,
MatrixLike,
} from "jsr:@denosaurs/[email protected]/utilities";

import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
CountVectorizer,
TfIdfTransformer,
SplitTokenizer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import { parse as parseCsv } from "jsr:@std/[email protected]/parse";

import { format as duration } from "jsr:@std/[email protected]/duration";

console.time("Time Elapsed");

console.log("\nImports loaded.");

const file = Deno.readTextFileSync(
"examples/sentiment-analysis/text_emotion.csv"
);

console.log("\nData file loaded.");
console.timeLog("Time Elapsed");

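// Parse the CSV into records with a `sentiment` label and a `content` text column.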
const data = parseCsv(file, { skipFirstRow: true }) as {
sentiment: string;
content: string;
}[];

const text = data.map((x) => x.content);
const labels = data.map((x) => x.sentiment);

console.log("\nCSV Parsed");
console.timeLog("Time Elapsed");

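// Shuffle and split the data 70/30 into training and test sets.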
const [[trainX, trainY], [testX, testY]] = useSplit(
{ shuffle: true, ratio: [7, 3] },
text,
labels
);

console.log("Data Split");
console.timeLog("Time Elapsed");

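// Tokenize the training texts: lowercase, strip newlines, drop English stop
// words, and build the vocabulary from the training split.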
const tokenizer = new SplitTokenizer({
skipWords: "english",
standardize: { lowercase: true, stripNewlines: true },
});

const tokens = tokenizer.fit(trainX).transform(trainX);

console.log("\nX tokenized");
console.timeLog("Time Elapsed");

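// Convert token ids into a bag-of-words count matrix, then empty the tokens
// array since only the vectorized form is needed from here on.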
const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

const vecX = vectorizer.transform(tokens, "f32");

tokens.splice(0, tokens.length);

console.log("\nX vectorized");
console.timeLog("Time Elapsed");

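// Re-weight the raw counts with TF-IDF.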
const transformer = new TfIdfTransformer();

const tfidfX = transformer.fit(vecX).transform<"f32">(vecX);

console.log("\nX Transformed", tfidfX.shape);
console.timeLog("Time Elapsed");

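// One-hot encode the sentiment labels.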
const encoder = new CategoricalEncoder<string>();

const oneHotY = encoder.fit(trainY).transform(trainY, "f32");

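// Persist the label mapping, vocabulary, and IDF weights for reuse at
// inference time (see analyzer.ts and tester.ts).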
Deno.writeTextFileSync(
"examples/sentiment-analysis/mappings.json",
JSON.stringify(Array.from(encoder.mapping.entries()))
);
Deno.writeTextFileSync(
"examples/sentiment-analysis/vocab.json",
JSON.stringify(Array.from(tokenizer.vocabulary.entries()))
);
Deno.writeTextFileSync(
"examples/sentiment-analysis/tfidf.json",
JSON.stringify(transformer.idf)
);

console.log("\nCPU Backend Loading");
console.timeLog("Time Elapsed");

await setupBackend(CPU);

console.log("\nCPU Backend Loaded");
console.timeLog("Time Elapsed");

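// Feed-forward classifier: stacked dense + ReLU blocks, dropout for
// regularization, and a softmax output over the emotion classes, trained
// with Adam and cross-entropy loss.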
const net = new Sequential({
size: [4, vecX.nCols],
layers: [
DenseLayer({ size: [256], init: Init.Kaiming }),
ReluLayer(),
DenseLayer({ size: [32], init: Init.Kaiming }),
ReluLayer(),
DenseLayer({ size: [16], init: Init.Kaiming }),
ReluLayer(),
DenseLayer({ size: [16], init: Init.Kaiming }),
ReluLayer(),
DenseLayer({ size: [16], init: Init.Kaiming }),
ReluLayer(),
Dropout1DLayer({ probability: 0.5 }),
DenseLayer({ size: [encoder.mapping.size], init: Init.Kaiming }),
SoftmaxLayer(),
],
silent: false,
optimizer: AdamOptimizer(),
cost: Cost.CrossEntropy,
patience: 10,
});

console.log("\nStarting");
console.timeLog("Time Elapsed");
const timeStart = performance.now();

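// Train for 100 epochs; the remaining positional arguments are presumably the
// batch count (2) and learning rate (0.002).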
net.train(
[{ inputs: tensor(tfidfX), outputs: tensor(oneHotY) }],
100,
2,
0.002
);

console.log(
`Training complete in ${duration(performance.now() - timeStart, {
style: "narrow",
})}.`
);

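// Evaluate on the held-out split: tokenize, count-vectorize, and TF-IDF
// transform the test texts with the fitted pipeline, then decode the softmax
// output back into labels for the classification report.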
const predYSoftmax = await net.predict(
tensor(
transformer.transform<"f32">(
vectorizer.transform(tokenizer.transform(testX), "f32")
)
)
);

CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

console.log(new ClassificationReport(testY, predY));

net.saveFile("examples/sentiment-analysis/sentiment.st");
1 change: 1 addition & 0 deletions examples/sentiment-analysis/mappings.json
@@ -0,0 +1 @@
[["empty",0],["sadness",1],["neutral",2],["worry",3],["surprise",4],["fun",5],["hate",6],["happiness",7],["enthusiasm",8],["love",9],["relief",10],["boredom",11],["anger",12]]
Binary file added examples/sentiment-analysis/sentiment.st
Binary file not shown.
97 changes: 97 additions & 0 deletions examples/sentiment-analysis/tester.ts
@@ -0,0 +1,97 @@
import { CPU, setupBackend, tensor } from "jsr:@denosaurs/[email protected]";
import { Sequential } from "jsr:@denosaurs/[email protected]/core";

import {
useSplit,
ClassificationReport,
MatrixLike,
} from "jsr:@denosaurs/[email protected]/utilities";

import { CategoricalEncoder } from "jsr:@denosaurs/[email protected]/utilities/encoding";
import {
CountVectorizer,
SplitTokenizer,
} from "jsr:@denosaurs/[email protected]/utilities/text";

import Mappings from "./mappings.json" with { type: "json" };
import Vocab from "./vocab.json" with { type: "json" };

import { parse as parseCsv } from "jsr:@std/[email protected]/parse";


console.time("Time Elapsed");

console.log("\nImports loaded.");

const file = Deno.readTextFileSync(
"examples/sentiment-analysis/text_emotion.csv"
);

console.log("\nData file loaded.");
console.timeLog("Time Elapsed");

const data = parseCsv(file, { skipFirstRow: true }) as {
sentiment: string;
content: string;
}[];
const text = data.map((x) => x.content);
const labels = data.map((x) => x.sentiment);

console.log("\nCSV Parsed");
console.timeLog("Time Elapsed");

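// Re-split the data as in classifier.ts. Note that the shuffle is random, so
// this test set will not exactly match the one held out during training.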
const [[trainX, trainY], [testX, testY]] = useSplit(
{ shuffle: true, ratio: [7, 3] },
text,
labels
);

console.log("Data Split");
console.timeLog("Time Elapsed");

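// Restore the vocabulary and label mapping saved by classifier.ts.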
const vocab = new Map();

for (const entry of Vocab) {
vocab.set(entry[0], entry[1]);
}

const tokenizer = new SplitTokenizer({
skipWords: "english",
vocabulary: vocab,
standardize: { lowercase: true, stripNewlines: true },
});

const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);

console.log("\nX vectorized");
console.timeLog("Time Elapsed");

const encoder = new CategoricalEncoder<string>();
const mappings = new Map();

for (const entry of Mappings) {
mappings.set(entry[0], entry[1]);
}

encoder.mapping = mappings;

console.log("\nCPU Backend Loading");
console.timeLog("Time Elapsed");

await setupBackend(CPU);

console.log("\nCPU Backend Loaded");
console.timeLog("Time Elapsed");

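// Reload the trained model and evaluate on the test split. As in analyzer.ts,
// the inputs are count vectors only; the TF-IDF weighting from training is
// not reapplied.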
const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");

const predYSoftmax = await net.predict(
tensor(vectorizer.transform(tokenizer.transform(testX), "f32"))
);

CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);

console.log(new ClassificationReport(testY, predY));

console.log(testY, predY);