diff --git a/examples/sentiment-analysis/analyzer.ts b/examples/sentiment-analysis/analyzer.ts
index 6ae951b..c3b3d9c 100644
--- a/examples/sentiment-analysis/analyzer.ts
+++ b/examples/sentiment-analysis/analyzer.ts
@@ -1,69 +1,54 @@
 import { CPU, setupBackend, tensor } from "jsr:@denosaurs/netsaur@0.4.0";
 import { Sequential } from "jsr:@denosaurs/netsaur@0.4.0/core";
-import type {
-  MatrixLike,
-} from "jsr:@denosaurs/netsaur@0.4.0/utilities";
+import type { MatrixLike } from "jsr:@denosaurs/netsaur@0.4.0/utilities";
 import { CategoricalEncoder } from "jsr:@denosaurs/netsaur@0.4.0/utilities/encoding";
 import {
-    CountVectorizer,
-    SplitTokenizer,
+  CountVectorizer,
+  SplitTokenizer,
+  TfIdfTransformer,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities/text";
-import Mappings from "./mappings.json" with {type: "json"}
-import Vocab from "./vocab.json" with {type: "json"}
-import Idf from "./idf.json" with {type: "json"}
-
-
-console.time("Time Elapsed");
-
-console.log("\nImports loaded.");
-
+import Mappings from "./mappings.json" with { type: "json" };
+import Vocab from "./vocab.json" with { type: "json" };
+import Idf from "./tfidf.json" with { type: "json" };
 
 const vocab = new Map();
 
 for (const entry of Vocab) {
-  vocab.set(entry[0], entry[1])
+  vocab.set(entry[0], entry[1]);
 }
 
 const tokenizer = new SplitTokenizer({
-    skipWords: "english",
-    vocabulary: vocab,
-    standardize: { lowercase: true, stripNewlines: true },
+  skipWords: "english",
+  vocabulary: vocab,
+  standardize: { lowercase: true, stripNewlines: true },
 });
 
 const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);
-
-console.log("\nX vectorized");
-console.timeLog("Time Elapsed");
+const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });
 
 const encoder = new CategoricalEncoder();
 const mappings = new Map();
 
 for (const entry of Mappings) {
-  mappings.set(entry[0], entry[1])
+  mappings.set(entry[0], entry[1]);
 }
 
 encoder.mapping = mappings;
 
-console.log("\nCPU Backend Loading");
-console.timeLog("Time Elapsed");
-
 await setupBackend(CPU);
 
-console.log("\nCPU Backend Loaded");
-console.timeLog("Time Elapsed");
-
-const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st")
+const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");
 
-const text = prompt("Text to analyze?") || "hello world"
+const text = prompt("Text to analyze?") || "hello world";
 
 const predYSoftmax = await net.predict(
-  tensor(vectorizer.transform(tokenizer.transform([text]), "f32"))
+  tensor(transformer.transform<"f32">(vectorizer.transform(tokenizer.transform([text]), "f32"))),
 );
 
 CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
 const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);
 
-console.log(`The sentiment predicted is ${predY[0]}`)
\ No newline at end of file
+console.log(`The sentiment predicted is ${predY[0]}`);
diff --git a/examples/sentiment-analysis/tester.ts b/examples/sentiment-analysis/tester.ts
index ecd5d4d..ce8656e 100644
--- a/examples/sentiment-analysis/tester.ts
+++ b/examples/sentiment-analysis/tester.ts
@@ -2,92 +2,76 @@ import { CPU, setupBackend, tensor } from "jsr:@denosaurs/netsaur@0.4.0";
 import { Sequential } from "jsr:@denosaurs/netsaur@0.4.0/core";
 import {
-    useSplit,
-    ClassificationReport,
-    type MatrixLike,
+  ClassificationReport,
+  type MatrixLike,
+  useSplit,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities";
 import { CategoricalEncoder } from "jsr:@denosaurs/netsaur@0.4.0/utilities/encoding";
 import {
-    CountVectorizer,
-    SplitTokenizer,
+  CountVectorizer,
+  SplitTokenizer,
+  TfIdfTransformer,
 } from "jsr:@denosaurs/netsaur@0.4.0/utilities/text";
-import Mappings from "./mappings.json" with {type: "json"}
-import Vocab from "./vocab.json" with {type: "json"}
-import Idf from "./idf.json" with {type: "json"}
+import Mappings from "./mappings.json" with { type: "json" };
+import Vocab from "./vocab.json" with { type: "json" };
+import Idf from "./tfidf.json" with { type: "json" };
 import { parse as parseCsv } from "jsr:@std/csv@1.0.3/parse";
-
-console.time("Time Elapsed");
-
-console.log("\nImports loaded.");
-
 
 const file = Deno.readTextFileSync(
-  "examples/sentiment-analysis/text_emotion.csv"
-  );
-
-  console.log("\nData file loaded.");
-  console.timeLog("Time Elapsed");
-
-  const data = parseCsv(file, { skipFirstRow: true }) as {
+  "examples/sentiment-analysis/text_emotion.csv",
+);
+
+const data = parseCsv(file, { skipFirstRow: true }) as {
   sentiment: string;
   content: string;
-  }[];
-  const text = data.map((x) => x.content);
+}[];
+const text = data.map((x) => x.content);
 const labels = data.map((x) => x.sentiment);
 
-console.log("\nCSV Parsed");
-console.timeLog("Time Elapsed");
-
 const [[_trainX, _trainY], [testX, testY]] = useSplit(
-    { shuffle: true, ratio: [7, 3] },
-    text,
-    labels
+  { shuffle: true, ratio: [7, 3] },
+  text,
+  labels,
 );
 
-console.log("Data Split");
-console.timeLog("Time Elapsed");
-
 const vocab = new Map();
 
 for (const entry of Vocab) {
-  vocab.set(entry[0], entry[1])
+  vocab.set(entry[0], entry[1]);
 }
 
 const tokenizer = new SplitTokenizer({
-    skipWords: "english",
-    vocabulary: vocab,
-    standardize: { lowercase: true, stripNewlines: true },
+  skipWords: "english",
+  vocabulary: vocab,
+  standardize: { lowercase: true, stripNewlines: true },
 });
 
 const vectorizer = new CountVectorizer(tokenizer.vocabulary.size);
 
-console.log("\nX vectorized");
-console.timeLog("Time Elapsed");
+const transformer = new TfIdfTransformer({ idf: Float64Array.from(Idf) });
 
 const encoder = new CategoricalEncoder();
 const mappings = new Map();
 
 for (const entry of Mappings) {
-  mappings.set(entry[0], entry[1])
+  mappings.set(entry[0], entry[1]);
 }
 
 encoder.mapping = mappings;
 
-console.log("\nCPU Backend Loading");
-console.timeLog("Time Elapsed");
-
 await setupBackend(CPU);
 
-console.log("\nCPU Backend Loaded");
-console.timeLog("Time Elapsed");
-
-const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st")
+const net = Sequential.loadFile("examples/sentiment-analysis/sentiment.st");
 
 const predYSoftmax = await net.predict(
-  tensor(vectorizer.transform(tokenizer.transform(testX), "f32"))
+  tensor(
+    transformer.transform<"f32">(
+      vectorizer.transform(tokenizer.transform(testX), "f32"),
+    ),
+  ),
 );
 
 CategoricalEncoder.fromSoftmax<"f32">(predYSoftmax as MatrixLike<"f32">);
@@ -95,4 +79,4 @@ const predY = encoder.untransform(predYSoftmax as MatrixLike<"f32">);
 
 console.log(new ClassificationReport(testY, predY));
 
-console.log(testY, predY)
\ No newline at end of file
+console.log(testY, predY);
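
Both scripts now route the raw counts from CountVectorizer through a TfIdfTransformer seeded with the precomputed inverse document frequencies loaded from tfidf.json, so the network is fed tf-idf weights rather than plain token counts. The sketch below illustrates that reweighting step on plain typed arrays; tfIdfRow is a hypothetical helper written for this note, not netsaur's MatrixLike-based implementation.

// tf-idf reweighting in miniature: each term count is scaled by that term's
// precomputed inverse document frequency, so words that are common across
// the whole corpus contribute less to the prediction than distinctive ones.
function tfIdfRow(counts: Float32Array, idf: Float64Array): Float32Array {
  const weighted = new Float32Array(counts.length);
  for (let i = 0; i < counts.length; i++) {
    weighted[i] = counts[i] * idf[i];
  }
  return weighted;
}

// Example: counts [2, 0, 1] with idf weights [1.1, 2.3, 3.0] come out as
// roughly [2.2, 0, 3.0], boosting the rarer third term over the first.
console.log(tfIdfRow(Float32Array.from([2, 0, 1]), Float64Array.from([1.1, 2.3, 3.0])));

Because the idf vector is computed once at training time and shipped as JSON, analyzer.ts and tester.ts can apply the exact same weighting at inference without re-reading the training corpus.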