diff --git a/deno.jsonc b/deno.jsonc
index 2840807..8e943e8 100644
--- a/deno.jsonc
+++ b/deno.jsonc
@@ -22,6 +22,8 @@
     "./utilities/image": "./packages/utilities/src/image/mod.ts",
     "./utilities/metrics": "./packages/utilities/src/metrics/mod.ts",
     "./utilities/encoding": "./packages/utilities/src/encoding/mod.ts",
+    "./utilities/mapper": "./packages/utilities/src/mapper/mod.ts",
+    "./utilities/transformer": "./packages/utilities/src/transformer/mod.ts",
     "./utilities/misc": "./packages/utilities/src/utils/mod.ts",
     // Tokenizers
     "./tokenizers": "./packages/tokenizers/mod.ts",
diff --git a/packages/utilities/src/encoding/categorical.ts b/packages/utilities/src/encoding/categorical.ts
deleted file mode 100644
index a7d5700..0000000
--- a/packages/utilities/src/encoding/categorical.ts
+++ /dev/null
@@ -1,92 +0,0 @@
-import { Matrix, type MatrixLike } from "../mod.ts";
-import type { DataType, DType, DTypeValue } from "../utils/common_types.ts";
-
-/** Convert class labels into categorical variables (one-hot) */
-export class CategoricalEncoder<T> {
-  /** Map categories to indices */
-  mapping: Map<T, number>;
-  /** An internal counter for remembering the last index in mapping. */
-  #lastToken: Uint32Array;
-  constructor() {
-    this.mapping = new Map();
-    this.#lastToken = new Uint32Array(1);
-  }
-  /** Construct a mapping from a given set of text. */
-  fit(targets: T[]): this {
-    let i = 0;
-    while (i < targets.length) {
-      if (!this.mapping.has(targets[i])) {
-        const token = this.#incrementToken();
-        this.mapping.set(targets[i], token);
-      }
-      i += 1;
-    }
-    return this;
-  }
-  /** One-hot encoding of categorical values */
-  transform<DT extends DataType>(targets: T[], dType: DT): Matrix<DT> {
-    const res = new Matrix<DT>(dType, [targets.length, this.#lastToken[0]]);
-    let i = 0;
-    while (i < targets.length) {
-      const index = this.mapping.get(targets[i]);
-      if (index !== 0 && !index) {
-        i += 1;
-        continue;
-      }
-      res.setCell(i, index, 1);
-      i += 1;
-    }
-    return res;
-  }
-  untransform<DT extends DataType>(data: MatrixLike<DT>): T[] {
-    const matrix = new Matrix(data);
-    const res = new Array(matrix.nRows);
-    for (let i = 0; i < res.length; i += 1) {
-      const idx = matrix.row(i).findIndex((x) => x === 1);
-      res[i] = this.getOg(idx) || "__unknown__";
-    }
-    return res;
-  }
-  getOg(data: number): T | undefined {
-    for (const [k, v] of this.mapping.entries()) {
-      if (v === data) {
-        return k;
-      }
-    }
-    return undefined;
-  }
-  #incrementToken(): number {
-    return Atomics.add(this.#lastToken, 0, 1);
-  }
-  /**
-   * Convert softmax outputs into categorical outputs
-   * This method mutates the original matrix.
-   * @returns The modified matrix.
-   */
-  static fromSoftmax<DT extends DataType>(data: MatrixLike<DT>): Matrix<DT> {
-    const matrix = new Matrix(data);
-    for (let i = 0; i < matrix.nRows; i += 1) {
-      const max = matrix
-        .row(i)
-        // @ts-ignore It can reduce.
-        .reduce(
-          (acc: number, curr: DTypeValue<DT>, i: number, arr: DType<DT>) =>
-            arr[acc] > curr ? acc : i,
-          0,
-        );
-      if (
-        data.data instanceof BigInt64Array ||
-        data.data instanceof BigUint64Array
-      ) {
-        const newR = new Array(matrix.nCols).fill(0n);
-        newR[max] = 1n;
-        matrix.setRow(i, newR);
-      } else {
-        const newR = new Array(matrix.nCols).fill(0);
-        newR[max] = 1;
-        matrix.setRow(i, newR);
-      }
-    }
-    return matrix;
-  }
-}
diff --git a/packages/utilities/src/encoding/mod.ts b/packages/utilities/src/encoding/mod.ts
index 26408b2..741dea0 100644
--- a/packages/utilities/src/encoding/mod.ts
+++ b/packages/utilities/src/encoding/mod.ts
@@ -1,6 +1,4 @@
-/**
- * Encoding data into different representations.
- * @module
- */
-
-export * from "./categorical.ts";
+export { MultiHotEncoder } from "./multihot.ts";
+export { OneHotEncoder } from "./onehot.ts";
+export { TfEncoder } from "./termfrequency.ts";
+export { transformSoftmaxMut } from "./softmax.ts";
diff --git a/packages/utilities/src/encoding/multihot.ts b/packages/utilities/src/encoding/multihot.ts
new file mode 100644
index 0000000..3fd3924
--- /dev/null
+++ b/packages/utilities/src/encoding/multihot.ts
@@ -0,0 +1,46 @@
+import { Matrix } from "../mod.ts";
+import type { DataType } from "../utils/common_types.ts";
+
+/**
+ * Convert 2D array of indices into multi-hot encoded vectors.
+ */
+export class MultiHotEncoder {
+  /** Size of encoded vectors. */
+  mappingSize: number;
+  /** @param mappingSize Number of columns in each encoded vector. */
+  constructor(mappingSize: number) {
+    this.mappingSize = mappingSize;
+  }
+  /**
+   * Encode each row of indices as a vector of `mappingSize` cells,
+   * writing 1 into every cell whose position occurs in that row.
+   * Indices outside `[0, mappingSize)` are skipped.
+   * @param targets Rows of indices, as a matrix or a 2D array.
+   * @param dType Result data type; when omitted, `targets.dType` is used.
+   * @returns A `targets.length` x `mappingSize` matrix.
+   * @throws When `targets` is not a matrix and `dType` is missing.
+   */
+  transform<DT extends DataType>(targets: Matrix<DT>): Matrix<DT>;
+  transform<DT extends DataType>(targets: number[][], dType: DT): Matrix<DT>;
+  transform<DT extends DataType>(
+    targets: number[][] | Matrix<DT>,
+    dType?: DT
+  ): Matrix<DT> {
+    if (!dType && !(targets instanceof Matrix))
+      throw new Error("dType required when not dealing with matrices.");
+    const dataType = dType || (targets as Matrix<DT>).dType;
+    const res = new Matrix<DT>(dataType, [targets.length, this.mappingSize]);
+    for (let i = 0; i < targets.length; i += 1) {
+      const row = targets instanceof Matrix ? targets.row(i) : targets[i];
+      for (let j = 0; j < row.length; j += 1) {
+        const index = Number(row[j]);
+        // Validate against the vector width (not the row length) and
+        // reject negatives such as the -1 DiscreteMapper emits for unknowns.
+        if (index >= 0 && index < this.mappingSize) {
+          res.setCell(i, index, 1);
+        }
+      }
+    }
+    return res;
+  }
+}
diff --git a/packages/utilities/src/encoding/onehot.ts b/packages/utilities/src/encoding/onehot.ts
new file mode 100644
index 0000000..c09d096
--- /dev/null
+++ b/packages/utilities/src/encoding/onehot.ts
@@ -0,0 +1,48 @@
+import { Matrix, type MatrixLike } from "../mod.ts";
+import type { DataType } from "../utils/common_types.ts";
+
+/**
+ * Convert an array of indices into one-hot encoded vectors.
+ */
+export class OneHotEncoder {
+  /** Size of one-hot encoded vectors. */
+  mappingSize: number;
+  /** @param mappingSize Number of columns in each one-hot vector. */
+  constructor(mappingSize: number) {
+    this.mappingSize = mappingSize;
+  }
+  /**
+   * One-hot encode a list of indices, one output row per index.
+   * Rows for indices outside `[0, mappingSize)` receive no 1.
+   * @param targets Indices to encode.
+   * @param dType Data type of the resulting matrix.
+   * @returns A `targets.length` x `mappingSize` matrix.
+   */
+  transform<DT extends DataType>(targets: number[], dType: DT): Matrix<DT> {
+    const res = new Matrix<DT>(dType, [targets.length, this.mappingSize]);
+    for (let i = 0; i < targets.length; i += 1) {
+      const index = targets[i];
+      // Negative indices (e.g. the -1 DiscreteMapper emits for unknown
+      // values) must not reach setCell, just like too-large ones.
+      if (index >= 0 && index < this.mappingSize) {
+        res.setCell(i, index, 1);
+      }
+    }
+    return res;
+  }
+  /**
+   * Recover indices from one-hot rows.
+   * @param data Matrix-like value holding one encoded vector per row.
+   * @returns For each row, the position of its first cell equal to 1,
+   * or -1 when the row has none.
+   */
+  untransform<DT extends DataType>(data: MatrixLike<DT>): number[] {
+    const matrix = new Matrix(data);
+    const res = new Array(matrix.nRows);
+    for (let i = 0; i < res.length; i += 1) {
+      // Number() lets bigint cells (1n) match as well as numeric ones.
+      res[i] = matrix.row(i).findIndex((x) => Number(x) === 1);
+    }
+    return res;
+  }
+}
diff --git a/packages/utilities/src/encoding/softmax.ts b/packages/utilities/src/encoding/softmax.ts
new file mode 100644
index 0000000..a1abe6c
--- /dev/null
+++ b/packages/utilities/src/encoding/softmax.ts
@@ -0,0 +1,34 @@
+import { Matrix, type MatrixLike } from "../mod.ts";
+import type { DataType, DType, DTypeValue } from "../utils/common_types.ts";
+
+/**
+ * Convert a softmax output into one-hot vectors.
+ * Every row keeps a single 1 at the position of its largest cell
+ * (the last such position when several cells tie); all other cells become 0.
+ * Mutates the input.
+ */
+export function transformSoftmaxMut<DT extends DataType>(
+  targets: MatrixLike<DT>
+): Matrix<DT> {
+  const matrix = new Matrix(targets);
+  // 64-bit integer buffers hold bigint cells, so their rows are built from bigints.
+  const usesBigInt =
+    targets.data instanceof BigInt64Array ||
+    targets.data instanceof BigUint64Array;
+  let r = 0;
+  while (r < matrix.nRows) {
+    const hottest = matrix
+      .row(r)
+      // @ts-ignore It can reduce.
+      .reduce(
+        (best: number, value: DTypeValue<DT>, at: number, cells: DType<DT>) =>
+          cells[best] > value ? best : at,
+        0
+      );
+    const oneHot = new Array(matrix.nCols).fill(usesBigInt ? 0n : 0);
+    oneHot[hottest] = usesBigInt ? 1n : 1;
+    matrix.setRow(r, oneHot);
+    r += 1;
+  }
+  return matrix;
+}
diff --git a/packages/utilities/src/encoding/termfrequency.ts b/packages/utilities/src/encoding/termfrequency.ts
new file mode 100644
index 0000000..acb523f
--- /dev/null
+++ b/packages/utilities/src/encoding/termfrequency.ts
@@ -0,0 +1,48 @@
+import { Matrix } from "../mod.ts";
+import type { DataType } from "../utils/common_types.ts";
+
+/**
+ * Convert 2D array of indices into multi-hot encoded vectors
+ * where each index contains the number of times the respective
+ * value appears in a sample (term frequency encoder).
+ */
+export class TfEncoder {
+  /** Size of encoded vectors. */
+  mappingSize: number;
+  /** @param mappingSize Number of columns in each encoded vector. */
+  constructor(mappingSize: number) {
+    this.mappingSize = mappingSize;
+  }
+  /**
+   * Encode each row of indices as a vector of `mappingSize` counts:
+   * every occurrence of an index adds 1 to the cell at that position.
+   * Indices outside `[0, mappingSize)` are skipped.
+   * @param targets Rows of indices, as a matrix or a 2D array.
+   * @param dType Result data type; when omitted, `targets.dType` is used.
+   * @returns A `targets.length` x `mappingSize` matrix.
+   * @throws When `targets` is not a matrix and `dType` is missing.
+   */
+  transform<DT extends DataType>(targets: Matrix<DT>): Matrix<DT>;
+  transform<DT extends DataType>(targets: number[][], dType: DT): Matrix<DT>;
+  transform<DT extends DataType>(
+    targets: number[][] | Matrix<DT>,
+    dType?: DT
+  ): Matrix<DT> {
+    if (!dType && !(targets instanceof Matrix))
+      throw new Error("dType required when not dealing with matrices.");
+    const dataType = dType || (targets as Matrix<DT>).dType;
+    const res = new Matrix<DT>(dataType, [targets.length, this.mappingSize]);
+    for (let i = 0; i < targets.length; i += 1) {
+      const row = targets instanceof Matrix ? targets.row(i) : targets[i];
+      for (let j = 0; j < row.length; j += 1) {
+        const index = Number(row[j]);
+        // Validate against the vector width (not the row length) and
+        // reject negatives such as the -1 DiscreteMapper emits for unknowns.
+        if (index >= 0 && index < this.mappingSize) {
+          res.setAdd(i, index, 1);
+        }
+      }
+    }
+    return res;
+  }
+}
diff --git a/packages/utilities/src/mapper/discrete.ts b/packages/utilities/src/mapper/discrete.ts
new file mode 100644
index 0000000..bdfbf5e
--- /dev/null
+++ b/packages/utilities/src/mapper/discrete.ts
@@ -0,0 +1,65 @@
+/** Map discrete values into numbers */
+export class DiscreteMapper<T> {
+  /** Map categories to indices */
+  mapping: Map<T, number>;
+  /** An internal counter for remembering the last index in mapping. */
+  #lastToken: Uint32Array;
+  constructor() {
+    this.mapping = new Map();
+    this.#lastToken = new Uint32Array(1);
+  }
+  /**
+   * Construct a mapping from a given set of values.
+   * Values seen for the first time get the next free index, starting at 0;
+   * values that are already mapped keep their index.
+   * @returns This mapper, for chaining.
+   */
+  fit(targets: T[]): this {
+    for (const target of targets) {
+      if (!this.mapping.has(target)) {
+        this.mapping.set(target, this.#incrementToken());
+      }
+    }
+    return this;
+  }
+  /**
+   * Encode values into their respective mappings.
+   * Returns -1 in case of missing mapping.
+   */
+  transform(targets: T[]): number[] {
+    const res = new Array(targets.length);
+    for (let i = 0; i < targets.length; i += 1) {
+      res[i] = this.mapping.get(targets[i]) ?? -1;
+    }
+    return res;
+  }
+  /**
+   * Convert mapped numbers into actual values.
+   * Numbers without a mapping become the string "__unknown__".
+   */
+  untransform(data: number[]): T[] {
+    const res = new Array(data.length);
+    for (let i = 0; i < res.length; i += 1) {
+      // `??` rather than `||`: mapped values that are falsy (0, "", false)
+      // are real values and must not be replaced by the placeholder.
+      res[i] = this.getOg(data[i]) ?? "__unknown__";
+    }
+    return res;
+  }
+  /**
+   * Look up the original value for a mapped number.
+   * @returns The first key in `mapping` with that number, or `undefined`.
+   */
+  getOg(data: number): T | undefined {
+    for (const [k, v] of this.mapping.entries()) {
+      if (v === data) {
+        return k;
+      }
+    }
+    return undefined;
+  }
+  /** Advance the counter and return the value it held before. */
+  #incrementToken(): number {
+    return Atomics.add(this.#lastToken, 0, 1);
+  }
+}
diff --git a/packages/utilities/src/mapper/mod.ts b/packages/utilities/src/mapper/mod.ts
new file mode 100644
index 0000000..3942fc0
--- /dev/null
+++ b/packages/utilities/src/mapper/mod.ts
@@ -0,0 +1 @@
+export { DiscreteMapper } from "./discrete.ts";
diff --git a/packages/utilities/src/text/preprocess/cleaner.ts b/packages/utilities/src/text/cleaner.ts
similarity index 69%
rename from packages/utilities/src/text/preprocess/cleaner.ts
rename to packages/utilities/src/text/cleaner.ts
index 575bd28..5627d5b 100644
--- a/packages/utilities/src/text/preprocess/cleaner.ts
+++ b/packages/utilities/src/text/cleaner.ts
@@ -1,4 +1,5 @@
-import type { StandardizeConfig } from "../../utils/common_types.ts";
+import type { StandardizeConfig } from "../utils/common_types.ts";
+import { DefaultIgnoreList } from "../constants/stop_words.ts";
 
 /** Simple text cleaner */
 export class TextCleaner implements StandardizeConfig {
@@ -6,16 +7,19 @@ export class TextCleaner implements StandardizeConfig {
   lowercase: boolean;
   normalizeWhiteSpaces: boolean;
   stripNewlines: boolean;
+  removeStopWords: false | "english" | string[];
   constructor({
     stripHtml = false,
     lowercase = false,
     normalizeWhiteSpaces = true,
     stripNewlines = true,
+    removeStopWords = false,
   }: StandardizeConfig = {}) {
     this.stripHtml = stripHtml;
     this.lowercase = lowercase;
     this.normalizeWhiteSpaces = normalizeWhiteSpaces;
     this.stripNewlines = stripNewlines;
+    this.removeStopWords = removeStopWords;
   }
   clean(text: string): string;
   clean(text: string[]): string[];
@@ -35,7 +39,8 @@ export function preprocess(
     lowercase = false,
     normalizeWhiteSpaces = true,
     stripNewlines = true,
-  }: StandardizeConfig = {},
+    removeStopWords = false,
+  }: StandardizeConfig = {}
 ): string {
   if (lowercase) {
     text = text.toLowerCase();
@@ -49,5 +54,13 @@ export function preprocess(
   if (normalizeWhiteSpaces) {
     text = text.replace(/\s\s+/g, " ");
   }
+  if (removeStopWords) {
+    const stopWords =
+      removeStopWords === "english" ? DefaultIgnoreList : removeStopWords;
+    text = text
+      .split(" ")
+      .filter((x) => !stopWords.includes(x))
+      .join(" ");
+  }
   return text;
 }
diff --git a/packages/utilities/src/text/mod.ts b/packages/utilities/src/text/mod.ts
index 84e8bda..2e62a00 100644
--- a/packages/utilities/src/text/mod.ts
+++ b/packages/utilities/src/text/mod.ts
@@ -4,4 +4,5 @@
  * @module
  */
 
-export * from "./preprocess/mod.ts";
+export * from "./cleaner.ts";
+export * from "./vectorizer.ts";
diff --git a/packages/utilities/src/text/preprocess/mod.ts b/packages/utilities/src/text/preprocess/mod.ts
deleted file mode 100644
index 246a7c3..0000000
--- a/packages/utilities/src/text/preprocess/mod.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import type {
-  Cleaner,
-  Tokenizer,
-  Transformer,
-  Vectorizer,
-} from "../../utils/common_types.ts";
-
-// import { TextCleaner } from "./cleaner.ts";
-// import { SplitTokenizer } from "./tokenize/mod.ts";
-// import { CountVectorizer } from "./vectorize/mod.ts";
-// import { TfIdfTransformer } from "./transformer/mod.ts";
-
-/** TODO */
-interface PreprocessorConfig {
-  vectorizer: Vectorizer;
-  tokenizer: Tokenizer;
-  cleaner: Cleaner;
-  transformer: Transformer;
-};
-
-/** TODO */
-export class TextPreprocessor implements Partial<PreprocessorConfig> {
-  // todo
-}
-
-export * from "./cleaner.ts";
-export * from "./tokenize/mod.ts";
-export * from "./transformer/mod.ts";
-export * from "./vectorize/mod.ts";
diff --git a/packages/utilities/src/text/preprocess/tokenize/mod.ts b/packages/utilities/src/text/preprocess/tokenize/mod.ts
deleted file mode 100644
index 80a9941..0000000
--- a/packages/utilities/src/text/preprocess/tokenize/mod.ts
+++ /dev/null
@@ -1 +0,0 @@
-export { SplitTokenizer } from "./split.ts";
diff --git a/packages/utilities/src/text/preprocess/tokenize/split.ts b/packages/utilities/src/text/preprocess/tokenize/split.ts
deleted file mode 100644
index ba02928..0000000
--- a/packages/utilities/src/text/preprocess/tokenize/split.ts
+++ /dev/null
@@ -1,109 +0,0 @@
-import { DefaultIgnoreList } from "../../../constants/stop_words.ts";
-import type { BaseTokenizerOptions } from "../../../utils/common_types.ts";
-
-/** Tokenize text based on separator (whitespace) */
-export class SplitTokenizer {
-  /** Words to ignore from vocabulary */
-  skipWords: "english" | false | string[];
-  /** Configuration / Function for preprocessing */
-  vocabulary: Map<string, number>;
-  /** An internal counter for remembering the last index in vocabulary. */
-  #lastToken: Uint32Array;
-  constructor(
-    options: Partial<BaseTokenizerOptions & { indices: boolean }> = {},
-  ) {
-    this.skipWords = options.skipWords ?? false;
-    this.vocabulary = options.vocabulary ?? new Map();
-    this.#lastToken = new Uint32Array(1);
-    if (options.indices && !this.vocabulary.size) {
-      this.#lastToken[0] = 2;
-      this.vocabulary.set("__pad__", 0);
-      this.vocabulary.set("__unk__", 1);
-    }
-    if (this.vocabulary.size) {
-      this.#lastToken[0] = this.vocabulary.size;
-    }
-  }
-  get lastToken(): number {
-    return Atomics.load(this.#lastToken, 0);
-  }
-  /** Construct a vocabulary from a given set of text. */
-  fit(text: string | string[]): this {
-    if (Array.isArray(text)) {
-      let i = 0;
-      while (i < text.length) {
-        this.fit(text[i]);
-        i += 1;
-      }
-    } else {
-      const words = this.split(text);
-      let i = 0;
-      while (i < words.length) {
-        if (!this.vocabulary.has(words[i])) {
-          if (this.skipWords === "english") {
-            if (DefaultIgnoreList.includes(words[i])) {
-              i += 1;
-              continue;
-            }
-          } else if (Array.isArray(this.skipWords)) {
-            if (this.skipWords.includes(words[i])) {
-              i += 1;
-              continue;
-            }
-          }
-          const token = this.#incrementToken();
-          this.vocabulary.set(words[i], token);
-        }
-        i += 1;
-      }
-    }
-    return this;
-  }
-  #incrementToken(): number {
-    return Atomics.add(this.#lastToken, 0, 1);
-  }
-  /**
-   * Convert a document (string | array of strings) into vectors.
-   */
-  transform(text: string | string[]): number[][] {
-    if (!this.vocabulary.size) {
-      throw new Error(
-        "Tokenizer vocabulary not initialized yet. Call `Tokenizer()` with a custom vocabulary or use `.fit()` on text.",
-      );
-    }
-    if (Array.isArray(text)) {
-      const size = Math.max(...text.map((x) => this.split(x).length));
-      const res = Array(text.length);
-      let i = 0;
-      while (i < text.length) {
-        res[i] = this.#transform(text[i], size);
-        i++;
-      }
-      return res;
-    }
-    return [this.#transform(text, 0)];
-  }
-  #transform(text: string, size: number): number[] {
-    const words = this.split(text);
-    if (!size) size = words.length;
-    const res = new Array(size);
-    res.fill(this.vocabulary.get("__pad__") || 0);
-    let i = 0;
-    while (i < words.length && i < size) {
-      if (this.vocabulary.has(words[i])) {
-        const index = this.vocabulary.get(words[i]);
-        res[i] = typeof index === "number"
-          ? index
-          : this.vocabulary.get("__unk__") || 0;
-      } else {
-        res[i] = this.vocabulary.get("__unk__") || 0;
-      }
-      i++;
-    }
-    return res;
-  }
-  // TODO: Support custom split modes
-  split(text: string): string[] {
-    return text.split(" ");
-  }
-}
diff --git a/packages/utilities/src/text/preprocess/vectorize/count_vectorizer.ts b/packages/utilities/src/text/preprocess/vectorize/count_vectorizer.ts
deleted file mode 100644
index 189fe0f..0000000
--- a/packages/utilities/src/text/preprocess/vectorize/count_vectorizer.ts
+++ /dev/null
@@ -1,40 +0,0 @@
-import type { DataType, DType } from "../../../utils/common_types.ts";
-import { getConstructor } from "../../../utils/mod.ts";
-import { Matrix } from "../../../mod.ts";
-
-/**
- * Convert tokens into vectors based on term frequency
- */
-export class CountVectorizer {
-  #vocabSize: number;
-  constructor(vocabSize: number) {
-    this.#vocabSize = vocabSize;
-  }
-  /**
-   * Convert a document (string | array of strings) into vectors.
-   */
-  transform<T extends DataType>(tokens: number[][], dType: T): Matrix<T> {
-    if (!this.#vocabSize) {
-      throw new Error("Vocab not initialized.");
-    }
-    const res = new Matrix(dType, [tokens.length, this.#vocabSize]);
-    let i = 0;
-    while (i < tokens.length) {
-      res.setRow(i, this.#transform<T>(tokens[i], dType));
-      i += 1;
-    }
-    return res as Matrix<T>;
-  }
-  #transform<T extends DataType>(tokens: number[], dType: T): DType<T> {
-    const res = new (getConstructor<T>(dType))(this.#vocabSize);
-    let i = 0;
-    while (i < tokens.length) {
-      if (tokens[i] < this.#vocabSize) {
-        // @ts-ignore No error here
-        res[tokens[i]] += typeof res[tokens[i]] === "bigint" ? 1n : 1;
-      }
-      i += 1;
-    }
-    return res as DType<T>;
-  }
-}
diff --git a/packages/utilities/src/text/preprocess/vectorize/mod.ts b/packages/utilities/src/text/preprocess/vectorize/mod.ts
deleted file mode 100644
index 2f9fa65..0000000
--- a/packages/utilities/src/text/preprocess/vectorize/mod.ts
+++ /dev/null
@@ -1,2 +0,0 @@
-export { CountVectorizer } from "./count_vectorizer.ts";
-export { MultiHotVectorizer } from "./multi_hot.ts";
diff --git a/packages/utilities/src/text/preprocess/vectorize/multi_hot.ts b/packages/utilities/src/text/preprocess/vectorize/multi_hot.ts
deleted file mode 100644
index ba7800a..0000000
--- a/packages/utilities/src/text/preprocess/vectorize/multi_hot.ts
+++ /dev/null
@@ -1,39 +0,0 @@
-import type { DataType, DType } from "../../../utils/common_types.ts";
-import { getConstructor } from "../../../utils/mod.ts";
-import { Matrix } from "../../../mod.ts";
-
-/**
- * Convert tokens into vectors based on term frequency
- */
-export class MultiHotVectorizer {
-  #vocabSize: number;
-  constructor(vocabSize: number) {
-    this.#vocabSize = vocabSize;
-  }
-  /**
-   * Convert a document (string | array of strings) into vectors.
-   */
-  transform<T extends DataType>(tokens: number[][], dType: T): Matrix<T> {
-    if (!this.#vocabSize) {
-      throw new Error("Vocab not initialized.");
-    }
-    const res = new Matrix(dType, [tokens.length, this.#vocabSize]);
-    let i = 0;
-    while (i < tokens.length) {
-      res.setRow(i, this.#transform<T>(tokens[i], dType));
-      i += 1;
-    }
-    return res as Matrix<T>;
-  }
-  #transform<T extends DataType>(tokens: number[], dType: T): DType<T> {
-    const res = new (getConstructor<T>(dType))(this.#vocabSize);
-    let i = 0;
-    while (i < tokens.length) {
-      if (tokens[i] < this.#vocabSize) {
-        res[tokens[i]] = typeof res[tokens[i]] === "bigint" ? 1n : 1;
-      }
-      i += 1;
-    }
-    return res as DType<T>;
-  }
-}
diff --git a/packages/utilities/src/text/vectorizer.ts b/packages/utilities/src/text/vectorizer.ts
new file mode 100644
index 0000000..0f7e3ba
--- /dev/null
+++ b/packages/utilities/src/text/vectorizer.ts
@@ -0,0 +1,84 @@
+import { TfEncoder } from "../encoding/mod.ts";
+import { DiscreteMapper } from "../mapper/discrete.ts";
+import { Matrix } from "../mod.ts";
+import { TfIdfTransformer } from "../transformer/tfidf.ts";
+import type { DataType } from "../utils/common_types.ts";
+
+/**
+ * Turn text into numeric matrices.
+ * Documents are split on single spaces and every word is mapped to an index;
+ * `mode` selects the output: "indices" (word indices as-is), "tf"
+ * (per-document word counts) or "tfidf" (counts passed through a TfIdfTransformer).
+ */
+export class TextVectorizer {
+  /** Kind of vectors produced by `transform`. */
+  mode: "tf" | "tfidf" | "indices";
+  /** Word to index mapping, filled by `fit`. */
+  mapper: DiscreteMapper<string>;
+  /** Count encoder; created by `fit` in "tf" and "tfidf" modes. */
+  encoder?: TfEncoder;
+  /** Tf-idf transformer; created by `fit` in "tfidf" mode. */
+  transformer?: TfIdfTransformer;
+  constructor(mode: "tf" | "tfidf" | "indices" = "indices") {
+    this.mode = mode;
+    this.mapper = new DiscreteMapper();
+  }
+  /**
+   * Learn the vocabulary (and, in "tfidf" mode, fit the transformer) from text.
+   * @param document A single document or a list of documents.
+   * @returns This vectorizer, for chaining.
+   */
+  fit(document: string | string[]): this {
+    this.mapper.fit(
+      (Array.isArray(document) ? document.join(" ") : document).split(" ")
+    );
+    if (this.mode === "tf" || this.mode === "tfidf") {
+      this.encoder = new TfEncoder(this.mapper.mapping.size);
+      if (this.mode === "tfidf") {
+        this.transformer = new TfIdfTransformer();
+        const tokens = this.#tokenize(document);
+        this.transformer.fit(this.encoder.transform(tokens, "f32"));
+      }
+    }
+    return this;
+  }
+  /**
+   * Convert text into a matrix according to `mode`.
+   * Words that were not seen by `fit` are mapped to -1.
+   * @param document A single document or a list of documents.
+   * @param dType Data type of the resulting matrix.
+   * @returns One row per document.
+   * @throws If the vectorizer has not been fitted for the current mode.
+   */
+  transform<DT extends DataType>(
+    document: string | string[],
+    dType: DT
+  ): Matrix<DT> {
+    if (!this.mapper.mapping.size)
+      throw new Error("Text Vectorizer not trained yet. Use .fit() first.");
+    const tokens = this.#tokenize(document);
+    if (this.mode === "indices") {
+      // reduce() instead of Math.max(...spread): an empty document list yields
+      // width 0 rather than -Infinity, and long lists cannot overflow the call stack.
+      const width = tokens.reduce((max, x) => Math.max(max, x.length), 0);
+      const res = new Matrix(dType, [tokens.length, width]);
+      for (let i = 0; i < res.nRows; i += 1) {
+        res.setRow(i, tokens[i]);
+      }
+      return res;
+    }
+    if (!this.encoder)
+      throw new Error("Text Vectorizer not trained yet. Use .fit() first.");
+    const encoded = this.encoder.transform(tokens, dType);
+    if (this.mode === "tf") return encoded;
+    if (!this.transformer)
+      throw new Error("Text Vectorizer not trained yet. Use .fit() first.");
+    return this.transformer.transform<DT>(encoded);
+  }
+  /** Split each document on single spaces and map its words to indices. */
+  #tokenize(document: string | string[]): number[][] {
+    return Array.isArray(document)
+      ? document.map((x) => this.mapper.transform(x.split(" ")))
+      : [this.mapper.transform(document.split(" "))];
+  }
+}
diff --git a/packages/utilities/src/text/preprocess/transformer/mod.ts b/packages/utilities/src/transformer/mod.ts
similarity index 100%
rename from packages/utilities/src/text/preprocess/transformer/mod.ts
rename to packages/utilities/src/transformer/mod.ts
diff --git a/packages/utilities/src/text/preprocess/transformer/tfidf.ts b/packages/utilities/src/transformer/tfidf.ts
similarity index 70%
rename from packages/utilities/src/text/preprocess/transformer/tfidf.ts
rename to packages/utilities/src/transformer/tfidf.ts
index cef8ea4..ce185a7 100644
--- a/packages/utilities/src/text/preprocess/transformer/tfidf.ts
+++ b/packages/utilities/src/transformer/tfidf.ts
@@ -1,11 +1,11 @@
-import type { DataType } from "../../../utils/common_types.ts";
-import type { Matrix, MatrixLike } from "../../../mod.ts";
-import { multiplyDiags } from "../../../utils/math.ts";
+import type { DataType } from "../utils/common_types.ts";
+import type { Matrix, MatrixLike } from "../mod.ts";
+import { multiplyDiags } from "../utils/math.ts";
 
-/** Convert tf features (CountVectorizer) into tf-idf features. */
+/** Convert tf features (Count) into tf-idf features. */
 export class TfIdfTransformer {
-  idf: null | Float64Array;
-  constructor({ idf }: { idf?: Float64Array } = {}) {
+  idf: null | Float32Array;
+  constructor({ idf }: { idf?: Float32Array } = {}) {
     this.idf = idf ?? null;
   }
   /**
@@ -20,7 +20,7 @@ export class TfIdfTransformer {
     };
     const freq = data.rowSum();
 
-    const idf = new Float64Array(freq.length);
+    const idf = new Float32Array(freq.length);
 
     let i = 0;
     while (i < idf.length) {
@@ -32,6 +32,7 @@ export class TfIdfTransformer {
   }
   /**
    * Transform an tf features into tf-idf features.
+   * Mutates the input.
    * @param data tf features from CountVectorizer
    * @returns Sparse matrix of Tf-Idf features
    */
diff --git a/packages/utilities/src/utils/common_types.ts b/packages/utilities/src/utils/common_types.ts
index d373484..005e0c9 100644
--- a/packages/utilities/src/utils/common_types.ts
+++ b/packages/utilities/src/utils/common_types.ts
@@ -51,27 +51,34 @@ interface TypedArrayValueMapping {
   f64: number;
 }
 
-export type DTypeValue<T extends keyof TypedArrayValueMapping> = T extends
-  keyof TypedArrayValueMapping ? TypedArrayValueMapping[T] : never;
+export type DTypeValue<T extends keyof TypedArrayValueMapping> =
+  T extends keyof TypedArrayValueMapping ? TypedArrayValueMapping[T] : never;
 
 type AddableTypes = number | bigint;
 
 export type AddDTypeValues<
   T1 extends AddableTypes,
-  T2 extends AddableTypes,
-> = T1 extends number ? T2 extends number ? number
-  : T2 extends bigint ? bigint
-  : never
-  : T1 extends bigint ? T2 extends number ? bigint
-    : T2 extends bigint ? bigint
+  T2 extends AddableTypes
+> = T1 extends number
+  ? T2 extends number
+    ? number
+    : T2 extends bigint
+    ? bigint
+    : never
+  : T1 extends bigint
+  ? T2 extends number
+    ? bigint
+    : T2 extends bigint
+    ? bigint
     : never
   : never;
 
-export type DType<T extends keyof TypedArrayMapping> = T extends
-  keyof TypedArrayMapping ? TypedArrayMapping[T] : never;
+export type DType<T extends keyof TypedArrayMapping> =
+  T extends keyof TypedArrayMapping ? TypedArrayMapping[T] : never;
 
 export type DTypeConstructor<T extends keyof TypedArrayConstructorMapping> =
-  T extends keyof TypedArrayConstructorMapping ? TypedArrayConstructorMapping[T]
+  T extends keyof TypedArrayConstructorMapping
+    ? TypedArrayConstructorMapping[T]
     : never;
 
 export type TypedArray =
@@ -93,8 +100,8 @@ export interface Sliceable {
     predicate: (
       value: unknown,
       index: number,
-      array: unknown[],
-    ) => value is unknown,
+      array: unknown[]
+    ) => value is unknown
   ): Sliceable;
   slice(start?: number, end?: number): Sliceable;
   length: number;
@@ -166,27 +173,29 @@ export interface StandardizeConfig {
   normalizeWhiteSpaces?: boolean;
   /** Strip Newlines */
   stripNewlines?: boolean;
+  /** Remove stop words from text */
+  removeStopWords?: "english" | false | string[];
 }
 
 export type VectorizerMode = "count" | "indices" | "multihot" | "tfidf";
 
 export type VectorizerModeConfig =
   | {
-    mode: "count";
-    config?: Partial<BaseVectorizerOptions>;
-  }
+      mode: "count";
+      config?: Partial<BaseVectorizerOptions>;
+    }
   | {
-    mode: "indices";
-    config?: Partial<BaseVectorizerOptions & { size: number }>;
-  }
+      mode: "indices";
+      config?: Partial<BaseVectorizerOptions & { size: number }>;
+    }
   | {
-    mode: "multihot";
-    config?: Partial<BaseVectorizerOptions>;
-  }
+      mode: "multihot";
+      config?: Partial<BaseVectorizerOptions>;
+    }
   | {
-    mode: "tfidf";
-    config?: Partial<BaseVectorizerOptions & { idf: Float64Array }>;
-  };
+      mode: "tfidf";
+      config?: Partial<BaseVectorizerOptions & { idf: Float64Array }>;
+    };
 
 export interface TokenizerModeConfig {
   mode: "whitespace";