From a830107c145b882fe72e076d1b122df231dd9fca Mon Sep 17 00:00:00 2001
From: Martin Stancsics <martin.stancsics@quantco.com>
Date: Thu, 17 Aug 2023 11:54:03 +0200
Subject: [PATCH] Allow missing values in `CategoricalMatrix` (#281)

* Add missing support to categoricals

* Rename functions

* Parametrize missing behavior in constructors

* Return a maskedarray from recover_orig

* Propagate missing_method when indexing

* Add tests

* Template all the things!

* Privatize has_missing attribute

* Add changelog entry

* Add option to treat missing values as a category

* Update changelog

* Raise if the missing category already exists

* Add tests for missing name and raise on existing

* Don't skip tests (they are fast)

* Apply suggestions from review

* Fix indxing

* Fix intercept name in formulas

* Add missing cateegorical functinoality to formulas

* Much cooler handlong of missing categoricals
---
 CHANGELOG.rst                             |   1 +
 src/tabmat/categorical_matrix.py          | 134 +++++++++++----
 src/tabmat/constructor.py                 |  24 +++
 src/tabmat/ext/cat_split_helpers-tmpl.cpp |  97 +++++++----
 src/tabmat/ext/categorical.pyx            |  70 ++++----
 src/tabmat/ext/split.pyx                  |  50 ++++--
 src/tabmat/formula.py                     |  43 ++++-
 tests/test_categorical_matrix.py          | 188 ++++++++++++++++++----
 tests/test_formula.py                     | 110 +++++++++++++
 tests/test_split_matrix.py                | 103 +++++++++---
 10 files changed, 657 insertions(+), 163 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b3750c98..b948d334 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -14,6 +14,7 @@ Unreleased
 
 - Add column name and term name metadata to ``MatrixBase`` objects. These are automatically populated when initializing a ``MatrixBase`` from a ``pandas.DataFrame``. In addition, they can be accessed and modified via the ``column_names`` and ``term_names`` properties.
 - Add a formula interface for creating tabmat matrices from pandas data frames. See :func:`tabmat.from_formula` for details.
+- Add support for missing values in ``CategoricalMatrix`` by either creating a separate category for them or treating them as all-zero rows.
 
 **Other changes:**
 
diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
index 1180646c..e981bb2e 100644
--- a/src/tabmat/categorical_matrix.py
+++ b/src/tabmat/categorical_matrix.py
@@ -169,14 +169,14 @@ def matvec(mat, vec):
 from scipy import sparse as sps
 
 from .ext.categorical import (
-    matvec,
-    matvec_drop_first,
-    multiply_drop_first,
-    sandwich_categorical,
-    sandwich_categorical_drop_first,
-    subset_categorical_drop_first,
-    transpose_matvec,
-    transpose_matvec_drop_first,
+    matvec_complex,
+    matvec_fast,
+    multiply_complex,
+    sandwich_categorical_complex,
+    sandwich_categorical_fast,
+    subset_categorical_complex,
+    transpose_matvec_complex,
+    transpose_matvec_fast,
 )
 from .ext.split import sandwich_cat_cat, sandwich_cat_dense
 from .matrix_base import MatrixBase
@@ -237,6 +237,17 @@ class CategoricalMatrix(MatrixBase):
         drop the first level of the dummy encoding. This allows a CategoricalMatrix
         to be used in an unregularized setting.
 
+    cat_missing_method: str {'fail'|'zero'|'convert'}, default 'fail'
+        - if 'fail', raise an error if there are missing values.
+        - if 'zero', missing values will represent all-zero indicator columns.
+        - if 'convert', missing values will be converted to the ``cat_missing_name``
+          category.
+
+    cat_missing_name: str, default '(MISSING)'
+        Name of the category to which missing values will be converted if
+        ``cat_missing_method='convert'``. If this category already exists, an error
+        will be raised.
+
     dtype:
         data type
     """
@@ -249,15 +260,46 @@ def __init__(
         column_name: Optional[str] = None,
         term_name: Optional[str] = None,
         column_name_format: str = "{name}[{category}]",
+        cat_missing_method: str = "fail",
+        cat_missing_name: str = "(MISSING)",
     ):
-        if pd.isnull(cat_vec).any():
-            raise ValueError("Categorical data can't have missing values.")
+        if cat_missing_method not in ["fail", "zero", "convert"]:
+            raise ValueError(
+                "cat_missing_method must be one of 'fail' 'zero' or 'convert', "
+                f" got {cat_missing_method}"
+            )
+        self._missing_method = cat_missing_method
+        self._missing_category = cat_missing_name
 
         if isinstance(cat_vec, pd.Categorical):
             self.cat = cat_vec
         else:
             self.cat = pd.Categorical(cat_vec)
 
+        if pd.isnull(self.cat).any():
+            if self._missing_method == "fail":
+                raise ValueError(
+                    "Categorical data can't have missing values "
+                    "if cat_missing_method='fail'."
+                )
+
+            elif self._missing_method == "convert":
+                if self._missing_category in self.cat.categories:
+                    raise ValueError(
+                        f"Missing category {self._missing_category} already exists."
+                    )
+
+                self.cat = self.cat.add_categories([self._missing_category])
+
+                self.cat[pd.isnull(self.cat)] = self._missing_category
+                self._has_missings = False
+
+            else:
+                self._has_missings = True
+
+        else:
+            self._has_missings = False
+
         self.drop_first = drop_first
         self.shape = (len(self.cat), len(self.cat.categories) - int(drop_first))
         self.indices = self.cat.codes.astype(np.int32)
@@ -279,7 +321,20 @@ def recover_orig(self) -> np.ndarray:
 
         Test: matrix/test_categorical_matrix::test_recover_orig
         """
-        return self.cat.categories[self.cat.codes]
+        orig = self.cat.categories[self.cat.codes].to_numpy()
+
+        if self._has_missings:
+            orig = orig.view(np.ma.MaskedArray)
+            orig.mask = self.cat.codes == -1
+        elif (
+            self._missing_method == "convert"
+            and self._missing_category in self.cat.categories
+        ):
+            orig = orig.view(np.ma.MaskedArray)
+            missing_code = self.cat.categories.get_loc(self._missing_category)
+            orig.mask = self.cat.codes == missing_code
+
+        return orig
 
     def _matvec_setup(
         self,
@@ -335,12 +390,18 @@ def matvec(
         if out is None:
             out = np.zeros(self.shape[0], dtype=other_m.dtype)
 
-        if self.drop_first:
-            matvec_drop_first(
-                self.indices, other_m, self.shape[0], cols, self.shape[1], out
+        if self.drop_first or self._has_missings:
+            matvec_complex(
+                self.indices,
+                other_m,
+                self.shape[0],
+                cols,
+                self.shape[1],
+                out,
+                self.drop_first,
             )
         else:
-            matvec(self.indices, other_m, self.shape[0], cols, self.shape[1], out)
+            matvec_fast(self.indices, other_m, self.shape[0], cols, self.shape[1], out)
 
         if is_int:
             return out.astype(int)
@@ -402,12 +463,19 @@ def transpose_matvec(
         if cols is not None:
             cols = set_up_rows_or_cols(cols, self.shape[1])
 
-        if self.drop_first:
-            transpose_matvec_drop_first(
-                self.indices, vec, self.shape[1], vec.dtype, rows, cols, out
+        if self.drop_first or self._has_missings:
+            transpose_matvec_complex(
+                self.indices,
+                vec,
+                self.shape[1],
+                vec.dtype,
+                rows,
+                cols,
+                out,
+                self.drop_first,
             )
         else:
-            transpose_matvec(
+            transpose_matvec_fast(
                 self.indices, vec, self.shape[1], vec.dtype, rows, cols, out
             )
 
@@ -438,12 +506,12 @@ def sandwich(
         """
         d = np.asarray(d)
         rows = set_up_rows_or_cols(rows, self.shape[0])
-        if self.drop_first:
-            res_diag = sandwich_categorical_drop_first(
-                self.indices, d, rows, d.dtype, self.shape[1]
+        if self.drop_first or self._has_missings:
+            res_diag = sandwich_categorical_complex(
+                self.indices, d, rows, d.dtype, self.shape[1], self.drop_first
             )
         else:
-            res_diag = sandwich_categorical(
+            res_diag = sandwich_categorical_fast(
                 self.indices, d, rows, d.dtype, self.shape[1]
             )
 
@@ -490,9 +558,9 @@ def getcol(self, i: int) -> SparseMatrix:
 
     def tocsr(self) -> sps.csr_matrix:
         """Return scipy csr representation of matrix."""
-        if self.drop_first:
-            nnz, indices, indptr = subset_categorical_drop_first(
-                self.indices, self.shape[1]
+        if self.drop_first or self._has_missings:
+            nnz, indices, indptr = subset_categorical_complex(
+                self.indices, self.shape[1], self.drop_first
             )
             return sps.csr_matrix(
                 (np.ones(nnz, dtype=int), indices, indptr), shape=self.shape
@@ -549,6 +617,7 @@ def __getitem__(self, item):
                 dtype=self.dtype,
                 column_name=self._colname,
                 column_name_format=self._colname_format,
+                cat_missing_method=self._missing_method,
             )
         else:
             # return a SparseMatrix if we subset columns
@@ -576,15 +645,17 @@ def _cross_dense(
 
         res = sandwich_cat_dense(
             self.indices,
-            self.shape[1] + self.drop_first,
+            self.shape[1],
             d,
             other,
             rows,
             R_cols,
             is_c_contiguous,
+            has_missings=self._has_missings,
+            drop_first=self.drop_first,
         )
 
-        res = _row_col_indexing(res[self.drop_first :], L_cols, None)
+        res = _row_col_indexing(res, L_cols, None)
         return res
 
     def _cross_categorical(
@@ -612,6 +683,8 @@ def _cross_categorical(
             d.dtype,
             self.drop_first,
             other.drop_first,
+            self._has_missings,
+            other._has_missings,
         )
 
         res = _row_col_indexing(res, L_cols, R_cols)
@@ -642,14 +715,15 @@ def multiply(self, other) -> SparseMatrix:
                 f"Shapes do not match. Expected length of {self.shape[0]}. Got {len(other)}."
             )
 
-        if self.drop_first:
+        if self.drop_first or self._has_missings:
             return SparseMatrix(
                 sps.csr_matrix(
-                    multiply_drop_first(
+                    multiply_complex(
                         indices=self.indices,
                         d=np.squeeze(other),
                         ncols=self.shape[1],
                         dtype=other.dtype,
+                        drop_first=self.drop_first,
                     ),
                     shape=self.shape,
                 )
diff --git a/src/tabmat/constructor.py b/src/tabmat/constructor.py
index 3426a76b..9849cbde 100644
--- a/src/tabmat/constructor.py
+++ b/src/tabmat/constructor.py
@@ -29,6 +29,8 @@ def from_pandas(
     cat_position: str = "expand",
     drop_first: bool = False,
     categorical_format: str = "{name}[{category}]",
+    cat_missing_method: str = "fail",
+    cat_missing_name: str = "(MISSING)",
 ) -> MatrixBase:
     """
     Transform a pandas.DataFrame into an efficient SplitMatrix. For most users, this
@@ -58,6 +60,14 @@ def from_pandas(
         If true, categoricals variables will have their first category dropped.
         This allows multiple categorical variables to be included in an
         unregularized model. If False, all categories are included.
+    cat_missing_method: str {'fail'|'zero'|'convert'}, default 'fail'
+        How to handle missing values in categorical columns:
+        - if 'fail', raise an error if there are missing values
+        - if 'zero', missing values will represent all-zero indicator columns.
+        - if 'convert', missing values will be converted to the '(MISSING)' category.
+    cat_missing_name: str, default '(MISSING)'
+        Name of the category to which missing values will be converted if
+        ``cat_missing_method='convert'``.
 
     Returns
     -------
@@ -87,6 +97,8 @@ def from_pandas(
                 column_name=colname,
                 term_name=colname,
                 column_name_format=categorical_format,
+                cat_missing_method=cat_missing_method,
+                cat_missing_name=cat_missing_name,
             )
             if len(coldata.cat.categories) < cat_threshold:
                 (
@@ -207,6 +219,8 @@ def from_formula(
     cat_threshold: int = 4,
     interaction_separator: str = ":",
     categorical_format: str = "{name}[{category}]",
+    cat_missing_method: str = "fail",
+    cat_missing_name: str = "(MISSING)",
     intercept_name: str = "Intercept",
     include_intercept: bool = False,
     add_column_for_intercept: bool = True,
@@ -237,6 +251,14 @@ def from_formula(
     categorical_format: str, default "{name}[T.{category}]"
         The format string used to generate the names of categorical variables.
         Has to include the placeholders ``{name}`` and ``{category}``.
+    cat_missing_method: str {'fail'|'zero'|'convert'}, default 'fail'
+        How to handle missing values in categorical columns:
+        - if 'fail', raise an error if there are missing values
+        - if 'zero', missing values will represent all-zero indicator columns.
+        - if 'convert', missing values will be converted to the '(MISSING)' category.
+    cat_missing_name: str, default '(MISSING)'
+        Name of the category to which missing values will be converted if
+        ``cat_missing_method='convert'``.
     intercept_name: str, default "Intercept"
         The name of the intercept column.
     include_intercept: bool, default False
@@ -274,6 +296,8 @@ def from_formula(
         sparse_threshold=sparse_threshold,
         cat_threshold=cat_threshold,
         add_column_for_intercept=add_column_for_intercept,
+        cat_missing_method=cat_missing_method,
+        cat_missing_name=cat_missing_name,
     )
     result = materializer.get_model_matrix(spec)
 
diff --git a/src/tabmat/ext/cat_split_helpers-tmpl.cpp b/src/tabmat/ext/cat_split_helpers-tmpl.cpp
index c40f851c..5cd7ef5a 100644
--- a/src/tabmat/ext/cat_split_helpers-tmpl.cpp
+++ b/src/tabmat/ext/cat_split_helpers-tmpl.cpp
@@ -1,23 +1,26 @@
 #include <vector>
 
 
-<%def name="transpose_matvec(dropfirst)">
+<%def name="transpose_matvec(type)">
 template <typename Int, typename F>
-void _transpose_matvec_${dropfirst}(
+void _transpose_matvec_${type}(
     Int n_rows,
     Int* indices,
     F* other,
     F* res,
     Int res_size
+    % if type == 'all_rows_complex':
+        , bool drop_first
+    % endif
 ) {
     #pragma omp parallel
     {
         std::vector<F> restemp(res_size, 0.0);
         #pragma omp for
         for (Py_ssize_t i = 0; i < n_rows; i++) {
-            % if dropfirst == 'all_rows_drop_first':
-                Py_ssize_t col_idx = indices[i] - 1;
-                if (col_idx != -1) {
+            % if type == 'all_rows_complex':
+                Py_ssize_t col_idx = indices[i] - drop_first;
+                if (col_idx >= 0) {
                     restemp[col_idx] += other[i];
                 }
             % else:
@@ -33,8 +36,9 @@ void _transpose_matvec_${dropfirst}(
 </%def>
 
 
+<%def name="sandwich_cat_cat(type)">
 template <typename Int, typename F>
-void _sandwich_cat_cat(
+void _sandwich_cat_cat_${type}(
     F* d,
     const Int* i_indices,
     const Int* j_indices,
@@ -42,9 +46,11 @@ void _sandwich_cat_cat(
     Int len_rows,
     F* res,
     Int res_n_col,
-    Int res_size,
-    bool i_drop_first,
-    bool j_drop_first
+    Int res_size
+    % if type == 'complex':
+        , bool i_drop_first
+        , bool j_drop_first
+    % endif
 )
 {
     #pragma omp parallel
@@ -53,14 +59,25 @@ void _sandwich_cat_cat(
         # pragma omp for
         for (Py_ssize_t k_idx = 0; k_idx < len_rows; k_idx++) {
             Int k = rows[k_idx];
-            Int i = i_indices[k] - i_drop_first;
-            if (i == -1) {
-                continue;
-            }
-            Int j = j_indices[k] - j_drop_first;
-            if (j == -1) {
-                continue;
-            }
+
+            % if type == 'complex':
+                Int i = i_indices[k] - i_drop_first;
+                if (i < 0) {
+                    continue;
+                }
+            % else:
+                Int i = i_indices[k];
+            % endif
+
+            % if type == 'complex':
+                Int j = j_indices[k] - j_drop_first;
+                if (j < 0) {
+                    continue;
+                }
+            % else:
+                Int j = j_indices[k];
+            % endif
+
             restemp[(Py_ssize_t) i * res_n_col + j] += d[k];
         }
         for (Py_ssize_t i = 0; i < res_size; i++) {
@@ -69,11 +86,12 @@ void _sandwich_cat_cat(
         }
     }
 }
+</%def>
 
 
-<%def name="sandwich_cat_dense_tmpl(order)">
+<%def name="sandwich_cat_dense_tmpl(order, type)">
 template <typename Int, typename F>
-void _sandwich_cat_dense${order}(
+void _sandwich_cat_dense${order}_${type}(
     F* d,
     const Int* indices,
     Int* rows,
@@ -85,6 +103,9 @@ void _sandwich_cat_dense${order}(
     F* mat_j,
     Int mat_j_nrow,
     Int mat_j_ncol
+    % if type == 'complex':
+        , bool drop_first
+    % endif
     )
 {
     #pragma omp parallel
@@ -93,20 +114,28 @@ void _sandwich_cat_dense${order}(
         #pragma omp for
         for (Py_ssize_t k_idx = 0; k_idx < len_rows; k_idx++) {
             Py_ssize_t k = rows[k_idx];
-            Py_ssize_t i = indices[k];
             // MAYBE TODO: explore whether the column restriction slows things down a
             // lot, particularly if not restricting the columns allows using SIMD
             // instructions
             // MAYBE TODO: explore whether swapping the loop order for F-ordered mat_j
             // is useful.
-            for (Py_ssize_t j_idx = 0; j_idx < len_j_cols; j_idx++) {
-                Py_ssize_t j = j_cols[j_idx];
-                % if order == 'C':
-                    restemp[i * len_j_cols + j_idx] += d[k] * mat_j[k * mat_j_ncol + j];
-                % else:
-                    restemp[i * len_j_cols + j_idx] += d[k] * mat_j[j * mat_j_nrow + k];
-                % endif
-            }
+            % if type == 'complex':
+                Py_ssize_t i = indices[k] - drop_first;
+                if (i >= 0) {
+            % else:
+                Py_ssize_t i = indices[k];
+            % endif
+                for (Py_ssize_t j_idx = 0; j_idx < len_j_cols; j_idx++) {
+                    Py_ssize_t j = j_cols[j_idx];
+                    % if order == 'C':
+                        restemp[i * len_j_cols + j_idx] += d[k] * mat_j[k * mat_j_ncol + j];
+                    % else:
+                        restemp[i * len_j_cols + j_idx] += d[k] * mat_j[j * mat_j_nrow + k];
+                    % endif
+                }
+            % if type == 'complex':
+                }
+            % endif
         }
         for (Py_ssize_t i = 0; i < res_size; i++) {
             #pragma omp atomic
@@ -116,7 +145,11 @@ void _sandwich_cat_dense${order}(
 }
 </%def>
 
-${sandwich_cat_dense_tmpl('C')}
-${sandwich_cat_dense_tmpl('F')}
-${transpose_matvec('all_rows')}
-${transpose_matvec('all_rows_drop_first')}
+${sandwich_cat_dense_tmpl('C', 'fast')}
+${sandwich_cat_dense_tmpl('F', 'fast')}
+${sandwich_cat_dense_tmpl('C', 'complex')}
+${sandwich_cat_dense_tmpl('F', 'complex')}
+${sandwich_cat_cat('fast')}
+${sandwich_cat_cat('complex')}
+${transpose_matvec('all_rows_fast')}
+${transpose_matvec('all_rows_complex')}
diff --git a/src/tabmat/ext/categorical.pyx b/src/tabmat/ext/categorical.pyx
index 59ea308a..0c90b2c7 100644
--- a/src/tabmat/ext/categorical.pyx
+++ b/src/tabmat/ext/categorical.pyx
@@ -11,11 +11,11 @@ from libcpp cimport bool
 
 
 cdef extern from "cat_split_helpers.cpp":
-    void _transpose_matvec_all_rows[Int, F](Int, Int*, F*, F*, Int)
-    void _transpose_matvec_all_rows_drop_first[Int, F](Int, Int*, F*, F*, Int)
+    void _transpose_matvec_all_rows_fast[Int, F](Int, Int*, F*, F*, Int)
+    void _transpose_matvec_all_rows_complex[Int, F](Int, Int*, F*, F*, Int, bool)
 
 
-def transpose_matvec(
+def transpose_matvec_fast(
     int[:] indices,
     floating[:] other,
     int n_cols,
@@ -34,7 +34,7 @@ def transpose_matvec(
 
     # Case 1: No row or col restrictions
     if no_row_restrictions and no_col_restrictions:
-        _transpose_matvec_all_rows(n_rows, &indices[0], &other[0], &out[0], out_size)
+        _transpose_matvec_all_rows_fast(n_rows, &indices[0], &other[0], &out[0], out_size)
     # Case 2: row restrictions but no col restrictions
     elif no_col_restrictions:
         rows_view = rows
@@ -62,14 +62,15 @@ def transpose_matvec(
                     out[col] += other[row]
 
 
-def transpose_matvec_drop_first(
+def transpose_matvec_complex(
     int[:] indices,
     floating[:] other,
     int n_cols,
     dtype,
     rows,
     cols,
-    floating[:] out
+    floating[:] out,
+    bint drop_first
 ):
     cdef int row, row_idx, n_keep_rows, col_idx
     cdef int n_rows = len(indices)
@@ -81,15 +82,15 @@ def transpose_matvec_drop_first(
 
     # Case 1: No row or col restrictions
     if no_row_restrictions and no_col_restrictions:
-        _transpose_matvec_all_rows_drop_first(n_rows, &indices[0], &other[0], &out[0], out_size)
+        _transpose_matvec_all_rows_complex(n_rows, &indices[0], &other[0], &out[0], out_size, drop_first)
     # Case 2: row restrictions but no col restrictions
     elif no_col_restrictions:
         rows_view = rows
         n_keep_rows = len(rows_view)
         for row_idx in range(n_keep_rows):
             row = rows_view[row_idx]
-            col_idx = indices[row] - 1
-            if col_idx != -1:
+            col_idx = indices[row] - drop_first
+            if col_idx >= 0:
                 out[col_idx] += other[row]
     # Cases 3 and 4: col restrictions
     else:
@@ -97,8 +98,8 @@ def transpose_matvec_drop_first(
         # Case 3: Col restrictions but no row restrictions
         if no_row_restrictions:
             for row_idx in range(n_rows):
-                col_idx = indices[row_idx] - 1
-                if (col_idx != -1) and (cols_included[col_idx]):
+                col_idx = indices[row_idx] - drop_first
+                if (col_idx >= 0) and (cols_included[col_idx]):
                     out[col_idx] += other[row_idx]
         # Case 4: Both col restrictions and row restrictions
         else:
@@ -106,8 +107,8 @@ def transpose_matvec_drop_first(
             n_keep_rows = len(rows_view)
             for row_idx in range(n_keep_rows):
                 row = rows_view[row_idx]
-                col_idx = indices[row] - 1
-                if (col_idx != -1) and (cols_included[col_idx]):
+                col_idx = indices[row] - drop_first
+                if (col_idx >= 0) and (cols_included[col_idx]):
                     out[col_idx] += other[row]
 
 
@@ -119,7 +120,7 @@ def get_col_included(int[:] cols, int n_cols):
     return col_included
 
 
-def matvec(
+def matvec_fast(
     const int[:] indices,
     floating[:] other,
     int n_rows,
@@ -145,13 +146,14 @@ def matvec(
     return
 
 
-def matvec_drop_first(
+def matvec_complex(
     const int[:] indices, 
     floating[:] other, 
     int n_rows, 
     int[:] cols,
     int n_cols, 
-    floating[:] out_vec
+    floating[:] out_vec,
+    bint drop_first
 ):
     """See `matvec`. Here we drop the first category of the
     CategoricalMatrix so the indices refer to the column index + 1.
@@ -161,19 +163,19 @@ def matvec_drop_first(
 
     if cols is None:
         for i in prange(n_rows, nogil=True):
-            col_idx = indices[i] - 1  # reference category is always 0.
-            if col_idx != -1:
+            col_idx = indices[i] - drop_first  # reference category is always 0.
+            if col_idx >= 0:
                 out_vec[i] += other[col_idx]
     else:
         col_included = get_col_included(cols, n_cols)
         for i in prange(n_rows, nogil=True):
-            col_idx = indices[i] - 1
-            if (col_idx != -1) and (col_included[col_idx] == 1):
+            col_idx = indices[i] - drop_first
+            if (col_idx >= 0) and (col_included[col_idx] == 1):
                 out_vec[i] += other[col_idx]
     return
 
 
-def sandwich_categorical(
+def sandwich_categorical_fast(
     const int[:] indices,
     floating[:] d,
     int[:] rows,
@@ -191,12 +193,13 @@ def sandwich_categorical(
     return np.asarray(res)
 
 
-def sandwich_categorical_drop_first(
+def sandwich_categorical_complex(
     const int[:] indices,
     floating[:] d,
     int[:] rows,
     dtype,
-    int n_cols
+    int n_cols,
+    bint drop_first
 ):
     cdef floating[:] res = np.zeros(n_cols, dtype=dtype)
     cdef int col_idx, k, k_idx
@@ -204,26 +207,24 @@ def sandwich_categorical_drop_first(
 
     for k_idx in range(n_rows):
         k = rows[k_idx]
-        col_idx = indices[k] - 1  # reference category is always 0.
-        if col_idx != -1:
+        col_idx = indices[k] - drop_first  # reference category is always 0.
+        if col_idx >= 0:
             res[col_idx] += d[k]
     return np.asarray(res)
 
 
-def multiply_drop_first(
+def multiply_complex(
     int[:] indices,
     numeric[:] d,
     int ncols,
     dtype,
+    bint drop_first,
 ):
     """Multiply a CategoricalMatrix by a vector d.
 
     The output cannot be a CategoricalMatrix anymore. Here
     we return the inputs to transform to a csr_matrix.
 
-    Note that *_drop_first function assume the CategoricalMatrix
-    has its first category dropped.
-
     Parameters
     ----------
     indices:
@@ -255,9 +256,9 @@ def multiply_drop_first(
 
     for i in range(nrows):
         vnew_indptr[i] = nonref_cnt
-        if indices[i] != 0:
+        if indices[i] >= drop_first:
             vnew_data[nonref_cnt] = d[i]
-            vnew_indices[nonref_cnt] = indices[i] - 1
+            vnew_indices[nonref_cnt] = indices[i] - drop_first
             nonref_cnt += 1
 
     vnew_indptr[i+1] = nonref_cnt
@@ -265,9 +266,10 @@ def multiply_drop_first(
     return new_data[:nonref_cnt], new_indices[:nonref_cnt], new_indptr
 
 
-def subset_categorical_drop_first(
+def subset_categorical_complex(
     int[:] indices,
     int ncols,
+    bint drop_first
 ):
     """Construct the inputs to transform a CategoricalMatrix into a csr_matrix.
 
@@ -299,8 +301,8 @@ def subset_categorical_drop_first(
 
     for i in range(nrows):
         vnew_indptr[i] = nonzero_cnt
-        if indices[i] != 0:
-            vnew_indices[nonzero_cnt] = indices[i] - 1
+        if indices[i] >= drop_first:
+            vnew_indices[nonzero_cnt] = indices[i] - drop_first
             nonzero_cnt += 1
 
     vnew_indptr[i+1] = nonzero_cnt
diff --git a/src/tabmat/ext/split.pyx b/src/tabmat/ext/split.pyx
index 11f5d3c0..21d2f52e 100644
--- a/src/tabmat/ext/split.pyx
+++ b/src/tabmat/ext/split.pyx
@@ -21,9 +21,12 @@ ctypedef fused win_integral:
 
 
 cdef extern from "cat_split_helpers.cpp":
-    void _sandwich_cat_denseC[Int, F](F*, Int*, Int*, Int, Int*, Int, F*, Int, F*, Int, Int) nogil
-    void _sandwich_cat_denseF[Int, F](F*, Int*, Int*, Int, Int*, Int, F*, Int, F*, Int, Int) nogil
-    void _sandwich_cat_cat[Int, F](F*, const Int*, const Int*, Int*, Int, F*, Int, Int, bool, bool)
+    void _sandwich_cat_denseC_fast[Int, F](F*, Int*, Int*, Int, Int*, Int, F*, Int, F*, Int, Int) nogil
+    void _sandwich_cat_denseF_fast[Int, F](F*, Int*, Int*, Int, Int*, Int, F*, Int, F*, Int, Int) nogil
+    void _sandwich_cat_denseC_complex[Int, F](F*, Int*, Int*, Int, Int*, Int, F*, Int, F*, Int, Int, bool) nogil
+    void _sandwich_cat_denseF_complex[Int, F](F*, Int*, Int*, Int, Int*, Int, F*, Int, F*, Int, Int, bool) nogil
+    void _sandwich_cat_cat_fast[Int, F](F*, const Int*, const Int*, Int*, Int, F*, Int, Int)
+    void _sandwich_cat_cat_complex[Int, F](F*, const Int*, const Int*, Int*, Int, F*, Int, Int, bool, bool)
 
 
 def sandwich_cat_dense(
@@ -33,7 +36,9 @@ def sandwich_cat_dense(
     np.ndarray mat_j,
     int[:] rows,
     int[:] j_cols,
-    bool is_c_contiguous
+    bool is_c_contiguous,
+    bool has_missings,
+    bint drop_first
 ):
     cdef int nj_rows = mat_j.shape[0]
     cdef int nj_cols = mat_j.shape[1]
@@ -53,14 +58,24 @@ def sandwich_cat_dense(
 
     cdef floating* mat_j_p = <floating*>mat_j.data
 
-    if is_c_contiguous:
-        _sandwich_cat_denseC(d_p, i_indices_p, rows_p, n_active_rows, j_cols_p,
-                            nj_active_cols, &res[0, 0], res_size, mat_j_p,
-                            nj_rows, nj_cols)
+    if (not drop_first) and (not has_missings):
+        if is_c_contiguous:
+            _sandwich_cat_denseC_fast(d_p, i_indices_p, rows_p, n_active_rows, j_cols_p,
+                                      nj_active_cols, &res[0, 0], res_size, mat_j_p,
+                                      nj_rows, nj_cols)
+        else:
+            _sandwich_cat_denseF_fast(d_p, i_indices_p, rows_p, n_active_rows, j_cols_p,
+                                      nj_active_cols, &res[0, 0], res_size, mat_j_p,
+                                      nj_rows, nj_cols)
     else:
-        _sandwich_cat_denseF(d_p, i_indices_p, rows_p, n_active_rows, j_cols_p,
-                            nj_active_cols, &res[0, 0], res_size, mat_j_p,
-                            nj_rows, nj_cols)
+        if is_c_contiguous:
+            _sandwich_cat_denseC_complex(d_p, i_indices_p, rows_p, n_active_rows, j_cols_p,
+                                         nj_active_cols, &res[0, 0], res_size, mat_j_p,
+                                         nj_rows, nj_cols, drop_first)
+        else:
+            _sandwich_cat_denseF_complex(d_p, i_indices_p, rows_p, n_active_rows, j_cols_p,
+                                         nj_active_cols, &res[0, 0], res_size, mat_j_p,
+                                         nj_rows, nj_cols, drop_first)
 
     return np.asarray(res)
 
@@ -74,7 +89,9 @@ def sandwich_cat_cat(
     int[:] rows,
     dtype,
     bint i_drop_first,
-    bint j_drop_first
+    bint j_drop_first,
+    bool i_has_missings,
+    bool j_has_missings
 ):
     """
     (X1.T @ diag(d) @ X2)[i, j] = sum_k X1[k, i] d[k] X2[k, j]
@@ -83,8 +100,13 @@ def sandwich_cat_cat(
     cdef int n_rows = len(rows)
     cdef int res_size = res.size
 
-    _sandwich_cat_cat(&d[0], &i_indices[0], &j_indices[0], &rows[0], n_rows,
-                        &res[0, 0], j_ncol, res_size, i_drop_first, j_drop_first)
+    if i_drop_first or j_drop_first or i_has_missings or j_has_missings:
+        _sandwich_cat_cat_complex(&d[0], &i_indices[0], &j_indices[0], &rows[0],
+                                  n_rows, &res[0, 0], j_ncol, res_size,
+                                  i_drop_first, j_drop_first)
+    else:
+        _sandwich_cat_cat_fast(&d[0], &i_indices[0], &j_indices[0], &rows[0],
+                               n_rows, &res[0, 0], j_ncol, res_size)
 
     return np.asarray(res)
 
diff --git a/src/tabmat/formula.py b/src/tabmat/formula.py
index b97d5617..61335af3 100644
--- a/src/tabmat/formula.py
+++ b/src/tabmat/formula.py
@@ -44,6 +44,8 @@ def _init(self):
         self.add_column_for_intercept = self.params.get(
             "add_column_for_intercept", True
         )
+        self.cat_missing_method = self.params.get("cat_missing_method", "fail")
+        self.cat_missing_name = self.params.get("cat_missing_name", "(MISSING)")
 
         # We can override formulaic's C() function here
         self.context["C"] = _C
@@ -100,6 +102,8 @@ def _encode_categorical(
         return encode_contrasts(
             values,
             reduced_rank=reduced_rank,
+            missing_method=self.cat_missing_method,
+            missing_name=self.cat_missing_name,
             _metadata=metadata,
             _state=encoder_state,
             _spec=spec,
@@ -115,7 +119,11 @@ def _combine_columns(self, cols, spec, drop_rows):
         # Otherwise, concatenate columns into SplitMatrix
         return SplitMatrix(
             [
-                col[1].to_tabmat(self.dtype, self.sparse_threshold, self.cat_threshold)
+                col[1].to_tabmat(
+                    self.dtype,
+                    self.sparse_threshold,
+                    self.cat_threshold,
+                )
                 for col in cols
             ]
         )
@@ -145,7 +153,7 @@ def _build_model_matrix(self, spec: ModelSpec, drop_rows):
                     if not self.add_column_for_intercept:
                         continue
                     scoped_cols[
-                        "Intercept"
+                        self.intercept_name
                     ] = scoped_term.scale * self._encode_constant(
                         1, None, {}, spec, drop_rows
                     )
@@ -412,15 +420,31 @@ def __init__(
 
     @classmethod
     def from_categorical(
-        cls, cat: pandas.Categorical, reduced_rank: bool
+        cls,
+        cat: pandas.Categorical,
+        reduced_rank: bool,
+        missing_method: str = "fail",
+        missing_name: str = "(MISSING)",
     ) -> "_InteractableCategoricalVector":
         """Create an interactable categorical vector from a pandas categorical."""
         categories = list(cat.categories)
         codes = cat.codes.copy().astype(numpy.int64)
+
         if reduced_rank:
             codes[codes == 0] = -2
             codes[codes > 0] -= 1
             categories = categories[1:]
+
+        if missing_method == "fail" and -1 in codes:
+            raise ValueError(
+                "Categorical data can't have missing values "
+                "if [cat_]missing_method='fail'."
+            )
+
+        if missing_method == "convert" and -1 in codes:
+            codes[codes == -1] = len(categories)
+            categories.append(missing_name)
+
         return cls(
             codes=codes,
             categories=categories,
@@ -464,6 +488,7 @@ def to_tabmat(
             dtype=dtype,
             column_name=self.name,
             column_name_format="{category}",
+            cat_missing_method="zero",  # missing values are already handled
         )
 
         if (self.codes == -2).all():
@@ -631,6 +656,8 @@ def _C(
     data,
     *,
     levels: Optional[Iterable[str]] = None,
+    missing_method: str = "fail",
+    missing_name: str = "(MISSING)",
     spans_intercept: bool = True,
 ):
     """
@@ -654,6 +681,8 @@ def encoder(
             values,
             levels=levels,
             reduced_rank=reduced_rank,
+            missing_method=missing_method,
+            missing_name=missing_name,
             _state=encoder_state,
             _spec=model_spec,
         )
@@ -671,6 +700,8 @@ def encode_contrasts(
     data,
     *,
     levels: Optional[Iterable[str]] = None,
+    missing_method: str = "fail",
+    missing_name: str = "(MISSING)",
     reduced_rank: bool = False,
     _state=None,
     _spec=None,
@@ -689,8 +720,12 @@ def encode_contrasts(
     levels = levels if levels is not None else _state.get("categories")
     cat = pandas.Categorical(data._values, categories=levels)
     _state["categories"] = cat.categories
+
     return _InteractableCategoricalVector.from_categorical(
-        cat, reduced_rank=reduced_rank
+        cat,
+        reduced_rank=reduced_rank,
+        missing_method=missing_method,
+        missing_name=missing_name,
     )
 
 
diff --git a/tests/test_categorical_matrix.py b/tests/test_categorical_matrix.py
index f1be385a..9e7cb7a9 100644
--- a/tests/test_categorical_matrix.py
+++ b/tests/test_categorical_matrix.py
@@ -1,3 +1,5 @@
+import re
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -6,55 +8,146 @@
 
 
 @pytest.fixture
-def cat_vec():
+def cat_vec(missing):
     m = 10
     seed = 0
     rng = np.random.default_rng(seed)
-    return rng.choice([0, 1, 2, np.inf, -np.inf], size=m)
+    vec = rng.choice([0, 1, 2, np.inf, -np.inf], size=m)
+    if missing:
+        vec[vec == 1] = np.nan
+    return vec
 
 
 @pytest.mark.parametrize("vec_dtype", [np.float64, np.float32, np.int64, np.int32])
-@pytest.mark.parametrize("drop_first", [True, False])
-def test_recover_orig(cat_vec, vec_dtype, drop_first):
-    orig_recovered = CategoricalMatrix(cat_vec, drop_first=drop_first).recover_orig()
+@pytest.mark.parametrize("drop_first", [True, False], ids=["drop_first", "no_drop"])
+@pytest.mark.parametrize("missing", [True, False], ids=["missing", "no_missing"])
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_recover_orig(cat_vec, vec_dtype, drop_first, missing, cat_missing_method):
+    if missing and cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            CategoricalMatrix(
+                cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+            )
+        return
+
+    orig_recovered = CategoricalMatrix(
+        cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+    ).recover_orig()
     np.testing.assert_equal(orig_recovered, cat_vec)
 
 
 @pytest.mark.parametrize("vec_dtype", [np.float64, np.float32, np.int64, np.int32])
-@pytest.mark.parametrize("drop_first", [True, False])
-def test_csr_matvec_categorical(cat_vec, vec_dtype, drop_first):
-    mat = pd.get_dummies(cat_vec, drop_first=drop_first, dtype="uint8")
-    cat_mat = CategoricalMatrix(cat_vec, drop_first=drop_first)
+@pytest.mark.parametrize("drop_first", [True, False], ids=["drop_first", "no_drop"])
+@pytest.mark.parametrize("missing", [True, False], ids=["missing", "no_missing"])
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_csr_matvec_categorical(
+    cat_vec, vec_dtype, drop_first, missing, cat_missing_method
+):
+    if missing and cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            CategoricalMatrix(
+                cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+            )
+        return
+
+    mat = pd.get_dummies(
+        cat_vec,
+        drop_first=drop_first,
+        dtype="uint8",
+        dummy_na=(cat_missing_method == "convert" and missing),
+    )
+    cat_mat = CategoricalMatrix(
+        cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+    )
     vec = np.random.choice(np.arange(4, dtype=vec_dtype), mat.shape[1])
     res = cat_mat.matvec(vec)
     np.testing.assert_allclose(res, cat_mat.A.dot(vec))
 
 
-@pytest.mark.parametrize("drop_first", [True, False])
-def test_tocsr(cat_vec, drop_first):
-    cat_mat = CategoricalMatrix(cat_vec, drop_first=drop_first)
+@pytest.mark.parametrize("drop_first", [True, False], ids=["drop_first", "no_drop"])
+@pytest.mark.parametrize("missing", [True, False], ids=["missing", "no_missing"])
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_tocsr(cat_vec, drop_first, missing, cat_missing_method):
+    if missing and cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            CategoricalMatrix(
+                cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+            )
+        return
+
+    cat_mat = CategoricalMatrix(
+        cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+    )
     res = cat_mat.tocsr().A
-    expected = pd.get_dummies(cat_vec, drop_first=drop_first, dtype="uint8")
+    expected = pd.get_dummies(
+        cat_vec,
+        drop_first=drop_first,
+        dtype="uint8",
+        dummy_na=cat_missing_method == "convert" and missing,
+    )
     np.testing.assert_allclose(res, expected)
 
 
-@pytest.mark.parametrize("drop_first", [True, False])
-def test_transpose_matvec(cat_vec, drop_first):
-    cat_mat = CategoricalMatrix(cat_vec, drop_first=drop_first)
+@pytest.mark.parametrize("drop_first", [True, False], ids=["drop_first", "no_drop"])
+@pytest.mark.parametrize("missing", [True, False], ids=["missing", "no_missing"])
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_transpose_matvec(cat_vec, drop_first, missing, cat_missing_method):
+    if missing and cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            CategoricalMatrix(
+                cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+            )
+        return
+
+    cat_mat = CategoricalMatrix(
+        cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+    )
     other = np.random.random(cat_mat.shape[0])
     res = cat_mat.transpose_matvec(other)
-    expected = pd.get_dummies(cat_vec, drop_first=drop_first, dtype="uint8").T.dot(
-        other
-    )
+    expected = pd.get_dummies(
+        cat_vec,
+        drop_first=drop_first,
+        dtype="uint8",
+        dummy_na=cat_missing_method == "convert" and missing,
+    ).T.dot(other)
     np.testing.assert_allclose(res, expected)
 
 
-@pytest.mark.parametrize("drop_first", [True, False])
-def test_multiply(cat_vec, drop_first):
-    cat_mat = CategoricalMatrix(cat_vec, drop_first=drop_first)
+@pytest.mark.parametrize("drop_first", [True, False], ids=["drop_first", "no_drop"])
+@pytest.mark.parametrize("missing", [True, False], ids=["missing", "no_missing"])
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_multiply(cat_vec, drop_first, missing, cat_missing_method):
+    if missing and cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            CategoricalMatrix(
+                cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+            )
+        return
+
+    cat_mat = CategoricalMatrix(
+        cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+    )
     other = np.arange(len(cat_vec))[:, None]
     actual = cat_mat.multiply(other)
-    expected = pd.get_dummies(cat_vec, drop_first=drop_first, dtype="uint8") * other
+    expected = (
+        pd.get_dummies(
+            cat_vec,
+            drop_first=drop_first,
+            dtype="uint8",
+            dummy_na=cat_missing_method == "convert" and missing,
+        )
+        * other
+    )
     np.testing.assert_allclose(actual.A, expected)
 
 
@@ -65,9 +158,48 @@ def test_nulls(mi_element):
         CategoricalMatrix(vec)
 
 
-@pytest.mark.parametrize("drop_first", [True, False])
-def test_categorical_indexing(drop_first):
-    catvec = [0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 3]
-    mat = CategoricalMatrix(catvec, drop_first=drop_first)
-    expected = pd.get_dummies(catvec, drop_first=drop_first).to_numpy()[:, [0, 1]]
+@pytest.mark.parametrize("cat_missing_name", ["(MISSING)", "__None__", "[NULL]"])
+def test_cat_missing_name(cat_missing_name):
+    vec = [None, "(MISSING)", "__None__", "a", "b"]
+    if cat_missing_name in vec:
+        with pytest.raises(
+            ValueError,
+            match=re.escape(f"Missing category {cat_missing_name} already exists."),
+        ):
+            CategoricalMatrix(
+                vec, cat_missing_method="convert", cat_missing_name=cat_missing_name
+            )
+    else:
+        cat = CategoricalMatrix(
+            vec, cat_missing_method="convert", cat_missing_name=cat_missing_name
+        )
+        assert set(cat.cat.categories) == set(vec) - {None} | {cat_missing_name}
+
+
+@pytest.mark.parametrize("drop_first", [True, False], ids=["drop_first", "no_drop"])
+@pytest.mark.parametrize("missing", [True, False], ids=["missing", "no_missing"])
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_categorical_indexing(drop_first, missing, cat_missing_method):
+    if not missing:
+        cat_vec = [0, 1, 2, 0, 1, 2, 0, 1, 2, 3, 3]
+    else:
+        cat_vec = [0, None, 2, 0, None, 2, 0, None, 2, 3, 3]
+
+    if missing and cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            CategoricalMatrix(
+                cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+            )
+        return
+
+    mat = CategoricalMatrix(
+        cat_vec, drop_first=drop_first, cat_missing_method=cat_missing_method
+    )
+    expected = pd.get_dummies(
+        cat_vec,
+        drop_first=drop_first,
+        dummy_na=cat_missing_method == "convert" and missing,
+    ).to_numpy()[:, [0, 1]]
     np.testing.assert_allclose(mat[:, [0, 1]].A, expected)
diff --git a/tests/test_formula.py b/tests/test_formula.py
index 88a55c71..65930106 100644
--- a/tests/test_formula.py
+++ b/tests/test_formula.py
@@ -630,6 +630,116 @@ def test_interactable_vectors(left, right, reverse):
         assert result_vec.name == right.name + ":" + left.name
 
 
+@pytest.mark.parametrize("cat_missing_method", ["zero", "convert"])
+@pytest.mark.parametrize(
+    "cat_missing_name",
+    ["__missing__", "(MISSING)"],
+)
+def test_cat_missing_handling(cat_missing_method, cat_missing_name):
+    df = pd.DataFrame(
+        {
+            "cat_1": pd.Categorical(["a", "b", None, "b", "a"]),
+        }
+    )
+
+    mat_from_pandas = tm.from_pandas(
+        df,
+        cat_threshold=0,
+        cat_missing_method=cat_missing_method,
+        cat_missing_name=cat_missing_name,
+    )
+
+    mat_from_formula = tm.from_formula(
+        "cat_1 - 1",
+        df,
+        cat_threshold=0,
+        cat_missing_method=cat_missing_method,
+        cat_missing_name=cat_missing_name,
+    )
+
+    assert mat_from_pandas.column_names == mat_from_formula.column_names
+    assert mat_from_pandas.term_names == mat_from_formula.term_names
+    np.testing.assert_array_equal(mat_from_pandas.A, mat_from_formula.A)
+
+    mat_from_formula_new = mat_from_formula.model_spec.get_model_matrix(df)
+    assert mat_from_pandas.column_names == mat_from_formula_new.column_names
+    assert mat_from_pandas.term_names == mat_from_formula_new.term_names
+    np.testing.assert_array_equal(mat_from_pandas.A, mat_from_formula_new.A)
+
+
+def test_cat_missing_C():
+    df = pd.DataFrame(
+        {
+            "cat_1": pd.Categorical(["a", "b", None, "b", "a"]),
+            "cat_2": pd.Categorical(["1", "2", None, "1", "2"]),
+        }
+    )
+    formula = (
+        "C(cat_1, missing_method='convert', missing_name='M') "
+        "+ C(cat_2, missing_method='zero')"
+    )
+    expected_names = [
+        "C(cat_1, missing_method='convert', missing_name='M')[a]",
+        "C(cat_1, missing_method='convert', missing_name='M')[b]",
+        "C(cat_1, missing_method='convert', missing_name='M')[M]",
+        "C(cat_2, missing_method='zero')[1]",
+        "C(cat_2, missing_method='zero')[2]",
+    ]
+
+    result = tm.from_formula(formula, df)
+
+    assert result.column_names == expected_names
+    assert result.model_spec.get_model_matrix(df).column_names == expected_names
+
+
+@pytest.mark.parametrize(
+    "cat_missing_method", ["zero", "convert"], ids=["zero", "convert"]
+)
+def test_cat_missing_unseen(cat_missing_method):
+    df = pd.DataFrame(
+        {
+            "cat_1": pd.Categorical(["a", "b", None, "b", "a"]),
+        }
+    )
+    df_unseen = pd.DataFrame(
+        {
+            "cat_1": pd.Categorical(["a", None]),
+        }
+    )
+    result_seen = tm.from_formula(
+        "cat_1 - 1", df, cat_missing_method=cat_missing_method
+    )
+    result_unseen = result_seen.model_spec.get_model_matrix(df_unseen)
+
+    assert result_seen.column_names == result_unseen.column_names
+    if cat_missing_method == "convert":
+        expected_array = np.array([[1, 0, 0], [0, 0, 1]], dtype=np.float64)
+    elif cat_missing_method == "zero":
+        expected_array = np.array([[1, 0], [0, 0]], dtype=np.float64)
+
+    np.testing.assert_array_equal(result_unseen.A, expected_array)
+
+
+def test_cat_missing_interactions():
+    df = pd.DataFrame(
+        {
+            "cat_1": pd.Categorical(["a", "b", None, "b", "a"]),
+            "cat_2": pd.Categorical(["1", "2", None, "1", "2"]),
+        }
+    )
+    formula = "C(cat_1, missing_method='convert') : C(cat_2, missing_method='zero') - 1"
+    expected_names = [
+        "C(cat_1, missing_method='convert')[a]:C(cat_2, missing_method='zero')[1]",
+        "C(cat_1, missing_method='convert')[b]:C(cat_2, missing_method='zero')[1]",
+        "C(cat_1, missing_method='convert')[(MISSING)]:C(cat_2, missing_method='zero')[1]",
+        "C(cat_1, missing_method='convert')[a]:C(cat_2, missing_method='zero')[2]",
+        "C(cat_1, missing_method='convert')[b]:C(cat_2, missing_method='zero')[2]",
+        "C(cat_1, missing_method='convert')[(MISSING)]:C(cat_2, missing_method='zero')[2]",
+    ]
+
+    assert tm.from_formula(formula, df).column_names == expected_names
+
+
 # Tests from formulaic's test suite
 # ---------------------------------
 
diff --git a/tests/test_split_matrix.py b/tests/test_split_matrix.py
index 5de01638..a102e8a6 100644
--- a/tests/test_split_matrix.py
+++ b/tests/test_split_matrix.py
@@ -61,27 +61,32 @@ def split_mat() -> SplitMatrix:
     return mat
 
 
-def get_split_with_cat_components() -> (
-    List[Union[tm.SparseMatrix, tm.DenseMatrix, tm.CategoricalMatrix]]
-):
+def get_split_with_cat_components(
+    missing,
+) -> List[Union[tm.SparseMatrix, tm.DenseMatrix, tm.CategoricalMatrix]]:
     n_rows = 10
     np.random.seed(0)
     dense_1 = tm.DenseMatrix(np.random.random((n_rows, 3)))
     sparse_1 = tm.SparseMatrix(sps.random(n_rows, 3).tocsc())
-    cat = tm.CategoricalMatrix(np.random.choice(range(3), n_rows))
+    if missing:
+        cat = tm.CategoricalMatrix(
+            np.random.choice([0, 1, 2, None], n_rows), cat_missing_method="zero"
+        )
+    else:
+        cat = tm.CategoricalMatrix(np.random.choice(range(3), n_rows))
     dense_2 = tm.DenseMatrix(np.random.random((n_rows, 3)))
     sparse_2 = tm.SparseMatrix(sps.random(n_rows, 3, density=0.5).tocsc())
     cat_2 = tm.CategoricalMatrix(np.random.choice(range(3), n_rows), drop_first=True)
     return [dense_1, sparse_1, cat, dense_2, sparse_2, cat_2]
 
 
-def split_with_cat() -> SplitMatrix:
+def split_with_cat(missing) -> SplitMatrix:
     """Initialized with multiple sparse and dense parts and no indices."""
-    return tm.SplitMatrix(get_split_with_cat_components())
+    return tm.SplitMatrix(get_split_with_cat_components(missing))
 
 
-def split_with_cat_64() -> SplitMatrix:
-    mat = tm.SplitMatrix(get_split_with_cat_components())
+def split_with_cat_64(missing) -> SplitMatrix:
+    mat = tm.SplitMatrix(get_split_with_cat_components(missing))
     matrices = mat.matrices
 
     for i, mat_ in enumerate(mat.matrices):
@@ -99,7 +104,15 @@ def split_with_cat_64() -> SplitMatrix:
     return tm.SplitMatrix(matrices, mat.indices)
 
 
-@pytest.mark.parametrize("mat", [split_with_cat(), split_with_cat_64()])
+@pytest.mark.parametrize(
+    "mat",
+    [
+        split_with_cat(False),
+        split_with_cat_64(False),
+        split_with_cat(True),
+        split_with_cat_64(True),
+    ],
+)
 def test_init(mat: SplitMatrix):
     assert len(mat.indices) == 4
     assert len(mat.matrices) == 4
@@ -109,7 +122,15 @@ def test_init(mat: SplitMatrix):
     assert mat.matrices[2].shape == (10, 3)
 
 
-@pytest.mark.parametrize("mat", [split_mat(), split_with_cat(), split_with_cat_64()])
+@pytest.mark.parametrize(
+    "mat",
+    [
+        split_with_cat(False),
+        split_with_cat_64(False),
+        split_with_cat(True),
+        split_with_cat_64(True),
+    ],
+)
 def test_init_from_split(mat):
     np.testing.assert_array_equal(mat.A, tm.SplitMatrix([mat]).A)
     np.testing.assert_array_equal(
@@ -146,7 +167,15 @@ def test_sandwich_sparse_dense(X: np.ndarray, Acols, Bcols):
 
 
 # TODO: ensure cols are in order
-@pytest.mark.parametrize("mat", [split_mat(), split_with_cat(), split_with_cat_64()])
+@pytest.mark.parametrize(
+    "mat",
+    [
+        split_with_cat(False),
+        split_with_cat_64(False),
+        split_with_cat(True),
+        split_with_cat_64(True),
+    ],
+)
 @pytest.mark.parametrize(
     "cols",
     [None, [0], [1, 2, 3], [1, 5]],
@@ -160,7 +189,15 @@ def test_sandwich(mat: tm.SplitMatrix, cols):
         np.testing.assert_allclose(y1, y2, atol=1e-12)
 
 
-@pytest.mark.parametrize("mat", [split_mat(), split_with_cat(), split_with_cat_64()])
+@pytest.mark.parametrize(
+    "mat",
+    [
+        split_with_cat(False),
+        split_with_cat_64(False),
+        split_with_cat(True),
+        split_with_cat_64(True),
+    ],
+)
 @pytest.mark.parametrize("cols", [None, [0], [1, 2, 3], [1, 5]])
 def test_split_col_subsets(mat: tm.SplitMatrix, cols):
     subset_cols_indices, subset_cols, n_cols = mat._split_col_subsets(cols)
@@ -189,56 +226,66 @@ def _get_lengths(vec_list: List[Optional[np.ndarray]]):
             assert (mat.indices[i] == subset_cols_indices[i]).all()
 
 
-def random_split_matrix(seed=0, n_rows=10, n_cols_per=3):
+def random_split_matrix(seed=0, n_rows=10, n_cols_per=3, missing=False):
     if seed is not None:
         np.random.seed(seed)
     dense_1 = tm.DenseMatrix(np.random.random((n_rows, n_cols_per)))
     sparse = tm.SparseMatrix(sps.random(n_rows, n_cols_per).tocsc())
-    cat = tm.CategoricalMatrix(np.random.choice(range(n_cols_per), n_rows))
+    if missing:
+        cat = tm.CategoricalMatrix(
+            np.random.choice(list(range(n_cols_per)) + [None], n_rows),
+            cat_missing_method="zero",
+        )
+    else:
+        cat = tm.CategoricalMatrix(np.random.choice(range(n_cols_per), n_rows))
     dense_2 = tm.DenseMatrix(np.random.random((n_rows, n_cols_per)))
     cat_2 = tm.CategoricalMatrix(np.random.choice(range(n_cols_per), n_rows))
     mat = tm.SplitMatrix([dense_1, sparse, cat, dense_2, cat_2])
     return mat
 
 
-def many_random_tests(checker):
+def many_random_tests(checker, missing):
     for i in range(10):
         mat = random_split_matrix(
             seed=(1 if i == 0 else None),
             n_rows=np.random.randint(130),
             n_cols_per=1 + np.random.randint(10),
+            missing=missing,
         )
         checker(mat)
 
 
-def test_sandwich_many_types():
+@pytest.mark.parametrize("missing", [False, True], ids=["no_missing", "missing"])
+def test_sandwich_many_types(missing):
     def check(mat):
         d = np.random.random(mat.shape[0])
         res = mat.sandwich(d)
         expected = (mat.A.T * d[None, :]) @ mat.A
         np.testing.assert_allclose(res, expected)
 
-    many_random_tests(check)
+    many_random_tests(check, missing)
 
 
-def test_transpose_matvec_many_types():
+@pytest.mark.parametrize("missing", [False, True], ids=["no_missing", "missing"])
+def test_transpose_matvec_many_types(missing):
     def check(mat):
         d = np.random.random(mat.shape[0])
         res = mat.transpose_matvec(d)
         expected = mat.A.T.dot(d)
         np.testing.assert_almost_equal(res, expected)
 
-    many_random_tests(check)
+    many_random_tests(check, missing)
 
 
-def test_matvec_many_types():
+@pytest.mark.parametrize("missing", [False, True], ids=["no_missing", "missing"])
+def test_matvec_many_types(missing):
     def check(mat):
         d = np.random.random(mat.shape[1])
         res = mat.matvec(d)
         expected = mat.A.dot(d)
         np.testing.assert_almost_equal(res, expected)
 
-    many_random_tests(check)
+    many_random_tests(check, missing)
 
 
 def test_init_from_1d():
@@ -259,3 +306,17 @@ def test_matvec(n_rows):
     )
     mat = from_pandas(X, cat_threshold=0)
     np.testing.assert_allclose(mat.matvec(np.array(mat.shape[1] * [1])), n_cols)
+
+
+@pytest.mark.parametrize("cat_missing_method", ["fail", "zero", "convert"])
+def test_from_pandas_missing(cat_missing_method):
+    df = pd.DataFrame({"cat": pd.Categorical([1, 2, pd.NA, 1, 2, pd.NA])})
+    if cat_missing_method == "fail":
+        with pytest.raises(
+            ValueError, match="Categorical data can't have missing values"
+        ):
+            from_pandas(df, cat_missing_method=cat_missing_method)
+    elif cat_missing_method == "zero":
+        assert from_pandas(df, cat_missing_method=cat_missing_method).shape == (6, 2)
+    elif cat_missing_method == "convert":
+        assert from_pandas(df, cat_missing_method=cat_missing_method).shape == (6, 3)