diff --git a/nalgebra-sparse/src/csc.rs b/nalgebra-sparse/src/csc.rs
index 6cc06f1d3..877d95b7e 100644
--- a/nalgebra-sparse/src/csc.rs
+++ b/nalgebra-sparse/src/csc.rs
@@ -557,7 +557,6 @@ impl<T> CscMatrix<T> {
 
     /// Solves a lower triangular system, `self` is a matrix of NxN, and `b` is a column vector of size N
     /// Assuming that b is dense.
-    // TODO add an option here for assuming diagonal is one.
     pub fn dense_lower_triangular_solve(&self, b: &[T], out: &mut [T], unit_diagonal: bool)
     where
         T: RealField + Copy,
@@ -569,16 +568,62 @@ impl<T> CscMatrix<T> {
         let n = b.len();
 
         for i in 0..n {
-            let mul = out[i];
             let col = self.col(i);
+            let mut iter = col.row_indices().iter().zip(col.values().iter()).peekable();
+            while iter.next_if(|n| *n.0 < i).is_some() {}
+            if let Some(n) = iter.peek() {
+                if *n.0 == i && !unit_diagonal {
+                    assert!(*n.0 <= i);
+                    out[i] /= *n.1;
+                    iter.next();
+                }
+            }
+            let mul = out[i];
             for (&ri, &v) in col.row_indices().iter().zip(col.values().iter()) {
+                use std::cmp::Ordering::*;
                 // ensure that only using the lower part
-                if ri == i {
-                    if !unit_diagonal {
-                        out[ri] /= v;
-                    }
-                } else if ri > i {
-                    out[ri] -= v * mul;
+                match ri.cmp(&i) {
+                    Greater => out[ri] -= v * mul,
+                    Equal | Less => {}
+                }
+            }
+        }
+    }
+
+    /// Solves an upper triangular system, `self` is a matrix of NxN, and `b` is a column vector of size N
+    /// Assuming that b is dense.
+    pub fn dense_upper_triangular_solve(&self, b: &[T], out: &mut [T])
+    where
+        T: RealField + Copy,
+    {
+        assert_eq!(self.nrows(), self.ncols());
+        assert_eq!(self.ncols(), b.len());
+        assert_eq!(out.len(), b.len());
+        out.copy_from_slice(b);
+        let n = b.len();
+
+        for i in (0..n).rev() {
+            let col = self.col(i);
+            let mut iter = col
+                .row_indices()
+                .iter()
+                .zip(col.values().iter())
+                .rev()
+                .peekable();
+            while iter.next_if(|n| *n.0 > i).is_some() {}
+            if let Some(n) = iter.peek() {
+                if *n.0 == i {
+                    out[i] /= *n.1;
+                    iter.next();
+                }
+            }
+            // introduce a NaN, intentionally, if the diagonal doesn't have a value.
+            let mul = out[i];
+            for (&row, &v) in iter {
+                use std::cmp::Ordering::*;
+                match row.cmp(&i) {
+                    Less => out[row] -= v * mul,
+                    Equal | Greater => {}
                 }
             }
         }
@@ -619,9 +664,17 @@ impl<T> CscMatrix<T> {
                 .zip(col.values().iter())
                 .rev()
                 .peekable();
+
+            while iter.next_if(|n| *n.0 > row).is_some() {}
+            match iter.peek() {
+                Some((&r, &l_val)) if r == row => out[i] /= l_val,
+                // here it now becomes implicitly 0,
+                // likely this should introduce NaN or some other behavior.
+                _ => {}
+            }
             let mul = out[i];
-            for (ni, &nrow) in out_sparsity_pattern.iter().enumerate().rev().skip(i + 1) {
-                assert!(nrow < row);
+            for (ni, &nrow) in out_sparsity_pattern[i + 1..].iter().enumerate().rev() {
+                assert!(nrow > row);
                 while iter.next_if(|n| *n.0 > nrow).is_some() {}
                 let l_val = match iter.peek() {
                     Some((&r, &l_val)) if r == nrow => l_val,
diff --git a/nalgebra-sparse/src/factorization/lu.rs b/nalgebra-sparse/src/factorization/lu.rs
index 1df195821..8ca6e6c28 100644
--- a/nalgebra-sparse/src/factorization/lu.rs
+++ b/nalgebra-sparse/src/factorization/lu.rs
@@ -1,6 +1,6 @@
 use crate::SparseEntryMut;
 use crate::{CscBuilder, CscMatrix};
-use nalgebra::RealField;
+use nalgebra::{DMatrix, RealField};
 
 /// Constructs an LU Factorization using a left-looking approach.
 /// This means it will construct each column, starting from the leftmost one.
@@ -34,6 +34,17 @@ impl<T: RealField + Copy> LeftLookingLUFactorization<T> {
         l
     }
 
+    /// Computes `x` in `LUx = b`, where `b` is a dense vector.
+    pub fn solve(&self, b: &[T]) -> DMatrix<T> {
+        let mut y = vec![T::zero(); b.len()];
+        // Implementation: Solve two systems: Ly = b, then Ux = y.
+        self.l_u.dense_lower_triangular_solve(b, &mut y, true);
+        let mut out = y.clone();
+        self.l_u.dense_upper_triangular_solve(&y, &mut out);
+
+        DMatrix::from_vec(b.len(), 1, out)
+    }
+
     /// Construct a new sparse LU factorization
     /// from a given CSC matrix.
     pub fn new(a: &CscMatrix<T>) -> Self {
diff --git a/nalgebra-sparse/src/pattern.rs b/nalgebra-sparse/src/pattern.rs
index 4b85571e3..daa4bed1b 100644
--- a/nalgebra-sparse/src/pattern.rs
+++ b/nalgebra-sparse/src/pattern.rs
@@ -324,6 +324,41 @@ impl SparsityPattern {
 
             out.push(j);
             for &i in sp.lane(j) {
+                if i < j {
+                    continue;
+                }
+                reach(sp, i, out);
+            }
+        }
+
+        for &i in b {
+            reach(&self, i, out);
+        }
+    }
+
+    /// Computes the output sparsity pattern of `x` in `Ax = b`.
+    /// where A's nonzero pattern is given by `self` and the non-zero indices
+    /// of vector `b` are specified as a slice.
+    /// The output is not necessarily in sorted order, but is topological sort order.
+    /// Treats `self` as upper triangular, even if there are elements in the lower triangle.
+    /// Acts as if b is one major lane (i.e. CSC matrix and one column)
+    pub fn sparse_upper_triangular_solve(&self, b: &[usize], out: &mut Vec<usize>) {
+        assert!(b.iter().all(|&i| i < self.major_dim()));
+        out.clear();
+
+        // From a given starting column, traverses and finds all reachable indices.
+        fn reach(sp: &SparsityPattern, j: usize, out: &mut Vec<usize>) {
+            // already traversed
+            if out.contains(&j) {
+                return;
+            }
+
+            out.push(j);
+            // iteration order here does not matter, but technically it should be rev?
+            for &i in sp.lane(j).iter().rev() {
+                if i > j {
+                    continue;
+                }
                 reach(sp, i, out);
             }
         }
diff --git a/nalgebra-sparse/tests/sparsity.rs b/nalgebra-sparse/tests/sparsity.rs
index 1a0d67dba..5029e1eee 100644
--- a/nalgebra-sparse/tests/sparsity.rs
+++ b/nalgebra-sparse/tests/sparsity.rs
@@ -55,6 +55,49 @@ fn lower_sparse_solve() {
     assert_eq!(buf, vec![3, 8, 11, 12, 13, 5, 9, 10]);
 }
 
+// this test is a flipped version of lower sparse solve
+#[test]
+fn upper_sparse_solve() {
+    // just a smaller number so it's easier to debug
+    let n = 8;
+    let speye = SparsityPattern::identity(n);
+    let mut buf = vec![];
+    speye.sparse_lower_triangular_solve(&[0, 5], &mut buf);
+    assert_eq!(buf, vec![0, 5]);
+
+    // test case from
+    // https://www.youtube.com/watch?v=1dGRTOwBkQs&ab_channel=TimDavis
+    let mut builder = SparsityPatternBuilder::new(14, 14);
+    // building CscMatrix, so it will be col, row
+    #[rustfmt::skip]
+    let mut indices = vec![
+      (0, 0), (0, 2),
+      (1, 1), (1, 3), (1, 6), (1, 8),
+      (2,2), (2,4), (2,7),
+      (3,3), (3,8),
+      (4,4), (4,7),
+      (5,5), (5,8), (5,9),
+      (6,6), (6,9), (6,10),
+      (7,7), (7,9),
+      (8,8), (8,11), (8,12),
+      (9,9), (9,10), (9, 12), (9, 13),
+      (10,10), (10,11), (10,12),
+      (11,11), (11,12),
+      (12,12), (12,13),
+      (13,13),
+    ];
+    indices.sort_by_key(|&(min, maj)| (maj, min));
+    for (min, maj) in indices.iter().copied() {
+        assert!(builder.insert(maj, min).is_ok());
+    }
+    let sp = builder.build();
+    assert_eq!(sp.major_dim(), 14);
+    assert_eq!(sp.minor_dim(), 14);
+    assert_eq!(sp.nnz(), indices.len());
+    sp.sparse_upper_triangular_solve(&[9], &mut buf);
+    assert_eq!(buf, vec![9, 7, 4, 2, 0, 6, 1, 5]);
+}
+
 #[test]
 fn test_builder() {
     let mut builder = SparsityPatternBuilder::new(2, 2);
diff --git a/nalgebra-sparse/tests/unit_tests/csc.rs b/nalgebra-sparse/tests/unit_tests/csc.rs
index fc3ebc868..a7930fa3e 100644
--- a/nalgebra-sparse/tests/unit_tests/csc.rs
+++ b/nalgebra-sparse/tests/unit_tests/csc.rs
@@ -787,3 +787,44 @@ fn test_sparse_lower_triangular_solve() {
     a.sparse_lower_triangular_solve(&vi, &v, &vi, &mut out, true);
     assert_eq!(out, [3., -1.]);
 }
+
+#[test]
+fn test_sparse_upper_triangular_solve() {
+    let a = CscMatrix::identity(3);
+    let vi = [0, 1, 2];
+    let v = [1., 2., 3.];
+    let mut out = [0.; 3];
+    a.sparse_upper_triangular_solve_sorted(&vi, &v, &vi, &mut out);
+    assert_eq!(out, v);
+
+    let vi = [0, 999];
+    let v = [3., 2.];
+    let mut out = [0.; 2];
+
+    // test with large identity matrix
+    let mut a = CscMatrix::identity(1000);
+    a.sparse_upper_triangular_solve_sorted(&vi, &v, &vi, &mut out);
+    assert_eq!(out, v);
+
+    if let SparseEntryMut::NonZero(e) = a.index_entry_mut(0, 0) {
+        *e = 2.;
+    };
+    a.sparse_upper_triangular_solve_sorted(&vi, &v, &vi, &mut out);
+    assert_eq!(out, [1.5, 2.]);
+
+    use nalgebra_sparse::coo::CooMatrix;
+    let a: CscMatrix<f32> = (&CooMatrix::<f32>::try_from_triplets(
+        5,
+        5,
+        vec![0, 0, 4],
+        vec![0, 4, 4],
+        vec![2., 1., 4.],
+    )
+    .unwrap())
+        .into();
+    assert_eq!(a.col(0).nnz(), 1);
+    assert_eq!(a.col(4).nnz(), 2);
+
+    a.sparse_upper_triangular_solve_sorted(&[0, 4], &v, &[0, 4], &mut out);
+    assert_eq!(out, [1.5, 0.5]);
+}
diff --git a/nalgebra-sparse/tests/unit_tests/left_looking_lu.rs b/nalgebra-sparse/tests/unit_tests/left_looking_lu.rs
index d55e6acef..dfc52dcd1 100644
--- a/nalgebra-sparse/tests/unit_tests/left_looking_lu.rs
+++ b/nalgebra-sparse/tests/unit_tests/left_looking_lu.rs
@@ -5,11 +5,16 @@ use nalgebra_sparse::factorization::LeftLookingLUFactorization;
 use matrixcompare::{assert_matrix_eq, prop_assert_matrix_eq};
 use proptest::prelude::*;
 
+use crate::common::value_strategy;
+use nalgebra::proptest::matrix;
 use nalgebra_sparse::CscMatrix;
 
 proptest! {
+  // Note that positive definite matrices are guaranteed to be invertible.
+  // That's why they're used here, but it's not necessary for a matrix to be pd to be
+  // invertible.
   #[test]
-  fn lu_factorization_correct_for_positive_def_matrices(
+  fn lu_for_positive_def_matrices(
     matrix in positive_definite()
   ) {
     let lu = LeftLookingLUFactorization::new(&matrix);
@@ -20,6 +25,35 @@ proptest! {
     prop_assert!(u.triplet_iter().all(|(i, j, _)| j >= i));
     prop_assert_matrix_eq!(l * u, matrix, comp = abs, tol = 1e-7);
   }
+
+  #[test]
+  fn lu_solve_correct_for_positive_def_matrices(
+    (matrix, b) in positive_definite()
+      .prop_flat_map(|csc| {
+        let rhs = matrix(value_strategy::<f64>(), csc.nrows(), 1);
+        (Just(csc), rhs)
+      })
+  ) {
+    let lu = LeftLookingLUFactorization::new(&matrix);
+
+    let l = lu.l();
+    prop_assert!(l.triplet_iter().all(|(i, j, _)| j <= i));
+
+    let mut x_l = b.clone_owned();
+    l.dense_lower_triangular_solve(b.as_slice(), x_l.as_mut_slice(), true);
+
+    prop_assert_matrix_eq!(l * x_l, b, comp=abs, tol=1e-12);
+
+    let u = lu.u();
+    prop_assert!(u.triplet_iter().all(|(i, j, _)| j >= i));
+
+    let mut x_u = b.clone_owned();
+    u.dense_upper_triangular_solve(b.as_slice(), x_u.as_mut_slice());
+    prop_assert_matrix_eq!(u * x_u, b, comp = abs, tol = 1e-7);
+
+    let x = lu.solve(b.as_slice());
+    prop_assert_matrix_eq!(&matrix * &x, b, comp=abs, tol=1e-12);
+  }
 }
 
 #[test]