This repository has been archived by the owner on Feb 19, 2024. It is now read-only.

Commit

preallocate memory
Hanting Zhang committed Dec 3, 2023
1 parent 640de67 commit 41f0573
Showing 10 changed files with 584 additions and 292 deletions.
4 changes: 3 additions & 1 deletion .vscode/settings.json
@@ -1,6 +1,8 @@
 {
     "files.associations": {
         "__locale": "cpp",
-        "ios": "cpp"
+        "ios": "cpp",
+        "functional": "cpp",
+        "__functional_base": "cpp"
     }
 }
9 changes: 5 additions & 4 deletions benches/spmvm.rs
@@ -59,13 +59,14 @@ fn criterion_benchmark(c: &mut Criterion) {
         .unwrap_or("17".to_string())
         .parse()
         .unwrap();
-    let n: usize = 1 << bench_npow;
+    let n = 1usize << (bench_npow + 1);
+    let m = 1usize << bench_npow;
 
     println!("generating random matrix and scalars, just hang on...");
-    let csr = generate_csr(n, n);
+    let csr = generate_csr(n, m);
     let cuda_csr =
-        CudaSparseMatrix::new(&csr.data, &csr.indices, &csr.indptr, n, n);
-    let W = crate::tests::gen_scalars(n - 10);
+        CudaSparseMatrix::new(&csr.data, &csr.indices, &csr.indptr, n, m);
+    let W = crate::tests::gen_scalars(m - 10);
     let U = crate::tests::gen_scalars(9);
     let witness = CudaWitness::new(&W, &pallas::Scalar::ONE, &U);
     let scalars = [W.clone(), vec![pallas::Scalar::ONE], U.clone()].concat();
17 changes: 17 additions & 0 deletions cuda/pallas.cu
@@ -25,17 +25,34 @@ extern "C" RustError cuda_double_pallas(double_host_t<scalar_t> *csr, scalar_t *
     return double_scalars<scalar_t>(csr, scalars, out);
 }
 
+extern "C" void drop_spmvm_context_pallas(spmvm_context_t<scalar_t> &ref)
+{
+    drop_spmvm_context<scalar_t>(ref);
+}
+
 extern "C" RustError cuda_sparse_matrix_mul_pallas(spmvm_host_t<scalar_t> *csr, const scalar_t *scalars, scalar_t *out, size_t nthreads)
 {
     return sparse_matrix_mul<scalar_t>(csr, scalars, out, nthreads);
 }
 
+extern "C" RustError cuda_sparse_matrix_witness_init_pallas(
+    spmvm_host_t<scalar_t> *csr, spmvm_context_t<scalar_t> *context)
+{
+    return sparse_matrix_witness_init<scalar_t>(csr, context);
+}
+
 extern "C" RustError cuda_sparse_matrix_witness_pallas(
     spmvm_host_t<scalar_t> *csr, const witness_t<scalar_t> *witness, scalar_t *out, size_t nthreads)
 {
     return sparse_matrix_witness<scalar_t>(csr, witness, out, nthreads);
 }
 
+extern "C" RustError cuda_sparse_matrix_witness_with_pallas(
+    spmvm_context_t<scalar_t> *context, const witness_t<scalar_t> *witness, scalar_t *out, size_t nthreads)
+{
+    return sparse_matrix_witness_with<scalar_t>(context, witness, out, nthreads);
+}
+
 extern "C" RustError cuda_sparse_matrix_witness_pallas_cpu(
     spmvm_host_t<scalar_t> *csr, const witness_t<scalar_t> *witness, scalar_t *out)
 {
17 changes: 15 additions & 2 deletions cuda/vesta.cu
@@ -19,21 +19,34 @@ typedef pallas_t scalar_t;
 
 #ifndef __CUDA_ARCH__
 
-extern "C" void drop_msm_context_vesta(msm_context_t<affine_t::mem_t> &ref) {
-    CUDA_OK(cudaFree(ref.d_points));
+extern "C" void drop_spmvm_context_vesta(spmvm_context_t<scalar_t> &ref)
+{
+    drop_spmvm_context<scalar_t>(ref);
 }
 
 extern "C" RustError cuda_sparse_matrix_mul_vesta(spmvm_host_t<scalar_t> *csr, const scalar_t *scalars, scalar_t *out, size_t nthreads)
 {
     return sparse_matrix_mul<scalar_t>(csr, scalars, out, nthreads);
 }
 
+extern "C" RustError cuda_sparse_matrix_witness_init_vesta(
+    spmvm_host_t<scalar_t> *csr, spmvm_context_t<scalar_t> *context)
+{
+    return sparse_matrix_witness_init<scalar_t>(csr, context);
+}
+
 extern "C" RustError cuda_sparse_matrix_witness_vesta(
     spmvm_host_t<scalar_t> *csr, const witness_t<scalar_t> *witness, scalar_t *out, size_t nthreads)
 {
     return sparse_matrix_witness<scalar_t>(csr, witness, out, nthreads);
 }
 
+extern "C" RustError cuda_sparse_matrix_witness_with_vesta(
+    spmvm_context_t<scalar_t> *context, const witness_t<scalar_t> *witness, scalar_t *out, size_t nthreads)
+{
+    return sparse_matrix_witness_with<scalar_t>(context, witness, out, nthreads);
+}
+
 extern "C" RustError cuda_sparse_matrix_witness_vesta_cpu(
     spmvm_host_t<scalar_t> *csr, const witness_t<scalar_t> *witness, scalar_t *out)
 {
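
The template bodies behind these entry points (drop_spmvm_context, sparse_matrix_witness_init, sparse_matrix_witness_with) are not part of the hunks shown above. For orientation only, here is a minimal sketch of the preallocate-once / reuse-many pattern the new init/with/drop trio suggests; every struct field, parameter, and helper name below is hypothetical and not taken from the library.

// Hypothetical sketch only: a context that owns device-side CSR buffers so
// repeated witness SpMVM calls can skip per-call cudaMalloc/cudaMemcpy.
#include <cuda_runtime.h>
#include <cstddef>

template <typename scalar_t>
struct spmvm_context_sketch {
    scalar_t *d_data;   // CSR values, resident on the device
    size_t *d_indices;  // CSR column indices
    size_t *d_indptr;   // CSR row pointers
    scalar_t *d_out;    // output buffer reused across calls
    size_t num_rows;
};

// "init": allocate and upload the matrix once, outside the timed region.
template <typename scalar_t>
cudaError_t witness_init_sketch(const scalar_t *data, const size_t *indices,
                                const size_t *indptr, size_t nnz,
                                size_t num_rows,
                                spmvm_context_sketch<scalar_t> *ctx)
{
    ctx->num_rows = num_rows;
    cudaMalloc((void **)&ctx->d_data, nnz * sizeof(scalar_t));
    cudaMalloc((void **)&ctx->d_indices, nnz * sizeof(size_t));
    cudaMalloc((void **)&ctx->d_indptr, (num_rows + 1) * sizeof(size_t));
    cudaMalloc((void **)&ctx->d_out, num_rows * sizeof(scalar_t));
    cudaMemcpy(ctx->d_data, data, nnz * sizeof(scalar_t), cudaMemcpyHostToDevice);
    cudaMemcpy(ctx->d_indices, indices, nnz * sizeof(size_t), cudaMemcpyHostToDevice);
    return cudaMemcpy(ctx->d_indptr, indptr, (num_rows + 1) * sizeof(size_t),
                      cudaMemcpyHostToDevice);
}

// "with": each call would only upload the fresh witness scalars and launch the
// SpMVM kernel against the buffers the context already holds (kernel elided).

// "drop": release everything the context owns, mirroring drop_spmvm_context_*.
template <typename scalar_t>
void drop_context_sketch(spmvm_context_sketch<scalar_t> &ctx)
{
    cudaFree(ctx.d_data);
    cudaFree(ctx.d_indices);
    cudaFree(ctx.d_indptr);
    cudaFree(ctx.d_out);
}

Under that reading, the examples below pay the allocation and upload cost once in sparse_matrix_witness_init_* and time the reuse path separately, which is what the "preallocated gpu took" measurement in examples/spmvm_vesta.rs reports.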
9 changes: 5 additions & 4 deletions examples/spmvm_pallas.rs
@@ -7,7 +7,7 @@ use std::time::Instant;
 
 use pasta_curves::{group::ff::{PrimeField, Field}, pallas};
 use pasta_msm::{
-    spmvm::{sparse_matrix_mul_pallas, CudaSparseMatrix, CudaWitness, sparse_matrix_witness_pallas},
+    spmvm::{CudaSparseMatrix, CudaWitness, pallas::{sparse_matrix_witness_with_pallas, sparse_matrix_witness_init_pallas}},
     utils::SparseMatrix,
 };
 use rand::Rng;
@@ -53,12 +53,12 @@ pub fn generate_scalars<F: PrimeField>(len: usize) -> Vec<F> {
 /// cargo run --release --example spmvm
 fn main() {
     let npow: usize = std::env::var("NPOW")
-        .unwrap_or("17".to_string())
+        .unwrap_or("20".to_string())
         .parse()
         .unwrap();
     let n = 1usize << npow;
     let nthreads: usize = std::env::var("NTHREADS")
-        .unwrap_or("128".to_string())
+        .unwrap_or("256".to_string())
         .parse()
         .unwrap();
 
@@ -73,10 +73,11 @@ fn main() {
     let res = csr.multiply_vec(&scalars);
     println!("cpu took: {:?}", start.elapsed());
 
+    let spmvm_context = sparse_matrix_witness_init_pallas(&cuda_csr);
     let witness = CudaWitness::new(&W, &pallas::Scalar::ONE, &U);
     let mut cuda_res = vec![pallas::Scalar::ONE; cuda_csr.num_rows];
     let start = Instant::now();
-    sparse_matrix_witness_pallas(&cuda_csr, &witness, &mut cuda_res, nthreads);
+    sparse_matrix_witness_with_pallas(&spmvm_context, &witness, &mut cuda_res, nthreads);
     println!("gpu took: {:?}", start.elapsed());
 
     assert_eq!(res, cuda_res);
59 changes: 30 additions & 29 deletions examples/spmvm_vesta.rs
@@ -11,8 +11,10 @@ use pasta_curves::{
 };
 use pasta_msm::{
     spmvm::{
-        sparse_matrix_mul_vesta, sparse_matrix_witness_vesta, CudaSparseMatrix,
-        CudaWitness, sparse_matrix_witness_vesta_cpu,
+        vesta::{
+            sparse_matrix_witness_init_vesta, sparse_matrix_witness_with_vesta, sparse_matrix_witness_vesta,
+        },
+        CudaSparseMatrix, CudaWitness,
     },
     utils::SparseMatrix,
 };
@@ -59,43 +61,42 @@ pub fn generate_scalars<F: PrimeField>(len: usize) -> Vec<F> {
 /// cargo run --release --example spmvm
 fn main() {
     let npow: usize = std::env::var("NPOW")
-        .unwrap_or("3".to_string())
+        .unwrap_or("20".to_string())
         .parse()
         .unwrap();
-    let n = 1usize << npow;
+    let n = 1usize << (npow + 1);
     let nthreads: usize = std::env::var("NTHREADS")
-        .unwrap_or("1".to_string())
+        .unwrap_or("128".to_string())
         .parse()
         .unwrap();
 
-    let csr_A = generate_csr(n, n);
-    let cuda_csr_A =
-        CudaSparseMatrix::new(&csr_A.data, &csr_A.indices, &csr_A.indptr, n, n);
-    let csr_B = generate_csr(n, n);
-    let cuda_csr_B =
-        CudaSparseMatrix::new(&csr_B.data, &csr_B.indices, &csr_B.indptr, n, n);
-
-    // let W = generate_scalars(n - 10);
-    // let U = generate_scalars(9);
-    // let scalars = [W.clone(), vec![vesta::Scalar::ONE], U.clone()].concat();
-    let W = vec![vesta::Scalar::ZERO; n - 3];
-    let U = vec![vesta::Scalar::ZERO; 2];
-    let scalars = vec![vesta::Scalar::ZERO; n];
+    let csr = generate_csr(n, n);
+    let cuda_csr =
+        CudaSparseMatrix::new(&csr.data, &csr.indices, &csr.indptr, n, n);
+    let W = generate_scalars(n - 10);
+    let U = generate_scalars(9);
+    let scalars = [W.clone(), vec![vesta::Scalar::ONE], U.clone()].concat();
 
     let start = Instant::now();
-    let res_A = csr_A.multiply_vec(&scalars);
-    let res_B = csr_B.multiply_vec(&scalars);
-    println!("native took: {:?}", start.elapsed());
+    let res = csr.multiply_vec(&scalars);
+    println!("cpu took: {:?}", start.elapsed());
 
-    let witness = CudaWitness::new(&W, &vesta::Scalar::ZERO, &U);
-    let mut cuda_res_A = vec![vesta::Scalar::ZERO; cuda_csr_A.num_rows];
-    let mut cuda_res_B = vec![vesta::Scalar::ZERO; cuda_csr_B.num_rows];
+    let witness = CudaWitness::new(&W, &vesta::Scalar::ONE, &U);
+    let mut cuda_res = vec![vesta::Scalar::ONE; cuda_csr.num_rows];
     let start = Instant::now();
-    sparse_matrix_witness_vesta(&cuda_csr_A, &witness, &mut cuda_res_A, nthreads);
-    sparse_matrix_witness_vesta(&cuda_csr_B, &witness, &mut cuda_res_B, nthreads);
-    println!("ffi took: {:?}", start.elapsed());
+    sparse_matrix_witness_vesta(&cuda_csr, &witness, &mut cuda_res, nthreads);
+    println!("gpu took: {:?}", start.elapsed());
 
-    assert_eq!(res_A, cuda_res_A);
-    assert!(res_B == cuda_res_B);
+    let spmvm_context = sparse_matrix_witness_init_vesta(&cuda_csr);
+    let start = Instant::now();
+    sparse_matrix_witness_with_vesta(
+        &spmvm_context,
+        &witness,
+        &mut cuda_res,
+        nthreads,
+    );
+    println!("preallocated gpu took: {:?}", start.elapsed());
+
+    assert_eq!(res, cuda_res);
     println!("success!");
 }
(Diffs for the remaining 4 changed files are not shown.)
