Skip to content

Commit

Permalink
New AVX quotients (#549)
Browse files Browse the repository at this point in the history
  • Loading branch information
spapinistarkware authored Mar 28, 2024
1 parent 136858a commit d4ebbc6
Show file tree
Hide file tree
Showing 12 changed files with 472 additions and 44 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,7 @@ harness = false
[[bench]]
name = "eval_at_point"
harness = false

[[bench]]
name = "quotients"
harness = false
90 changes: 90 additions & 0 deletions benches/quotients.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#![feature(iter_array_chunks)]

use criterion::{black_box, Criterion};
use itertools::Itertools;
use stwo::core::backend::CPUBackend;
use stwo::core::circle::SECURE_FIELD_CIRCLE_GEN;
use stwo::core::commitment_scheme::quotients::{ColumnSampleBatch, QuotientOps};
use stwo::core::fields::m31::BaseField;
use stwo::core::fields::qm31::SecureField;
use stwo::core::poly::circle::{CanonicCoset, CircleEvaluation};
use stwo::core::poly::BitReversedOrder;

pub fn cpu_quotients(c: &mut criterion::Criterion) {
const LOG_SIZE: u32 = 16;
const SIZE: usize = 1 << LOG_SIZE;
const N_COLS: usize = 1 << 8;
let domain = CanonicCoset::new(LOG_SIZE).circle_domain();
let cols = (0..N_COLS)
.map(|_| {
let values = (0..SIZE).map(BaseField::from).collect();
CircleEvaluation::<CPUBackend, _, BitReversedOrder>::new(domain, values)
})
.collect_vec();
let random_coeff = SecureField::from_u32_unchecked(0, 1, 2, 3);
let a = SecureField::from_u32_unchecked(5, 6, 7, 8);
let samples = vec![ColumnSampleBatch {
point: SECURE_FIELD_CIRCLE_GEN,
columns_and_values: (0..N_COLS).map(|i| (i, a)).collect(),
}];

let col_refs = &cols.iter().collect_vec();
c.bench_function("cpu quotients 2^8 x 2^16", |b| {
b.iter(|| {
black_box(CPUBackend::accumulate_quotients(
black_box(domain),
black_box(col_refs),
black_box(random_coeff),
black_box(&samples),
))
})
});
}

#[cfg(target_arch = "x86_64")]
pub fn avx512_quotients(c: &mut criterion::Criterion) {
use stwo::core::backend::avx512::AVX512Backend;

const LOG_SIZE: u32 = 20;
const SIZE: usize = 1 << LOG_SIZE;
const N_COLS: usize = 1 << 8;
let domain = CanonicCoset::new(LOG_SIZE).circle_domain();
let cols = (0..N_COLS)
.map(|_| {
let values = (0..SIZE as u32)
.map(BaseField::from_u32_unchecked)
.collect();
CircleEvaluation::<AVX512Backend, _, BitReversedOrder>::new(domain, values)
})
.collect_vec();
let random_coeff = SecureField::from_m31_array(std::array::from_fn(BaseField::from));
let a = SecureField::from_m31_array(std::array::from_fn(|i| BaseField::from(3 * i)));
let samples = vec![ColumnSampleBatch {
point: SECURE_FIELD_CIRCLE_GEN,
columns_and_values: (0..N_COLS).map(|i| (i, a)).collect(),
}];

let col_refs = &cols.iter().collect_vec();
c.bench_function("avx quotients 2^8 x 2^20", |b| {
b.iter(|| {
black_box(AVX512Backend::accumulate_quotients(
black_box(domain),
black_box(col_refs),
black_box(random_coeff),
black_box(&samples),
))
})
});
}

#[cfg(target_arch = "x86_64")]
criterion::criterion_group!(
name=quotients;
config = Criterion::default().sample_size(10);
targets=avx512_quotients, cpu_quotients);
#[cfg(not(target_arch = "x86_64"))]
criterion::criterion_group!(
name=quotients;
config = Criterion::default().sample_size(10);
targets=cpu_quotients);
criterion::criterion_main!(quotients);
2 changes: 1 addition & 1 deletion src/core/air/accumulation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ impl DomainEvaluationAccumulator<CPUBackend> {
.zip(self.n_cols_per_size.iter())
.skip(1)
{
let coeffs = SecureColumn {
let coeffs = SecureColumn::<CPUBackend> {
columns: values.columns.map(|c| {
CPUCircleEvaluation::<_, BitReversedOrder>::new(
CanonicCoset::new(log_size as u32).circle_domain(),
Expand Down
56 changes: 54 additions & 2 deletions src/core/backend/avx512/cm31.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
use std::ops::{Add, Mul, Sub};
use std::ops::{Add, Mul, MulAssign, Sub};

use num_traits::{One, Zero};

use super::m31::{PackedBaseField, K_BLOCK_SIZE};
use crate::core::fields::cm31::CM31;
use crate::core::fields::cm31::{CM31, P2};
use crate::core::fields::FieldExpOps;

/// AVX implementation for the complex extension field of M31.
/// See [crate::core::fields::cm31::CM31] for more information.
#[derive(Copy, Clone)]
pub struct PackedCM31(pub [PackedBaseField; 2]);
impl PackedCM31 {
pub fn broadcast(value: CM31) -> Self {
Self([
PackedBaseField::broadcast(value.0),
PackedBaseField::broadcast(value.1),
])
}
pub fn a(&self) -> PackedBaseField {
self.0[0]
}
Expand Down Expand Up @@ -42,6 +51,49 @@ impl Mul for PackedCM31 {
Self([ac - bd, ab_t_cd - ac - bd])
}
}
impl Zero for PackedCM31 {
fn zero() -> Self {
Self([PackedBaseField::zero(), PackedBaseField::zero()])
}
fn is_zero(&self) -> bool {
self.a().is_zero() && self.b().is_zero()
}
}
impl One for PackedCM31 {
fn one() -> Self {
Self([PackedBaseField::one(), PackedBaseField::zero()])
}
}
impl MulAssign for PackedCM31 {
fn mul_assign(&mut self, rhs: Self) {
*self = *self * rhs;
}
}
impl FieldExpOps for PackedCM31 {
fn inverse(&self) -> Self {
assert!(!self.is_zero(), "0 has no inverse");
self.pow((P2 - 2) as u128)
}
}

impl Add<PackedBaseField> for PackedCM31 {
type Output = Self;
fn add(self, rhs: PackedBaseField) -> Self::Output {
Self([self.a() + rhs, self.b()])
}
}
impl Sub<PackedBaseField> for PackedCM31 {
type Output = Self;
fn sub(self, rhs: PackedBaseField) -> Self::Output {
Self([self.a() - rhs, self.b()])
}
}
impl Mul<PackedBaseField> for PackedCM31 {
type Output = Self;
fn mul(self, rhs: PackedBaseField) -> Self::Output {
Self([self.a() * rhs, self.b() * rhs])
}
}

#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
#[cfg(test)]
Expand Down
25 changes: 19 additions & 6 deletions src/core/backend/avx512/m31.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@ use core::arch::x86_64::{
__m512i, _mm512_add_epi32, _mm512_min_epu32, _mm512_mul_epu32, _mm512_srli_epi64,
_mm512_sub_epi32,
};
use std::arch::x86_64::{_mm512_load_epi32, _mm512_permutex2var_epi32, _mm512_store_epi32};
use std::arch::x86_64::{
_mm512_load_epi32, _mm512_permutex2var_epi32, _mm512_set1_epi32, _mm512_setzero_si512,
_mm512_store_epi32,
};
use std::fmt::Display;
use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};

use num_traits::One;
use num_traits::{One, Zero};

use super::tranpose_utils::{
EVENS_CONCAT_EVENS, HHALF_INTERLEAVE_HHALF, LHALF_INTERLEAVE_LHALF, ODDS_CONCAT_ODDS,
Expand All @@ -24,6 +27,10 @@ pub const M512P: __m512i = unsafe { core::mem::transmute([P; K_BLOCK_SIZE]) };
pub struct PackedBaseField(pub __m512i);

impl PackedBaseField {
pub fn broadcast(value: M31) -> Self {
Self(unsafe { _mm512_set1_epi32(value.0 as i32) })
}

pub fn from_array(v: [M31; K_BLOCK_SIZE]) -> PackedBaseField {
unsafe { Self(std::mem::transmute(v)) }
}
Expand Down Expand Up @@ -81,10 +88,6 @@ impl PackedBaseField {
pub fn pointwise_sum(self) -> M31 {
self.to_array().into_iter().sum()
}

pub fn broadcast(x: M31) -> Self {
Self(unsafe { std::arch::x86_64::_mm512_set1_epi32(x.0 as i32) })
}
}

impl Display for PackedBaseField {
Expand Down Expand Up @@ -234,6 +237,15 @@ impl SubAssign for PackedBaseField {
}
}

impl Zero for PackedBaseField {
fn zero() -> Self {
Self(unsafe { _mm512_setzero_si512() })
}
fn is_zero(&self) -> bool {
self.to_array().iter().all(|x| x.is_zero())
}
}

impl One for PackedBaseField {
fn one() -> Self {
Self(unsafe { core::mem::transmute([M31::one(); K_BLOCK_SIZE]) })
Expand All @@ -242,6 +254,7 @@ impl One for PackedBaseField {

impl FieldExpOps for PackedBaseField {
fn inverse(&self) -> Self {
assert!(!self.is_zero(), "0 has no inverse");
self.pow((P - 2) as u128)
}
}
Expand Down
45 changes: 44 additions & 1 deletion src/core/backend/avx512/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,25 @@ pub mod cm31;
pub mod fft;
pub mod m31;
pub mod qm31;
pub mod quotients;
pub mod tranpose_utils;

use bytemuck::{cast_slice, cast_slice_mut, Pod, Zeroable};
use itertools::izip;
use num_traits::Zero;

use self::bit_reverse::bit_reverse_m31;
use self::cm31::PackedCM31;
pub use self::m31::{PackedBaseField, K_BLOCK_SIZE};
use self::qm31::PackedQM31;
use super::{Column, ColumnOps};
use crate::core::fields::m31::BaseField;
use crate::core::fields::qm31::SecureField;
use crate::core::fields::secure_column::SecureColumn;
use crate::core::fields::{FieldExpOps, FieldOps};
use crate::core::utils;

const VECS_LOG_SIZE: usize = 4;
pub const VECS_LOG_SIZE: usize = 4;

#[derive(Copy, Clone, Debug)]
pub struct AVX512Backend;
Expand Down Expand Up @@ -128,6 +134,43 @@ impl FromIterator<BaseField> for BaseFieldVec {
}
}

impl SecureColumn<AVX512Backend> {
pub fn packed_at(&self, vec_index: usize) -> PackedQM31 {
unsafe {
PackedQM31([
PackedCM31([
*self.columns[0].data.get_unchecked(vec_index),
*self.columns[1].data.get_unchecked(vec_index),
]),
PackedCM31([
*self.columns[2].data.get_unchecked(vec_index),
*self.columns[3].data.get_unchecked(vec_index),
]),
])
}
}

pub fn set_packed(&mut self, vec_index: usize, value: PackedQM31) {
unsafe {
*self.columns[0].data.get_unchecked_mut(vec_index) = value.a().a();
*self.columns[1].data.get_unchecked_mut(vec_index) = value.a().b();
*self.columns[2].data.get_unchecked_mut(vec_index) = value.b().a();
*self.columns[3].data.get_unchecked_mut(vec_index) = value.b().b();
}
}

pub fn to_vec(&self) -> Vec<SecureField> {
izip!(
self.columns[0].to_vec(),
self.columns[1].to_vec(),
self.columns[2].to_vec(),
self.columns[3].to_vec(),
)
.map(|(a, b, c, d)| SecureField::from_m31_array([a, b, c, d]))
.collect()
}
}

#[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))]
#[cfg(test)]
mod tests {
Expand Down
Loading

0 comments on commit d4ebbc6

Please sign in to comment.