Skip to content
This repository has been archived by the owner on Oct 1, 2024. It is now read-only.

Commit

Permalink
Coset LDE based on row-oriented DFT (Plonky3#440)
Browse files Browse the repository at this point in the history
* First draft.

* Second draft.

* Add root tables; remove dumb reduce; prepare for Monty version.

* Use Monty rather than Barrett.

* Remove old comments.

* Do partial reduction; inline sizes 128 and 256.

* Remove Barrett reduction code.

* Refactor butterfly.

* Use u32 repr rather than i64; misc. tidying.

* Working version; initial benchmarking harness.

* Working with non-square inputs.

* Four-step FFT fiddling.

* Move BabyBear FFT to Monty 31 crate.

* Remove 'Real' typedef.

* Move implementation into MontyField31 struct.

* Implement the TwoAdicSubgroupDft trait; move tests to concrete field

* More thorough transpose benchmark.

* Move `pretty_name` to utils crate; use `pretty_name` in fft benches

* Remove unused four-step code.

* Tidy up implementation and testing; store precomputed roots.

* Tidying.

* Remove unused 'backward' transform.

* Move `split_at_mut_unchecked` to utils crate; remove unused import.

* Clippy.

* Remove unnecessary function.

* Fix name of algo.

* Refactor bitrev & transpose parts of dft

* Refactor DFT tests.

* Minor simplification.

* Fix specification of twiddle table.

* Expanded benchmarks.

* Remove unnecessary borrows.

* Add more tracing information.

* Messy but working version of `coset_lde_batch`.

* Reduce allocations by removing dependency on `RowMajorMatrix`.

* Unsafe scratch initialisation.

* Don't apply coset powers to zero elements.

* Tidying up; parallelise `scale()`.

* Update Keccak AIR examples

* Use new FFT in KoalaBear example; misc tidying.

* Fix dumb bug.

* Rename var.

* Switch DIT and DIF for DFT and IDFT; adjust bit-reversals & zeroing; scale and shift at once.

* Refactor internal functions; rename some things.

* clippy

* Remove unused function.

* Update some documentation.

* Expand first layer of DFT.

* Reduce memory consumption.

* Specialise inverse roots; unroll radix4; move fns to utils.

* Remove unused fn; comment.

* Rename Radix2Dft -> RecursiveDft.

* Clean up examples.

* Miscellaneous documentation and tidying.

* Minor tidying.

* cargo fmt

* Address review comments.

* Fix URL.

* `split_at_mut_unchecked` is now available in stable.

* Remove comment.

* Remove `partial_monty_reduce`; add comments; cargo fmt.

* Faster alloc and padding; remove specialised first FFT layer.

* Use `transmute` instead of `Vec::set_len`.

* "Tidying"

* Review comments.
  • Loading branch information
unzvfu authored Sep 6, 2024
1 parent b314769 commit 2df15fd
Show file tree
Hide file tree
Showing 29 changed files with 899 additions and 226 deletions.
1 change: 1 addition & 0 deletions baby-bear/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ serde = { version = "1.0", default-features = false, features = ["derive"] }

[dev-dependencies]
p3-field-testing = { path = "../field-testing" }
p3-dft = { path = "../dft" }
rand = { version = "0.8.5", features = ["min_const_gen"] }
criterion = "0.5.1"
rand_chacha = "0.3.1"
Expand Down
25 changes: 22 additions & 3 deletions baby-bear/src/baby_bear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,24 @@ impl FieldParameters for BabyBearParameters {
impl TwoAdicData for BabyBearParameters {
const TWO_ADICITY: usize = 27;

type ArrayLike = [BabyBear; Self::TWO_ADICITY + 1];
type ArrayLike = &'static [BabyBear];

const TWO_ADIC_GENERATORS: Self::ArrayLike = BabyBear::new_array([
const TWO_ADIC_GENERATORS: Self::ArrayLike = &BabyBear::new_array([
0x1, 0x78000000, 0x67055c21, 0x5ee99486, 0xbb4c4e4, 0x2d4cc4da, 0x669d6090, 0x17b56c64,
0x67456167, 0x688442f9, 0x145e952d, 0x4fe61226, 0x4c734715, 0x11c33e2a, 0x62c3d2b1,
0x77cad399, 0x54c131f4, 0x4cabd6a6, 0x5cf5713f, 0x3e9430e8, 0xba067a3, 0x18adc27d,
0x21fd55bc, 0x4b859b3d, 0x3bd57996, 0x4483d85a, 0x3a26eef8, 0x1a427a41,
]);

const ROOTS_8: Self::ArrayLike = &BabyBear::new_array([0x5ee99486, 0x67055c21, 0xc9ea3ba]);
const INV_ROOTS_8: Self::ArrayLike = &BabyBear::new_array([0x6b615c47, 0x10faa3e0, 0x19166b7b]);

const ROOTS_16: Self::ArrayLike = &BabyBear::new_array([
0xbb4c4e4, 0x5ee99486, 0x4b49e08, 0x67055c21, 0x5376917a, 0xc9ea3ba, 0x563112a7,
]);
const INV_ROOTS_16: Self::ArrayLike = &BabyBear::new_array([
0x21ceed5a, 0x6b615c47, 0x24896e87, 0x10faa3e0, 0x734b61f9, 0x19166b7b, 0x6c4b3b1d,
]);
}

impl BinomialExtensionData<4> for BabyBearParameters {
Expand Down Expand Up @@ -102,7 +112,7 @@ mod tests {
use core::array;

use p3_field::{PrimeField32, PrimeField64, TwoAdicField};
use p3_field_testing::{test_field, test_two_adic_field};
use p3_field_testing::{test_field, test_field_dft, test_two_adic_field};

use super::*;

Expand Down Expand Up @@ -215,4 +225,13 @@ mod tests {

test_field!(crate::BabyBear);
test_two_adic_field!(crate::BabyBear);

test_field_dft!(radix2dit, crate::BabyBear, p3_dft::Radix2Dit<_>);
test_field_dft!(bowers, crate::BabyBear, p3_dft::Radix2Bowers);
test_field_dft!(parallel, crate::BabyBear, p3_dft::Radix2DitParallel);
test_field_dft!(
recur_dft,
crate::BabyBear,
p3_monty_31::dft::RecursiveDft<_>
);
}
12 changes: 1 addition & 11 deletions circle/benches/cfft.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::any::type_name;

use criterion::measurement::Measurement;
use criterion::{criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion};
use p3_baby_bear::BabyBear;
Expand All @@ -8,18 +6,10 @@ use p3_dft::{Radix2Bowers, Radix2Dit, Radix2DitParallel, TwoAdicSubgroupDft};
use p3_field::TwoAdicField;
use p3_matrix::dense::RowMajorMatrix;
use p3_mersenne_31::Mersenne31;
use p3_util::pretty_name;
use rand::distributions::{Distribution, Standard};
use rand::thread_rng;

/// Returns the name of type `T` with module paths stripped from every component.
///
/// The full name reported by `type_name` is cut into pieces, each ending just
/// after a `<`, `>` or `,`, so generic arguments are shortened individually.
/// Within each piece only the text following the final `::` is kept, and the
/// shortened pieces are concatenated in their original order. For example,
/// `core::option::Option<alloc::string::String>` becomes `Option<String>`.
fn pretty_name<T>() -> String {
    type_name::<T>()
        .split_inclusive(&['<', '>', ','])
        // `split` always yields at least one item, so `last()` cannot be `None`.
        .map(|piece| piece.split("::").last().unwrap())
        .collect()
}

fn bench_lde(c: &mut Criterion) {
let log_n = 18;
let log_w = 8;
Expand Down
1 change: 1 addition & 0 deletions dft/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ tracing = "0.1.37"
itertools = "0.13.0"

[dev-dependencies]
p3-monty-31 = { path = "../monty-31" }
p3-baby-bear = { path = "../baby-bear" }
p3-goldilocks = { path = "../goldilocks" }
p3-mersenne-31 = { path = "../mersenne-31" }
Expand Down
55 changes: 29 additions & 26 deletions dft/benches/fft.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
use std::any::type_name;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use p3_baby_bear::BabyBear;
use p3_dft::{Radix2Bowers, Radix2Dit, Radix2DitParallel, TwoAdicSubgroupDft};
Expand All @@ -8,19 +6,22 @@ use p3_field::TwoAdicField;
use p3_goldilocks::Goldilocks;
use p3_matrix::dense::RowMajorMatrix;
use p3_mersenne_31::{Mersenne31, Mersenne31ComplexRadix2Dit, Mersenne31Dft};
use p3_monty_31::dft::RecursiveDft;
use p3_util::pretty_name;
use rand::distributions::{Distribution, Standard};
use rand::thread_rng;

fn bench_fft(c: &mut Criterion) {
// log_sizes correspond to the sizes of DFT we want to benchmark;
// for the DFT over the quadratic extension "Mersenne31Complex" a
// fairer comparison is to use half sizes, which is the log minus 1.
let log_sizes = &[14, 16, 18];
let log_sizes = &[14, 16, 18, 20, 22];
let log_half_sizes = &[13, 15, 17];

const BATCH_SIZE: usize = 100;
const BATCH_SIZE: usize = 256;

fft::<BabyBear, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
fft::<BabyBear, RecursiveDft<_>, BATCH_SIZE>(c, log_sizes);
fft::<BabyBear, Radix2Bowers, BATCH_SIZE>(c, log_sizes);
fft::<BabyBear, Radix2DitParallel, BATCH_SIZE>(c, log_sizes);
fft::<Goldilocks, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
Expand All @@ -34,11 +35,13 @@ fn bench_fft(c: &mut Criterion) {
m31_fft::<Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
m31_fft::<Mersenne31ComplexRadix2Dit, BATCH_SIZE>(c, log_sizes);

ifft::<Goldilocks, Radix2Dit<_>, BATCH_SIZE>(c);
ifft::<Goldilocks, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);

coset_lde::<BabyBear, Radix2Bowers, BATCH_SIZE>(c);
coset_lde::<Goldilocks, Radix2Bowers, BATCH_SIZE>(c);
coset_lde::<BabyBear, Radix2DitParallel, BATCH_SIZE>(c);
coset_lde::<BabyBear, RecursiveDft<_>, BATCH_SIZE>(c, log_sizes);
coset_lde::<BabyBear, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
coset_lde::<BabyBear, Radix2Bowers, BATCH_SIZE>(c, log_sizes);
coset_lde::<BabyBear, Radix2DitParallel, BATCH_SIZE>(c, log_sizes);
coset_lde::<Goldilocks, Radix2Bowers, BATCH_SIZE>(c, log_sizes);
}

fn fft<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion, log_sizes: &[usize])
Expand All @@ -47,10 +50,10 @@ where
Dft: TwoAdicSubgroupDft<F>,
Standard: Distribution<F>,
{
let mut group = c.benchmark_group(&format!(
"fft::<{}, {}, {}>",
type_name::<F>(),
type_name::<Dft>(),
let mut group = c.benchmark_group(format!(
"fft/{}/{}/ncols={}",
pretty_name::<F>(),
pretty_name::<Dft>(),
BATCH_SIZE
));
group.sample_size(10);
Expand All @@ -75,9 +78,9 @@ where
Dft: TwoAdicSubgroupDft<Complex<Mersenne31>>,
Standard: Distribution<Mersenne31>,
{
let mut group = c.benchmark_group(&format!(
let mut group = c.benchmark_group(format!(
"m31_fft::<{}, {}>",
type_name::<Dft>(),
pretty_name::<Dft>(),
BATCH_SIZE
));
group.sample_size(10);
Expand All @@ -96,22 +99,22 @@ where
}
}

fn ifft<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion)
fn ifft<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion, log_sizes: &[usize])
where
F: TwoAdicField,
Dft: TwoAdicSubgroupDft<F>,
Standard: Distribution<F>,
{
let mut group = c.benchmark_group(&format!(
"ifft::<{}, {}, {}>",
type_name::<F>(),
type_name::<Dft>(),
let mut group = c.benchmark_group(format!(
"ifft/{}/{}/ncols={}",
pretty_name::<F>(),
pretty_name::<Dft>(),
BATCH_SIZE
));
group.sample_size(10);

let mut rng = thread_rng();
for n_log in [14, 16, 18] {
for n_log in log_sizes {
let n = 1 << n_log;

let messages = RowMajorMatrix::rand(&mut rng, n, BATCH_SIZE);
Expand All @@ -125,22 +128,22 @@ where
}
}

fn coset_lde<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion)
fn coset_lde<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion, log_sizes: &[usize])
where
F: TwoAdicField,
Dft: TwoAdicSubgroupDft<F>,
Standard: Distribution<F>,
{
let mut group = c.benchmark_group(&format!(
"coset_lde::<{}, {}, {}>",
type_name::<F>(),
type_name::<Dft>(),
let mut group = c.benchmark_group(format!(
"coset_lde/{}/{}/ncols={}",
pretty_name::<F>(),
pretty_name::<Dft>(),
BATCH_SIZE
));
group.sample_size(10);

let mut rng = thread_rng();
for n_log in [14, 16, 18] {
for n_log in log_sizes {
let n = 1 << n_log;

let messages = RowMajorMatrix::rand(&mut rng, n, BATCH_SIZE);
Expand Down
2 changes: 0 additions & 2 deletions dft/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ mod naive;
mod radix_2_bowers;
mod radix_2_dit;
mod radix_2_dit_parallel;
#[cfg(test)]
mod testing;
mod traits;
mod util;

Expand Down
45 changes: 0 additions & 45 deletions dft/src/radix_2_bowers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,48 +126,3 @@ fn butterfly_layer<F: Field, B: Butterfly<F>>(
});
});
}

#[cfg(test)]
mod tests {
    //! Runs the shared checks from `crate::testing` against `Radix2Bowers`.

    use p3_baby_bear::BabyBear;
    use p3_goldilocks::Goldilocks;

    use crate::radix_2_bowers::Radix2Bowers;
    use crate::testing::{
        test_coset_dft_matches_naive, test_coset_idft_matches_naive, test_coset_lde_matches_naive,
        test_dft_idft_consistency, test_dft_matches_naive, test_idft_matches_naive,
        test_lde_matches_naive,
    };

    /// The transform implementation every test in this module instantiates.
    type Dft = Radix2Bowers;

    /// Delegates to `test_dft_matches_naive` over `BabyBear`.
    #[test]
    fn dft_matches_naive() {
        test_dft_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_coset_dft_matches_naive` over `BabyBear`.
    #[test]
    fn coset_dft_matches_naive() {
        test_coset_dft_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_idft_matches_naive` over `Goldilocks`.
    #[test]
    fn idft_matches_naive() {
        test_idft_matches_naive::<Goldilocks, Dft>();
    }

    /// Delegates to `test_coset_idft_matches_naive`, first over `BabyBear`
    /// and then over `Goldilocks`.
    #[test]
    fn coset_idft_matches_naive() {
        test_coset_idft_matches_naive::<BabyBear, Dft>();
        test_coset_idft_matches_naive::<Goldilocks, Dft>();
    }

    /// Delegates to `test_lde_matches_naive` over `BabyBear`.
    #[test]
    fn lde_matches_naive() {
        test_lde_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_coset_lde_matches_naive` over `BabyBear`.
    #[test]
    fn coset_lde_matches_naive() {
        test_coset_lde_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_dft_idft_consistency` over `BabyBear`.
    #[test]
    fn dft_idft_consistency() {
        test_dft_idft_consistency::<BabyBear, Dft>();
    }
}
45 changes: 0 additions & 45 deletions dft/src/radix_2_dit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,48 +67,3 @@ fn dit_layer<F: Field>(mat: &mut RowMajorMatrixViewMut<'_, F>, layer: usize, twi
});
});
}

#[cfg(test)]
mod tests {
    //! Runs the shared checks from `crate::testing` against `Radix2Dit`.
    //!
    //! The type parameter of `Radix2Dit` is left as `_` at every call site so
    //! that it is inferred from the field argument.

    use p3_baby_bear::BabyBear;
    use p3_goldilocks::Goldilocks;

    use crate::testing::{
        test_coset_dft_matches_naive, test_coset_idft_matches_naive, test_coset_lde_matches_naive,
        test_dft_idft_consistency, test_dft_matches_naive, test_idft_matches_naive,
        test_lde_matches_naive,
    };
    use crate::Radix2Dit;

    /// Delegates to `test_dft_matches_naive` over `BabyBear`.
    #[test]
    fn dft_matches_naive() {
        test_dft_matches_naive::<BabyBear, Radix2Dit<_>>();
    }

    /// Delegates to `test_coset_dft_matches_naive` over `BabyBear`.
    #[test]
    fn coset_dft_matches_naive() {
        test_coset_dft_matches_naive::<BabyBear, Radix2Dit<_>>();
    }

    /// Delegates to `test_idft_matches_naive` over `Goldilocks`.
    #[test]
    fn idft_matches_naive() {
        test_idft_matches_naive::<Goldilocks, Radix2Dit<_>>();
    }

    /// Delegates to `test_coset_idft_matches_naive`, first over `BabyBear`
    /// and then over `Goldilocks`.
    #[test]
    fn coset_idft_matches_naive() {
        test_coset_idft_matches_naive::<BabyBear, Radix2Dit<_>>();
        test_coset_idft_matches_naive::<Goldilocks, Radix2Dit<_>>();
    }

    /// Delegates to `test_lde_matches_naive` over `BabyBear`.
    #[test]
    fn lde_matches_naive() {
        test_lde_matches_naive::<BabyBear, Radix2Dit<_>>();
    }

    /// Delegates to `test_coset_lde_matches_naive` over `BabyBear`.
    #[test]
    fn coset_lde_matches_naive() {
        test_coset_lde_matches_naive::<BabyBear, Radix2Dit<_>>();
    }

    /// Delegates to `test_dft_idft_consistency` over `BabyBear`.
    #[test]
    fn dft_idft_consistency() {
        test_dft_idft_consistency::<BabyBear, Radix2Dit<_>>();
    }
}
45 changes: 0 additions & 45 deletions dft/src/radix_2_dit_parallel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,48 +191,3 @@ fn dit_layer_rev<F: Field>(
DitButterfly(twiddle).apply_to_rows(lo, hi)
}
}

#[cfg(test)]
mod tests {
    //! Runs the shared checks from `crate::testing` against `Radix2DitParallel`.

    use p3_baby_bear::BabyBear;
    use p3_goldilocks::Goldilocks;

    use crate::testing::{
        test_coset_dft_matches_naive, test_coset_idft_matches_naive, test_coset_lde_matches_naive,
        test_dft_idft_consistency, test_dft_matches_naive, test_idft_matches_naive,
        test_lde_matches_naive,
    };
    use crate::Radix2DitParallel;

    /// The transform implementation every test in this module instantiates.
    type Dft = Radix2DitParallel;

    /// Delegates to `test_dft_matches_naive` over `BabyBear`.
    #[test]
    fn dft_matches_naive() {
        test_dft_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_coset_dft_matches_naive` over `BabyBear`.
    #[test]
    fn coset_dft_matches_naive() {
        test_coset_dft_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_idft_matches_naive` over `Goldilocks`.
    #[test]
    fn idft_matches_naive() {
        test_idft_matches_naive::<Goldilocks, Dft>();
    }

    /// Delegates to `test_coset_idft_matches_naive`, first over `BabyBear`
    /// and then over `Goldilocks`.
    #[test]
    fn coset_idft_matches_naive() {
        test_coset_idft_matches_naive::<BabyBear, Dft>();
        test_coset_idft_matches_naive::<Goldilocks, Dft>();
    }

    /// Delegates to `test_lde_matches_naive` over `BabyBear`.
    #[test]
    fn lde_matches_naive() {
        test_lde_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_coset_lde_matches_naive` over `BabyBear`.
    #[test]
    fn coset_lde_matches_naive() {
        test_coset_lde_matches_naive::<BabyBear, Dft>();
    }

    /// Delegates to `test_dft_idft_consistency` over `BabyBear`.
    #[test]
    fn dft_idft_consistency() {
        test_dft_idft_consistency::<BabyBear, Dft>();
    }
}
Loading

0 comments on commit 2df15fd

Please sign in to comment.