Coset LDE based on row-oriented DFT (Plonky3#440)

* First draft. * Second draft. * Add root tables; remove dumb reduce; prepare for Monty version. * Use Monty rather than Barrett. * Remove old comments. * Do partial reduction; inline sizes 128 and 256. * Remove Barrett reduction code. * Refactor butterfly. * Use u32 repr rather than i64; misc. tidying. * Working version; initial benchmarking harness. * Working with non-square inputs. * Four-step FFT fiddling. * Move BabyBear FFT to Monty 31 crate. * Remove 'Real' typedef. * Move implementation into MontyField31 struct. * Implement the TwoAdicSubgroupDft trait; move tests to concrete field. * More thorough transpose benchmark. * Move `pretty_name` to utils crate; use `pretty_name` in fft benches. * Remove unused four-step code. * Tidy up implementation and testing; store precomputed roots. * Tidying. * Remove unused 'backward' transform. * Move `split_at_mut_unchecked` to utils crate; remove unused import. * Clippy. * Remove unnecessary function. * Fix name of algo. * Refactor bitrev & transpose parts of DFT. * Refactor DFT tests. * Minor simplification. * Fix specification of twiddle table. * Expanded benchmarks. * Remove unnecessary borrows. * Add more tracing information. * Messy but working version of `coset_lde_batch`. * Reduce allocations by removing dependency on `RowMajorMatrix`. * Unsafe scratch initialisation. * Don't apply coset powers to zero elements. * Tidying up; parallelise `scale()`. * Update Keccak AIR examples. * Use new FFT in KoalaBear example; misc. tidying. * Fix dumb bug. * Rename var. * Switch DIT and DIF for DFT and IDFT; adjust bit-reversals & zeroing; scale and shift at once. * Refactor internal functions; rename some things. * Clippy. * Remove unused function. * Update some documentation. * Expand first layer of DFT. * Reduce memory consumption. * Specialise inverse roots; unroll radix4; move functions to utils. * Remove unused function; comment. * Rename Radix2Dft -> RecursiveDft. * Clean up examples. * Miscellaneous documentation and tidying. 
* Minor tidying. * `cargo fmt`. * Address review comments. * Fix URL. * `split_at_mut_unchecked` is now available in stable. * Remove comment. * Remove `partial_monty_reduce`; add comments; `cargo fmt`. * Faster alloc and padding; remove specialised first FFT layer. * Use `transmute` instead of `Vec::set_len`. * "Tidying". * Review comments.
powdr-labs · Sep 6, 2024 · 2df15fd · 2df15fd
1 parent b314769
commit 2df15fd
Show file tree

Hide file tree

Showing 29 changed files with 899 additions and 226 deletions.
diff --git a/baby-bear/Cargo.toml b/baby-bear/Cargo.toml
@@ -18,6 +18,7 @@ serde = { version = "1.0", default-features = false, features = ["derive"] }
 
 [dev-dependencies]
 p3-field-testing = { path = "../field-testing" }
+p3-dft = { path = "../dft" }
 rand = { version = "0.8.5", features = ["min_const_gen"] }
 criterion = "0.5.1"
 rand_chacha = "0.3.1"

diff --git a/baby-bear/src/baby_bear.rs b/baby-bear/src/baby_bear.rs
@@ -66,14 +66,24 @@ impl FieldParameters for BabyBearParameters {
 impl TwoAdicData for BabyBearParameters {
     const TWO_ADICITY: usize = 27;
 
-    type ArrayLike = [BabyBear; Self::TWO_ADICITY + 1];
+    type ArrayLike = &'static [BabyBear];
 
-    const TWO_ADIC_GENERATORS: Self::ArrayLike = BabyBear::new_array([
+    const TWO_ADIC_GENERATORS: Self::ArrayLike = &BabyBear::new_array([
         0x1, 0x78000000, 0x67055c21, 0x5ee99486, 0xbb4c4e4, 0x2d4cc4da, 0x669d6090, 0x17b56c64,
         0x67456167, 0x688442f9, 0x145e952d, 0x4fe61226, 0x4c734715, 0x11c33e2a, 0x62c3d2b1,
         0x77cad399, 0x54c131f4, 0x4cabd6a6, 0x5cf5713f, 0x3e9430e8, 0xba067a3, 0x18adc27d,
         0x21fd55bc, 0x4b859b3d, 0x3bd57996, 0x4483d85a, 0x3a26eef8, 0x1a427a41,
     ]);
+
+    const ROOTS_8: Self::ArrayLike = &BabyBear::new_array([0x5ee99486, 0x67055c21, 0xc9ea3ba]);
+    const INV_ROOTS_8: Self::ArrayLike = &BabyBear::new_array([0x6b615c47, 0x10faa3e0, 0x19166b7b]);
+
+    const ROOTS_16: Self::ArrayLike = &BabyBear::new_array([
+        0xbb4c4e4, 0x5ee99486, 0x4b49e08, 0x67055c21, 0x5376917a, 0xc9ea3ba, 0x563112a7,
+    ]);
+    const INV_ROOTS_16: Self::ArrayLike = &BabyBear::new_array([
+        0x21ceed5a, 0x6b615c47, 0x24896e87, 0x10faa3e0, 0x734b61f9, 0x19166b7b, 0x6c4b3b1d,
+    ]);
 }
 
 impl BinomialExtensionData<4> for BabyBearParameters {
@@ -102,7 +112,7 @@ mod tests {
     use core::array;
 
     use p3_field::{PrimeField32, PrimeField64, TwoAdicField};
-    use p3_field_testing::{test_field, test_two_adic_field};
+    use p3_field_testing::{test_field, test_field_dft, test_two_adic_field};
 
     use super::*;
 
@@ -215,4 +225,13 @@ mod tests {
 
     test_field!(crate::BabyBear);
     test_two_adic_field!(crate::BabyBear);
+
+    test_field_dft!(radix2dit, crate::BabyBear, p3_dft::Radix2Dit<_>);
+    test_field_dft!(bowers, crate::BabyBear, p3_dft::Radix2Bowers);
+    test_field_dft!(parallel, crate::BabyBear, p3_dft::Radix2DitParallel);
+    test_field_dft!(
+        recur_dft,
+        crate::BabyBear,
+        p3_monty_31::dft::RecursiveDft<_>
+    );
 }
diff --git a/circle/benches/cfft.rs b/circle/benches/cfft.rs
@@ -1,5 +1,3 @@
-use std::any::type_name;
-
 use criterion::measurement::Measurement;
 use criterion::{criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion};
 use p3_baby_bear::BabyBear;
@@ -8,18 +6,10 @@ use p3_dft::{Radix2Bowers, Radix2Dit, Radix2DitParallel, TwoAdicSubgroupDft};
 use p3_field::TwoAdicField;
 use p3_matrix::dense::RowMajorMatrix;
 use p3_mersenne_31::Mersenne31;
+use p3_util::pretty_name;
 use rand::distributions::{Distribution, Standard};
 use rand::thread_rng;
 
-fn pretty_name<T>() -> String {
-    let name = type_name::<T>();
-    let mut result = String::new();
-    for qual in name.split_inclusive(&['<', '>', ',']) {
-        result.push_str(qual.split("::").last().unwrap());
-    }
-    result
-}
-
 fn bench_lde(c: &mut Criterion) {
     let log_n = 18;
     let log_w = 8;

diff --git a/dft/Cargo.toml b/dft/Cargo.toml
@@ -13,6 +13,7 @@ tracing = "0.1.37"
 itertools = "0.13.0"
 
 [dev-dependencies]
+p3-monty-31 = { path = "../monty-31" }
 p3-baby-bear = { path = "../baby-bear" }
 p3-goldilocks = { path = "../goldilocks" }
 p3-mersenne-31 = { path = "../mersenne-31" }

diff --git a/dft/benches/fft.rs b/dft/benches/fft.rs
@@ -1,5 +1,3 @@
-use std::any::type_name;
-
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use p3_baby_bear::BabyBear;
 use p3_dft::{Radix2Bowers, Radix2Dit, Radix2DitParallel, TwoAdicSubgroupDft};
@@ -8,19 +6,22 @@ use p3_field::TwoAdicField;
 use p3_goldilocks::Goldilocks;
 use p3_matrix::dense::RowMajorMatrix;
 use p3_mersenne_31::{Mersenne31, Mersenne31ComplexRadix2Dit, Mersenne31Dft};
+use p3_monty_31::dft::RecursiveDft;
+use p3_util::pretty_name;
 use rand::distributions::{Distribution, Standard};
 use rand::thread_rng;
 
 fn bench_fft(c: &mut Criterion) {
     // log_sizes correspond to the sizes of DFT we want to benchmark;
     // for the DFT over the quadratic extension "Mersenne31Complex" a
     // fairer comparison is to use half sizes, which is the log minus 1.
-    let log_sizes = &[14, 16, 18];
+    let log_sizes = &[14, 16, 18, 20, 22];
     let log_half_sizes = &[13, 15, 17];
 
-    const BATCH_SIZE: usize = 100;
+    const BATCH_SIZE: usize = 256;
 
     fft::<BabyBear, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
+    fft::<BabyBear, RecursiveDft<_>, BATCH_SIZE>(c, log_sizes);
     fft::<BabyBear, Radix2Bowers, BATCH_SIZE>(c, log_sizes);
     fft::<BabyBear, Radix2DitParallel, BATCH_SIZE>(c, log_sizes);
     fft::<Goldilocks, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
@@ -34,11 +35,13 @@ fn bench_fft(c: &mut Criterion) {
     m31_fft::<Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
     m31_fft::<Mersenne31ComplexRadix2Dit, BATCH_SIZE>(c, log_sizes);
 
-    ifft::<Goldilocks, Radix2Dit<_>, BATCH_SIZE>(c);
+    ifft::<Goldilocks, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
 
-    coset_lde::<BabyBear, Radix2Bowers, BATCH_SIZE>(c);
-    coset_lde::<Goldilocks, Radix2Bowers, BATCH_SIZE>(c);
-    coset_lde::<BabyBear, Radix2DitParallel, BATCH_SIZE>(c);
+    coset_lde::<BabyBear, RecursiveDft<_>, BATCH_SIZE>(c, log_sizes);
+    coset_lde::<BabyBear, Radix2Dit<_>, BATCH_SIZE>(c, log_sizes);
+    coset_lde::<BabyBear, Radix2Bowers, BATCH_SIZE>(c, log_sizes);
+    coset_lde::<BabyBear, Radix2DitParallel, BATCH_SIZE>(c, log_sizes);
+    coset_lde::<Goldilocks, Radix2Bowers, BATCH_SIZE>(c, log_sizes);
 }
 
 fn fft<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion, log_sizes: &[usize])
@@ -47,10 +50,10 @@ where
     Dft: TwoAdicSubgroupDft<F>,
     Standard: Distribution<F>,
 {
-    let mut group = c.benchmark_group(&format!(
-        "fft::<{}, {}, {}>",
-        type_name::<F>(),
-        type_name::<Dft>(),
+    let mut group = c.benchmark_group(format!(
+        "fft/{}/{}/ncols={}",
+        pretty_name::<F>(),
+        pretty_name::<Dft>(),
         BATCH_SIZE
     ));
     group.sample_size(10);
@@ -75,9 +78,9 @@ where
     Dft: TwoAdicSubgroupDft<Complex<Mersenne31>>,
     Standard: Distribution<Mersenne31>,
 {
-    let mut group = c.benchmark_group(&format!(
+    let mut group = c.benchmark_group(format!(
         "m31_fft::<{}, {}>",
-        type_name::<Dft>(),
+        pretty_name::<Dft>(),
         BATCH_SIZE
     ));
     group.sample_size(10);
@@ -96,22 +99,22 @@ where
     }
 }
 
-fn ifft<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion)
+fn ifft<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion, log_sizes: &[usize])
 where
     F: TwoAdicField,
     Dft: TwoAdicSubgroupDft<F>,
     Standard: Distribution<F>,
 {
-    let mut group = c.benchmark_group(&format!(
-        "ifft::<{}, {}, {}>",
-        type_name::<F>(),
-        type_name::<Dft>(),
+    let mut group = c.benchmark_group(format!(
+        "ifft/{}/{}/ncols={}",
+        pretty_name::<F>(),
+        pretty_name::<Dft>(),
         BATCH_SIZE
     ));
     group.sample_size(10);
 
     let mut rng = thread_rng();
-    for n_log in [14, 16, 18] {
+    for n_log in log_sizes {
         let n = 1 << n_log;
 
         let messages = RowMajorMatrix::rand(&mut rng, n, BATCH_SIZE);
@@ -125,22 +128,22 @@ where
     }
 }
 
-fn coset_lde<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion)
+fn coset_lde<F, Dft, const BATCH_SIZE: usize>(c: &mut Criterion, log_sizes: &[usize])
 where
     F: TwoAdicField,
     Dft: TwoAdicSubgroupDft<F>,
     Standard: Distribution<F>,
 {
-    let mut group = c.benchmark_group(&format!(
-        "coset_lde::<{}, {}, {}>",
-        type_name::<F>(),
-        type_name::<Dft>(),
+    let mut group = c.benchmark_group(format!(
+        "coset_lde/{}/{}/ncols={}",
+        pretty_name::<F>(),
+        pretty_name::<Dft>(),
         BATCH_SIZE
     ));
     group.sample_size(10);
 
     let mut rng = thread_rng();
-    for n_log in [14, 16, 18] {
+    for n_log in log_sizes {
         let n = 1 << n_log;
 
         let messages = RowMajorMatrix::rand(&mut rng, n, BATCH_SIZE);

diff --git a/dft/src/lib.rs b/dft/src/lib.rs
@@ -9,8 +9,6 @@ mod naive;
 mod radix_2_bowers;
 mod radix_2_dit;
 mod radix_2_dit_parallel;
-#[cfg(test)]
-mod testing;
 mod traits;
 mod util;
 

diff --git a/dft/src/radix_2_bowers.rs b/dft/src/radix_2_bowers.rs
@@ -126,48 +126,3 @@ fn butterfly_layer<F: Field, B: Butterfly<F>>(
                 });
         });
 }
-
-#[cfg(test)]
-mod tests {
-    use p3_baby_bear::BabyBear;
-    use p3_goldilocks::Goldilocks;
-
-    use crate::radix_2_bowers::Radix2Bowers;
-    use crate::testing::*;
-
-    #[test]
-    fn dft_matches_naive() {
-        test_dft_matches_naive::<BabyBear, Radix2Bowers>();
-    }
-
-    #[test]
-    fn coset_dft_matches_naive() {
-        test_coset_dft_matches_naive::<BabyBear, Radix2Bowers>();
-    }
-
-    #[test]
-    fn idft_matches_naive() {
-        test_idft_matches_naive::<Goldilocks, Radix2Bowers>();
-    }
-
-    #[test]
-    fn coset_idft_matches_naive() {
-        test_coset_idft_matches_naive::<BabyBear, Radix2Bowers>();
-        test_coset_idft_matches_naive::<Goldilocks, Radix2Bowers>();
-    }
-
-    #[test]
-    fn lde_matches_naive() {
-        test_lde_matches_naive::<BabyBear, Radix2Bowers>();
-    }
-
-    #[test]
-    fn coset_lde_matches_naive() {
-        test_coset_lde_matches_naive::<BabyBear, Radix2Bowers>();
-    }
-
-    #[test]
-    fn dft_idft_consistency() {
-        test_dft_idft_consistency::<BabyBear, Radix2Bowers>();
-    }
-}
diff --git a/dft/src/radix_2_dit.rs b/dft/src/radix_2_dit.rs
@@ -67,48 +67,3 @@ fn dit_layer<F: Field>(mat: &mut RowMajorMatrixViewMut<'_, F>, layer: usize, twi
                 });
         });
 }
-
-#[cfg(test)]
-mod tests {
-    use p3_baby_bear::BabyBear;
-    use p3_goldilocks::Goldilocks;
-
-    use crate::testing::*;
-    use crate::Radix2Dit;
-
-    #[test]
-    fn dft_matches_naive() {
-        test_dft_matches_naive::<BabyBear, Radix2Dit<_>>();
-    }
-
-    #[test]
-    fn coset_dft_matches_naive() {
-        test_coset_dft_matches_naive::<BabyBear, Radix2Dit<_>>();
-    }
-
-    #[test]
-    fn idft_matches_naive() {
-        test_idft_matches_naive::<Goldilocks, Radix2Dit<_>>();
-    }
-
-    #[test]
-    fn coset_idft_matches_naive() {
-        test_coset_idft_matches_naive::<BabyBear, Radix2Dit<_>>();
-        test_coset_idft_matches_naive::<Goldilocks, Radix2Dit<_>>();
-    }
-
-    #[test]
-    fn lde_matches_naive() {
-        test_lde_matches_naive::<BabyBear, Radix2Dit<_>>();
-    }
-
-    #[test]
-    fn coset_lde_matches_naive() {
-        test_coset_lde_matches_naive::<BabyBear, Radix2Dit<_>>();
-    }
-
-    #[test]
-    fn dft_idft_consistency() {
-        test_dft_idft_consistency::<BabyBear, Radix2Dit<_>>();
-    }
-}
diff --git a/dft/src/radix_2_dit_parallel.rs b/dft/src/radix_2_dit_parallel.rs
@@ -191,48 +191,3 @@ fn dit_layer_rev<F: Field>(
         DitButterfly(twiddle).apply_to_rows(lo, hi)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use p3_baby_bear::BabyBear;
-    use p3_goldilocks::Goldilocks;
-
-    use crate::testing::*;
-    use crate::Radix2DitParallel;
-
-    #[test]
-    fn dft_matches_naive() {
-        test_dft_matches_naive::<BabyBear, Radix2DitParallel>();
-    }
-
-    #[test]
-    fn coset_dft_matches_naive() {
-        test_coset_dft_matches_naive::<BabyBear, Radix2DitParallel>();
-    }
-
-    #[test]
-    fn idft_matches_naive() {
-        test_idft_matches_naive::<Goldilocks, Radix2DitParallel>();
-    }
-
-    #[test]
-    fn coset_idft_matches_naive() {
-        test_coset_idft_matches_naive::<BabyBear, Radix2DitParallel>();
-        test_coset_idft_matches_naive::<Goldilocks, Radix2DitParallel>();
-    }
-
-    #[test]
-    fn lde_matches_naive() {
-        test_lde_matches_naive::<BabyBear, Radix2DitParallel>();
-    }
-
-    #[test]
-    fn coset_lde_matches_naive() {
-        test_coset_lde_matches_naive::<BabyBear, Radix2DitParallel>();
-    }
-
-    #[test]
-    fn dft_idft_consistency() {
-        test_dft_idft_consistency::<BabyBear, Radix2DitParallel>();
-    }
-}