Skip to content

Commit

Permalink
feat: criterion-table
Browse files Browse the repository at this point in the history
  • Loading branch information
0xWOLAND committed Nov 19, 2023
1 parent fdbc63c commit 1a34dab
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 23 deletions.
58 changes: 58 additions & 0 deletions BENCHMARKS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Benchmarks

## Table of Contents

- [Overview](#overview)
- [Benchmark Results](#benchmark-results)
- [Number-Theoretic Transform Benchmarks](#number-theoretic-transform-benchmarks)
- [Polynomial Multiplication Benchmarks](#polynomial-multiplication-benchmarks)

## Overview

This benchmark comparison report shows the difference in performance between the parallel NTT-based and the serial brute-force
polynomial multiplication algorithms. Each entry in the first table is the time taken by the forward NTT for the size shown in the row label.

Computer Stats:

```
CPU(s): 16
Thread(s) per core: 2
Core(s) per socket: 8
Socket(s): 1
```

## Benchmark Results

### Number-Theoretic Transform Benchmarks

| | `NTT` |
|:------------|:-------------------------- |
| **`64`** | `202.26 us` (✅ **1.00x**) |
| **`128`** | `354.08 us` (✅ **1.00x**) |
| **`256`** | `665.54 us` (✅ **1.00x**) |
| **`512`** | `1.12 ms` (✅ **1.00x**) |
| **`1024`** | `2.00 ms` (✅ **1.00x**) |
| **`2048`** | `3.94 ms` (✅ **1.00x**) |
| **`4096`** | `7.69 ms` (✅ **1.00x**) |
| **`8192`** | `16.13 ms` (✅ **1.00x**) |
| **`16384`** | `34.01 ms` (✅ **1.00x**) |
| **`32768`** | `74.65 ms` (✅ **1.00x**) |

### Polynomial Multiplication Benchmarks

| | `NTT-Based` | `Brute-Force` |
|:------------|:--------------------------|:---------------------------------- |
| **`64`** | `1.18 ms` (✅ **1.00x**) | `48.62 us` (🚀 **24.21x faster**) |
| **`128`** | `2.30 ms` (✅ **1.00x**) | `198.30 us` (🚀 **11.59x faster**) |
| **`256`** | `3.54 ms` (✅ **1.00x**) | `766.71 us` (🚀 **4.62x faster**) |
| **`512`** | `6.50 ms` (✅ **1.00x**) | `3.11 ms` (🚀 **2.09x faster**) |
| **`1024`** | `12.43 ms` (✅ **1.00x**) | `12.34 ms` (✅ **1.01x faster**) |
| **`2048`** | `24.68 ms` (✅ **1.00x**) | `49.90 ms` (❌ *2.02x slower*) |
| **`4096`** | `51.36 ms` (✅ **1.00x**) | `200.91 ms` (❌ *3.91x slower*) |
| **`8192`** | `106.21 ms` (✅ **1.00x**) | `803.87 ms` (❌ *7.57x slower*) |
| **`16384`** | `226.19 ms` (✅ **1.00x**) | `3.24 s` (❌ *14.31x slower*) |
| **`32768`** | `467.75 ms` (✅ **1.00x**) | `12.75 s` (❌ *27.25x slower*) |

---
Made with [criterion-table](https://github.com/nu11ptr/criterion-table)

43 changes: 20 additions & 23 deletions benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,41 +29,38 @@ fn bench_forward(n: usize, c: &Constants) {
}

fn criterion_forward(c: &mut Criterion) {
let mut group = c.benchmark_group("bench_forward");
(6..deg).for_each(|x| {
group.bench_function(BenchmarkId::from_parameter(x), |b| {
let c = working_modulus(BigInt::from(x), BigInt::from(2 * x + 1));
b.iter(|| bench_forward(black_box(1 << x), black_box(&c)))
let mut group = c.benchmark_group("Number-Theoretic Transform Benchmarks");
(6..deg).for_each(|n| {
let id = BenchmarkId::new("NTT", 1 << n);
let c = working_modulus(BigInt::from(n), BigInt::from(2 * n + 1));
group.bench_with_input(id, &n, |b, n| {
b.iter(|| bench_forward(black_box(1 << n), black_box(&c)))
});
});
}

fn criterion_mul(c: &mut Criterion) {
let mut group = c.benchmark_group("bench_mul");
(6..deg).for_each(|x| {
group.bench_function(BenchmarkId::from_parameter(x), |b| {
let N = BigInt::from((2 * x as usize).next_power_of_two());
let M = N << 1 + 1;
let c = working_modulus(N, M);
b.iter(|| bench_mul(black_box(1 << x), black_box(1 << x), black_box(&c)))
fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("Polynomial Multiplication Benchmarks");

(6..deg).for_each(|n| {
let id = BenchmarkId::new("NTT-Based", 1 << n);
let N = BigInt::from((2 * n).next_power_of_two());
let M = N << 1 + 1;
let c = working_modulus(N, M);
group.bench_with_input(id, &n, |b, n| {
b.iter(|| bench_mul(black_box(1 << n), black_box(1 << n), black_box(&c)))
});
});
group.finish();
}

fn criterion_brute_mul(c: &mut Criterion) {
let mut group = c.benchmark_group("bench_brute_mul");
(6..deg).for_each(|x| {
group.bench_function(BenchmarkId::from_parameter(x), |b| {
b.iter(|| bench_mul_brute(black_box(1 << x), black_box(1 << x)))
let id = BenchmarkId::new("Brute-Force", 1 << n);
group.bench_with_input(id, &n, |b, n| {
b.iter(|| bench_mul_brute(black_box(1 << n), black_box(1 << n)))
});
});
group.finish();
}

criterion_group! {
name = benches;
config = Criterion::default().sample_size(10);
targets = criterion_forward, criterion_mul, criterion_brute_mul
targets = criterion_forward, criterion_benchmark
}
criterion_main!(benches);
17 changes: 17 additions & 0 deletions tables.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[top_comments]
Overview = """
This benchmark comparison report shows the difference in performance between the parallel NTT-based and the serial brute-force
polynomial multiplication algorithms. Each entry in the first table is the time taken by the forward NTT for the size shown in the row label.
Computer Stats:
```
CPU(s): 16
Thread(s) per core: 2
Core(s) per socket: 8
Socket(s): 1
```
"""

[table_comments]
criterion_benchmark = """"""

0 comments on commit 1a34dab

Please sign in to comment.