From e8ee08a19b663481f78595d831a4ce31bc7a3a14 Mon Sep 17 00:00:00 2001 From: lovasoa Date: Fri, 12 Apr 2024 22:41:04 +0200 Subject: [PATCH] add benchmark --- Cargo.toml | 8 + README.md | 40 +++ benches/lines.svg | 277 +++++++++++++++++ benches/violin.svg | 741 +++++++++++++++++++++++++++++++++++++++++++++ benches/vs_json.rs | 81 +++++ 5 files changed, 1147 insertions(+) create mode 100644 benches/lines.svg create mode 100644 benches/violin.svg create mode 100644 benches/vs_json.rs diff --git a/Cargo.toml b/Cargo.toml index 436eb8c..1b4a4e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,3 +23,11 @@ default = ["serde_json"] [dev-dependencies] serde_derive = "1.0" rusqlite = { version = "0.31", features = ["bundled", "blob"] } +criterion = { version = "0.4", features = ["html_reports"] } + +[profile.bench] +debug = 1 + +[[bench]] +name = "vs_json" +harness = false \ No newline at end of file diff --git a/README.md b/README.md index 0362ad3..468b17b 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,46 @@ This crate provides a custom Serde deserializer for SQLite JSONB columns. It was initially developed for inclusion in the [SQLPage](https://github.com/lovasoa/SQLpage) website builder. +## Why + +Since version 3.45.0, SQLite supports JSONB columns, +which can store JSON data in a binary format that is more efficient +to manipulate than JSON. + +The problem is that applications that use SQLite currently need +to convert the data from JSONB to JSON, and then from JSON to their +own data structures to use it. +This prevents reading the blob data directly from the +database in a streaming fashion using SQLite's blob streaming API, +and requires making a SQL query to extract and convert the data to JSON. + +This crate provides a custom Serde serializer and deserializer +for JSONB directly, which allows skipping the JSON conversion step. + +This can lead to significant performance improvements in certain scenarios, +as demonstrated in this crate's benchmarks. 
+ +### Benchmarks + +These graphs compare the time taken by two approaches: + - deserializing a JSONB column directly to a struct using this crate + - making a SQL query to extract the JSONB column as JSON and then deserializing it to a struct using serde_json. + +The data being deserialized contains a string whose length varies from 50 to 1000 characters, to show how performance evolves with the size of the data. + +
+ +![Benchmark results](./benches/lines.svg) +![Benchmark results](./benches/violin.svg) + +
+ +> *Disclaimer*: These benchmarks should always be taken with a grain of salt. +> When performance matters, you should measure the performance +> inside your own application with your own data. +> `serde_json` is very well optimized and can be faster than this crate +> in some scenarios, especially when the JSON data is small. + ## Crate features The binary format can contain raw json data, so this crate depends on the `serde_json` crate to parse the JSON data. diff --git a/benches/lines.svg b/benches/lines.svg new file mode 100644 index 0000000..91109fc --- /dev/null +++ b/benches/lines.svg @@ -0,0 +1,277 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2 + + + + + + + + + + + + + 3 + + + + + + + + + + + + + 4 + + + + + + + + + + + + + 5 + + + + + + + + + + + + + 6 + + + + + + + + + + + + + 7 + + + + + + + + + + + + + 8 + + + + + + + + + + + + + 9 + + + + + 0 + + + + + 100 + + + + + 200 + + + + + 300 + + + + + 400 + + + + + 500 + + + + + 600 + + + + + 700 + + + + + 800 + + + + + 900 + + + + + 1000 + + + + + + + + + Average time (µs) + + + + + Input + + + + + convert to json then deserialize + + + + + convert to json then deserialize + + + + + + gnuplot_plot_2 + + + + + + + + + + deserialize jsonb directly from blob + + + + + deserialize jsonb directly from blob + + + + + + gnuplot_plot_4 + + + + + + + + + + + + + + + + + + + + reading a stored jsonb blob: Comparison + + + + + + + diff --git a/benches/violin.svg b/benches/violin.svg new file mode 100644 index 0000000..9a59658 --- /dev/null +++ b/benches/violin.svg @@ -0,0 +1,741 @@ + + + +Gnuplot +Produced by GNUPLOT 5.4 patchlevel 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + reading a stored jsonb blob/deserialize jsonb directly from blob/1000 + + + + + reading a stored jsonb blob/deserialize jsonb directly from blob/500 + + + + + reading a stored jsonb blob/deserialize jsonb directly from 
blob/100 + + + + + reading a stored jsonb blob/deserialize jsonb directly from blob/50 + + + + + reading a stored jsonb blob/convert to json then deserialize/1000 + + + + + reading a stored jsonb blob/convert to json then deserialize/500 + + + + + reading a stored jsonb blob/convert to json then deserialize/100 + + + + + reading a stored jsonb blob/convert to json then deserialize/50 + + + + + + + + + + + + + 0 + + + + + + + + + + + + + 5 + + + + + + + + + + + + + 10 + + + + + + + + + + + + + 15 + + + + + + + + + + + + + 20 + + + + + + + + + Input + + + + + Average time (µs) + + + + + PDF + + + PDF + + + + + + + + + + gnuplot_plot_2 + + + + + + + gnuplot_plot_3 + + + + + + + gnuplot_plot_4 + + + + + + + gnuplot_plot_5 + + + + + + + gnuplot_plot_6 + + + + + + + + + gnuplot_plot_7 + + + + + + + gnuplot_plot_8 + + + + + + + + + + + + + + + + + reading a stored jsonb blob: Violin plot + + + + + + +
diff --git a/benches/vs_json.rs b/benches/vs_json.rs
new file mode 100644
index 0000000..e47576c
--- /dev/null
+++ b/benches/vs_json.rs
@@ -0,0 +1,101 @@
+//! Criterion benchmark comparing two ways of reading a JSONB value stored in SQLite:
+//! converting it to JSON text in SQL and parsing it with `serde_json`, versus
+//! deserializing the raw JSONB blob directly with `serde_sqlite_jsonb`.
+
+use std::io::BufReader;
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use rusqlite::{Connection, DatabaseName};
+use serde_derive::{Deserialize, Serialize};
+
+/// Sample record stored in the `bigdata` table; `data` is the variable-size payload.
+#[derive(Debug, PartialEq, Deserialize, Serialize)]
+struct Person {
+    id: usize,
+    name: String,
+    phone_numbers: Vec<String>,
+    active: bool,
+    data: String,
+}
+
+/// Reads row 42 by asking SQLite to render the JSONB value as JSON text,
+/// then parses that text with `serde_json`.
+///
+/// Panics if the query or the parsing fails.
+fn convert_to_json_then_deserialize(conn: &Connection) -> Person {
+    let json_str: String = conn
+        .query_row(
+            "SELECT json(data) from bigdata where id=?", // convert jsonb to json
+            [42],
+            |row| row.get(0),
+        )
+        .unwrap();
+    serde_json::from_str(&json_str).unwrap()
+}
+
+/// Reads row 42 by opening the `data` column as a blob and feeding it,
+/// through a `BufReader`, to `serde_sqlite_jsonb::from_reader`.
+///
+/// Panics if the blob cannot be opened or deserialized.
+fn deserialize_jsonb_directly_from_blob(conn: &Connection) -> Person {
+    let my_blob = conn
+        .blob_open(DatabaseName::Main, "bigdata", "data", 42, true) // true = read-only
+        .unwrap();
+    let buffered = BufReader::new(my_blob);
+    serde_sqlite_jsonb::from_reader(buffered).unwrap()
+}
+
+/// Inserts (or replaces) row 42 with a `Person` serialized to JSONB whose
+/// `data` field is `data_size` repetitions of `"x"`.
+///
+/// Panics if serialization or the insert fails.
+fn insert_big_data(conn: &Connection, data_size: usize) {
+    let person = Person {
+        id: 123,
+        name: "John Doe".to_string(),
+        phone_numbers: vec!["1234".to_string()],
+        active: true,
+        data: "x".repeat(data_size),
+    };
+    conn.execute(
+        "INSERT OR REPLACE INTO bigdata (id, data) VALUES (42, ?)",
+        [serde_sqlite_jsonb::to_vec(&person).unwrap()],
+    )
+    .unwrap();
+}
+
+/// Benchmarks both read paths against an in-memory database for payload
+/// sizes of 50, 100, 500 and 1000 characters.
+fn bench_deserialize_json_vs_jsonb(c: &mut Criterion) {
+    let conn = Connection::open_in_memory().unwrap();
+    // Store a large json string as a jsonb blob in a table
+    conn.execute_batch(
+        "create table bigdata (id integer primary key, data blob)",
+    )
+    .unwrap();
+
+    let mut group = c.benchmark_group("reading a stored jsonb blob");
+    for data_size in [50, 100, 500, 1000].iter() {
+        group.bench_with_input(
+            BenchmarkId::new("convert to json then deserialize", data_size),
+            data_size,
+            |b, data_size| {
+                // Setup happens outside the timed `b.iter` closure.
+                insert_big_data(&conn, *data_size);
+                b.iter(|| convert_to_json_then_deserialize(&conn))
+            },
+        );
+        group.bench_with_input(
+            BenchmarkId::new("deserialize jsonb directly from blob", data_size),
+            data_size,
+            |b, data_size| {
+                insert_big_data(&conn, *data_size);
+                b.iter(|| deserialize_jsonb_directly_from_blob(&conn))
+            },
+        );
+    }
+    group.finish();
+}
+
+criterion_group!(benches, bench_deserialize_json_vs_jsonb);
+criterion_main!(benches);