Skip to content

Commit

Permalink
perf: improve apply update performance (y-crdt/y-octo#34)
Browse files Browse the repository at this point in the history
this PR partially solves y-crdt/y-octo##33 and greatly improves the performance of
apply update
test cases show a performance improvement of about 40-50%
  • Loading branch information
darkskygit committed Jan 31, 2024
1 parent 49a6b7a commit 7428401
Show file tree
Hide file tree
Showing 23 changed files with 244 additions and 108 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions libs/jwst-codec-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,9 @@ name = "text_ops_benchmarks"
harness = false
name = "update_benchmarks"

[[bench]]
harness = false
name = "apply_benchmarks"

[lib]
bench = true
35 changes: 35 additions & 0 deletions libs/jwst-codec-utils/benches/apply_benchmarks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
mod utils;

use std::time::Duration;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;

fn apply(c: &mut Criterion) {
let files = Files::load();

let mut group = c.benchmark_group("apply");
group.measurement_time(Duration::from_secs(15));

for file in &files.files {
group.throughput(Throughput::Bytes(file.content.len() as u64));
group.bench_with_input(
BenchmarkId::new("apply with yrs", file.path.name_str()),
&file.content,
|b, content| {
b.iter(|| {
use yrs::{updates::decoder::Decode, Doc, Transact, Update};
let update = Update::decode_v1(content).unwrap();
let doc = Doc::new();
doc.transact_mut().apply_update(update);
});
},
);
}

group.finish();
}

criterion_group!(benches, apply);
criterion_main!(benches);
5 changes: 5 additions & 0 deletions libs/jwst-codec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ahash = "0.8"
bitvec = "1.0"
byteorder = "1.5"
nom = "7.1"
Expand Down Expand Up @@ -77,5 +78,9 @@ name = "text_ops_benchmarks"
harness = false
name = "update_benchmarks"

[[bench]]
harness = false
name = "apply_benchmarks"

[lib]
bench = true
34 changes: 34 additions & 0 deletions libs/jwst-codec/benches/apply_benchmarks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
mod utils;

use std::time::Duration;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use path_ext::PathExt;
use utils::Files;

fn apply(c: &mut Criterion) {
let files = Files::load();

let mut group = c.benchmark_group("apply");
group.measurement_time(Duration::from_secs(15));

for file in &files.files {
group.throughput(Throughput::Bytes(file.content.len() as u64));
group.bench_with_input(
BenchmarkId::new("apply with jwst", file.path.name_str()),
&file.content,
|b, content| {
b.iter(|| {
use y_octo::*;
let mut doc = Doc::new();
doc.apply_update_from_binary(content.clone()).unwrap()
});
},
);
}

group.finish();
}

criterion_group!(benches, apply);
criterion_main!(benches);
2 changes: 1 addition & 1 deletion libs/jwst-codec/src/doc/codec/any.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, fmt, ops::RangeInclusive};
use std::{fmt, ops::RangeInclusive};

use ordered_float::OrderedFloat;

Expand Down
3 changes: 2 additions & 1 deletion libs/jwst-codec/src/doc/codec/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ impl Content {
match self {
Self::Deleted(len) => *len,
Self::Json(strings) => strings.len() as u64,
Self::String(string) => string.encode_utf16().count() as u64,
// TODO: need a custom wrapper with length cached, this cost too much
Self::String(string) => string.chars().map(|c| c.len_utf16()).sum::<usize>() as u64,
Self::Any(any) => any.len() as u64,
Self::Binary(_) | Self::Embed(_) | Self::Format { .. } | Self::Type(_) | Self::Doc { .. } => 1,
}
Expand Down
19 changes: 9 additions & 10 deletions libs/jwst-codec/src/doc/codec/delete_set.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::{
collections::{hash_map::Entry, HashMap},
collections::{hash_map::Entry, VecDeque},
ops::{Deref, DerefMut, Range},
};

Expand Down Expand Up @@ -28,10 +28,10 @@ impl<R: CrdtReader> CrdtRead<R> for OrderRange {
if num_of_deletes == 1 {
Ok(OrderRange::Range(Range::<u64>::read(decoder)?))
} else {
let mut deletes = Vec::with_capacity(num_of_deletes);
let mut deletes = VecDeque::with_capacity(num_of_deletes);

for _ in 0..num_of_deletes {
deletes.push(Range::<u64>::read(decoder)?);
deletes.push_back(Range::<u64>::read(decoder)?);
}

Ok(OrderRange::Fragment(deletes))
Expand Down Expand Up @@ -59,10 +59,10 @@ impl<W: CrdtWriter> CrdtWrite<W> for OrderRange {
}

#[derive(Debug, Default, Clone, PartialEq)]
pub struct DeleteSet(pub HashMap<Client, OrderRange>);
pub struct DeleteSet(pub ClientMap<OrderRange>);

impl Deref for DeleteSet {
type Target = HashMap<Client, OrderRange>;
type Target = ClientMap<OrderRange>;

fn deref(&self) -> &Self::Target {
&self.0
Expand All @@ -71,7 +71,7 @@ impl Deref for DeleteSet {

impl<const N: usize> From<[(Client, Vec<Range<u64>>); N]> for DeleteSet {
fn from(value: [(Client, Vec<Range<u64>>); N]) -> Self {
let mut map = HashMap::with_capacity(N);
let mut map = ClientMap::with_capacity(N);
for (client, ranges) in value {
map.insert(client, ranges.into());
}
Expand Down Expand Up @@ -106,7 +106,6 @@ impl DeleteSet {
}
}

#[allow(dead_code)]
pub fn batch_push(&mut self, client: Client, ranges: Vec<Range<u64>>) {
match self.0.entry(client) {
Entry::Occupied(e) => {
Expand Down Expand Up @@ -136,7 +135,7 @@ impl<R: CrdtReader> CrdtRead<R> for DeleteSet {
fn read(decoder: &mut R) -> JwstCodecResult<Self> {
let num_of_clients = decoder.read_var_u64()? as usize;
// See: [HASHMAP_SAFE_CAPACITY]
let mut map = HashMap::with_capacity(num_of_clients.min(HASHMAP_SAFE_CAPACITY));
let mut map = ClientMap::with_capacity(num_of_clients.min(HASHMAP_SAFE_CAPACITY));

for _ in 0..num_of_clients {
let client = decoder.read_var_u64()?;
Expand Down Expand Up @@ -188,7 +187,7 @@ mod tests {
{
let mut delete_set = delete_set;
delete_set.add(1, 5, 10);
assert_eq!(delete_set.get(&1), Some(&OrderRange::Fragment(vec![0..15, 20..30])));
assert_eq!(delete_set.get(&1), Some(&OrderRange::from(vec![0..15, 20..30])));
}
}

Expand All @@ -210,7 +209,7 @@ mod tests {
{
let mut delete_set = delete_set;
delete_set.batch_push(1, vec![40..50, 10..20]);
assert_eq!(delete_set.get(&1), Some(&OrderRange::Fragment(vec![0..30, 40..50])));
assert_eq!(delete_set.get(&1), Some(&OrderRange::from(vec![0..30, 40..50])));
}
}

Expand Down
21 changes: 9 additions & 12 deletions libs/jwst-codec/src/doc/codec/update.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
use std::{
collections::{HashMap, VecDeque},
ops::Range,
};
use std::{collections::VecDeque, ops::Range};

use super::*;
use crate::doc::StateVector;

#[derive(Debug, Default, Clone)]
pub struct Update {
pub(crate) structs: HashMap<u64, VecDeque<Node>>,
pub(crate) structs: ClientMap<VecDeque<Node>>,
pub(crate) delete_set: DeleteSet,

/// all unapplicable items that we can't integrate into doc
/// any item with inconsistent id clock or missing dependency will be put
/// here
pub(crate) pending_structs: HashMap<Client, VecDeque<Node>>,
pub(crate) pending_structs: ClientMap<VecDeque<Node>>,
/// missing state vector after applying updates
pub(crate) missing_state: StateVector,
/// all unapplicable delete set
Expand All @@ -26,7 +23,7 @@ impl<R: CrdtReader> CrdtRead<R> for Update {
let num_of_clients = decoder.read_var_u64()? as usize;

// See: [HASHMAP_SAFE_CAPACITY]
let mut map = HashMap::with_capacity(num_of_clients.min(HASHMAP_SAFE_CAPACITY));
let mut map = ClientMap::with_capacity(num_of_clients.min(HASHMAP_SAFE_CAPACITY));
for _ in 0..num_of_clients {
let num_of_structs = decoder.read_var_u64()? as usize;
let client = decoder.read_var_u64()?;
Expand Down Expand Up @@ -531,7 +528,7 @@ mod tests {
fn test_update_iterator() {
loom_model!({
let mut update = Update {
structs: HashMap::from([
structs: ClientMap::from_iter([
(
0,
VecDeque::from([
Expand Down Expand Up @@ -571,7 +568,7 @@ mod tests {
loom_model!({
let mut update = Update {
// an item with higher sequence id than local state
structs: HashMap::from([(0, VecDeque::from([struct_item((0, 4), 1)]))]),
structs: ClientMap::from_iter([(0, VecDeque::from([struct_item((0, 4), 1)]))]),
..Update::default()
};

Expand Down Expand Up @@ -623,7 +620,7 @@ mod tests {
fn should_add_skip_when_clock_not_continuous() {
loom_model!({
let update = Update {
structs: HashMap::from([(
structs: ClientMap::from_iter([(
0,
VecDeque::from([
struct_item((0, 0), 1),
Expand Down Expand Up @@ -655,7 +652,7 @@ mod tests {
fn merged_update_should_not_be_released_in_next_turn() {
loom_model!({
let update = Update {
structs: HashMap::from([(
structs: ClientMap::from_iter([(
0,
VecDeque::from([
struct_item((0, 0), 1),
Expand All @@ -670,7 +667,7 @@ mod tests {
let merged = Update::merge([update]);

let update2 = Update {
structs: HashMap::from([(
structs: ClientMap::from_iter([(
0,
VecDeque::from([struct_item((0, 30), 1), Node::new_gc((0, 32).into(), 1)]),
)]),
Expand Down
2 changes: 2 additions & 0 deletions libs/jwst-codec/src/doc/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ mod state;
pub use range::*;
pub use somr::*;
pub use state::*;

use super::*;
Loading

0 comments on commit 7428401

Please sign in to comment.