Skip to content

Commit

Permalink
streaming compaction
Browse files Browse the repository at this point in the history
  • Loading branch information
MarinPostma committed Sep 30, 2024
1 parent 912aaf3 commit 06e177f
Show file tree
Hide file tree
Showing 13 changed files with 860 additions and 701 deletions.
3 changes: 3 additions & 0 deletions libsql-wal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ rand = "0.8.5"
aws-smithy-types-convert = { version = "0.60.8", features = ["convert-chrono"] }
petgraph = "0.6.5"
anyhow = { version = "1.0.86", optional = true }
futures = "0.3.30"
memmap = "0.7.0"
pin-project-lite = "0.2.14"

[dev-dependencies]
criterion = "0.5.1"
Expand Down
16 changes: 13 additions & 3 deletions libsql-wal/src/replication/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use tokio_stream::Stream;
use zerocopy::FromZeroes;

use crate::io::buf::ZeroCopyBoxIoBuf;
use crate::segment::compacted::CompactedFrame;
use crate::segment::Frame;
use crate::storage::backend::FindSegmentReq;
use crate::storage::Storage;
Expand Down Expand Up @@ -65,10 +66,19 @@ where
},
};

let (frame, ret) = segment.read_frame(ZeroCopyBoxIoBuf::new_uninit(Frame::new_box_zeroed()), offset as u32).await;
// TODO: The copy here is inneficient. This is OK for now, until we rewrite
// this code to read from the main db file instead of storage.
let (compacted_frame, ret) = segment.read_frame(ZeroCopyBoxIoBuf::new_uninit(CompactedFrame::new_box_zeroed()), offset as u32).await;
ret?;
let frame = frame.into_inner();
debug_assert_eq!(frame.header().size_after(), 0, "all frames in a compacted segment should have size_after set to 0");
let compacted_frame = compacted_frame.into_inner();
let mut frame = Frame::new_box_zeroed();
frame.data_mut().copy_from_slice(&compacted_frame.data);

let header = frame.header_mut();
header.frame_no = compacted_frame.header().frame_no;
header.size_after = 0.into();
header.page_no = compacted_frame.header().page_no;

if frame.header().frame_no() >= until {
yield frame;
}
Expand Down
140 changes: 118 additions & 22 deletions libsql-wal/src/segment/compacted.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,131 @@
use std::io;
use std::mem::size_of;
use std::mem::{offset_of, size_of};

use chrono::{DateTime, Utc};
use uuid::Uuid;
use zerocopy::little_endian::{U128 as lu128, U16 as lu16, U32 as lu32, U64 as lu64};
use zerocopy::{AsBytes, FromBytes, FromZeroes};

use crate::io::buf::{IoBufMut, ZeroCopyBuf};
use crate::io::FileExt;
use crate::segment::FrameHeader;
use crate::{LIBSQL_MAGIC, LIBSQL_PAGE_SIZE, LIBSQL_WAL_VERSION};

use super::{Frame, Result};
use super::Result;

#[derive(Debug, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
pub struct CompactedSegmentDataHeader {
pub struct CompactedSegmentHeader {
pub(crate) magic: lu64,
pub(crate) version: lu16,
pub(crate) frame_count: lu32,
pub(crate) segment_id: lu128,
pub(crate) log_id: lu128,
pub(crate) start_frame_no: lu64,
pub(crate) end_frame_no: lu64,
pub(crate) size_after: lu32,
/// for now, always 4096
pub(crate) page_size: lu16,
pub(crate) timestamp: lu64,
}
impl CompactedSegmentDataHeader {

bitflags::bitflags! {
pub struct CompactedFrameFlags: u32 {
/// This flag is set for the last frame in the segment
const LAST = 1 << 0;
}
}

#[derive(Debug, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
pub struct CompactedFrameHeader {
pub flags: lu32,
pub page_no: lu32,
pub frame_no: lu64,
/// running checksum from this frame
/// this is the crc32 of the checksum of the previous frame and all the frame data, including
/// all the fields before checksum in the header. THIS FIELD MUST ALWAYS BE THE last FIELD IN
/// THE STRUCT
pub checksum: lu32,
}

impl CompactedFrameHeader {
pub fn flags(&self) -> CompactedFrameFlags {
CompactedFrameFlags::from_bits(self.flags.get()).unwrap()
}

pub(crate) fn is_last(&self) -> bool {
self.flags().contains(CompactedFrameFlags::LAST)
}

pub(crate) fn set_last(&mut self) {
let mut flags = self.flags();
flags.insert(CompactedFrameFlags::LAST);
self.flags = flags.bits().into();
}

pub(crate) fn reset_flags(&mut self) {
self.flags = 0.into();
}

pub(crate) fn compute_checksum(&self, previous: u32, data: &[u8]) -> u32 {
assert_eq!(data.len(), LIBSQL_PAGE_SIZE as usize);
let mut h = crc32fast::Hasher::new_with_initial(previous);
h.update(&self.as_bytes()[..offset_of!(Self, checksum)]);
h.update(data);
h.finalize()
}

/// updates the checksum with the previous frame checksum and the frame data
pub(crate) fn update_checksum(&mut self, previous: u32, data: &[u8]) -> u32 {
let checksum = self.compute_checksum(previous, data);
self.checksum = checksum.into();
checksum
}

pub fn checksum(&self) -> u32 {
self.checksum.get()
}

pub fn page_no(&self) -> u32 {
self.page_no.get()
}
}

#[derive(Debug, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
pub struct CompactedFrame {
pub header: CompactedFrameHeader,
pub data: [u8; LIBSQL_PAGE_SIZE as usize],
}

impl CompactedFrame {
pub fn header(&self) -> &CompactedFrameHeader {
&self.header
}

pub(crate) fn header_mut(&mut self) -> &mut CompactedFrameHeader {
&mut self.header
}
}

impl CompactedSegmentHeader {
pub fn new(
start_frame_no: u64,
end_frame_no: u64,
size_after: u32,
timestamp: DateTime<Utc>,
log_id: Uuid,
) -> Self {
Self {
magic: LIBSQL_MAGIC.into(),
version: LIBSQL_WAL_VERSION.into(),
start_frame_no: start_frame_no.into(),
end_frame_no: end_frame_no.into(),
size_after: size_after.into(),
page_size: LIBSQL_PAGE_SIZE.into(),
timestamp: (timestamp.timestamp_millis() as u64).into(),
log_id: log_id.as_u128().into(),
}
}

fn check(&self) -> Result<()> {
if self.magic.get() != LIBSQL_MAGIC {
return Err(super::Error::InvalidHeaderMagic);
Expand All @@ -47,14 +147,8 @@ impl CompactedSegmentDataHeader {
}
}

#[derive(Debug, AsBytes, FromZeroes, FromBytes)]
#[repr(C)]
pub struct CompactedSegmentDataFooter {
pub(crate) checksum: lu32,
}

pub struct CompactedSegment<F> {
header: CompactedSegmentDataHeader,
header: CompactedSegmentHeader,
file: F,
}

Expand All @@ -69,7 +163,7 @@ impl<F> CompactedSegment<F> {
}
}

pub fn header(&self) -> &CompactedSegmentDataHeader {
pub fn header(&self) -> &CompactedSegmentHeader {
&self.header
}
}
Expand All @@ -79,23 +173,25 @@ impl<F: FileExt> CompactedSegment<F> {
let buf = ZeroCopyBuf::new_uninit();
let (buf, ret) = file.read_exact_at_async(buf, 0).await;
ret?;
let header: CompactedSegmentDataHeader = buf.into_inner();
let header: CompactedSegmentHeader = buf.into_inner();
header.check()?;
Ok(Self { file, header })
}

pub(crate) fn from_parts(file: F, header: CompactedSegmentDataHeader) -> Self {
pub(crate) fn from_parts(file: F, header: CompactedSegmentHeader) -> Self {
Self { header, file }
}

/// read a CompactedFrame from the segment
pub(crate) async fn read_frame<B: IoBufMut + Send + 'static>(
&self,
buf: B,
offset: u32,
) -> (B, io::Result<()>) {
assert_eq!(buf.bytes_init(), 0);
assert_eq!(buf.bytes_total(), size_of::<Frame>());
let offset = size_of::<CompactedSegmentDataHeader>() + size_of::<Frame>() * offset as usize;
assert_eq!(buf.bytes_total(), size_of::<CompactedFrame>());
let offset =
size_of::<CompactedSegmentHeader>() + size_of::<CompactedFrame>() * offset as usize;
let (buf, ret) = self.file.read_exact_at_async(buf, offset as u64).await;
(buf, ret)
}
Expand All @@ -107,9 +203,9 @@ impl<F: FileExt> CompactedSegment<F> {
) -> (B, io::Result<()>) {
assert_eq!(buf.bytes_init(), 0);
assert_eq!(buf.bytes_total(), LIBSQL_PAGE_SIZE as usize);
let offset = size_of::<CompactedSegmentDataHeader>()
+ size_of::<Frame>() * offset as usize
+ size_of::<FrameHeader>();
let offset = size_of::<CompactedSegmentHeader>()
+ size_of::<CompactedFrame>() * offset as usize
+ size_of::<CompactedFrameHeader>();
let (buf, ret) = self.file.read_exact_at_async(buf, offset as u64).await;
(buf, ret)
}
Expand Down
55 changes: 1 addition & 54 deletions libsql-wal/src/segment/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,7 @@ impl SegmentHeader {
}

pub trait Segment: Send + Sync + 'static {
fn compact(
&self,
out_file: &impl FileExt,
id: uuid::Uuid,
) -> impl Future<Output = Result<Vec<u8>>> + Send;
fn compact(&self, out_file: &impl FileExt) -> impl Future<Output = Result<Vec<u8>>> + Send;
fn start_frame_no(&self) -> u64;
fn last_committed(&self) -> u64;
fn index(&self) -> &fst::Map<Arc<[u8]>>;
Expand All @@ -177,55 +173,6 @@ pub trait Segment: Send + Sync + 'static {
fn destroy<IO: Io>(&self, io: &IO) -> impl Future<Output = ()>;
}

impl<T: Segment> Segment for Arc<T> {
fn compact(
&self,
out_file: &impl FileExt,
id: uuid::Uuid,
) -> impl Future<Output = Result<Vec<u8>>> + Send {
self.as_ref().compact(out_file, id)
}

fn start_frame_no(&self) -> u64 {
self.as_ref().start_frame_no()
}

fn last_committed(&self) -> u64 {
self.as_ref().last_committed()
}

fn index(&self) -> &fst::Map<Arc<[u8]>> {
self.as_ref().index()
}

fn read_page(&self, page_no: u32, max_frame_no: u64, buf: &mut [u8]) -> io::Result<bool> {
self.as_ref().read_page(page_no, max_frame_no, buf)
}

fn is_checkpointable(&self) -> bool {
self.as_ref().is_checkpointable()
}

fn size_after(&self) -> u32 {
self.as_ref().size_after()
}

async fn read_frame_offset_async<B>(&self, offset: u32, buf: B) -> (B, Result<()>)
where
B: IoBufMut + Send + 'static,
{
self.as_ref().read_frame_offset_async(offset, buf).await
}

fn destroy<IO: Io>(&self, io: &IO) -> impl Future<Output = ()> {
self.as_ref().destroy(io)
}

fn timestamp(&self) -> DateTime<Utc> {
self.as_ref().timestamp()
}
}

#[repr(C)]
#[derive(Debug, zerocopy::AsBytes, zerocopy::FromBytes, zerocopy::FromZeroes)]
pub struct FrameHeader {
Expand Down
Loading

0 comments on commit 06e177f

Please sign in to comment.