Skip to content

Commit

Permalink
clean clean clean (#1626)
Browse files Browse the repository at this point in the history
  • Loading branch information
hermeGarcia authored Nov 30, 2023
1 parent c83ed77 commit 0dce135
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 113 deletions.
41 changes: 20 additions & 21 deletions nucliadb_vectors/src/data_point/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use std::collections::{HashMap, HashSet};
use std::time::SystemTime;
use std::{fs, io, path};

use data_store::Slot;
use data_store::Interpreter;
use disk_hnsw::DiskHnsw;
use io::{BufWriter, Write};
use memmap2::Mmap;
Expand Down Expand Up @@ -236,7 +236,7 @@ impl<'a, Dlog: DeleteLog> Retriever<'a, Dlog> {
nodes,
delete_log,
similarity,
no_nodes: data_store::number_of_written_slots(nodes),
no_nodes: data_store::stored_elements(nodes),
min_score,
}
}
Expand Down Expand Up @@ -344,15 +344,7 @@ impl Elem {
}
}

impl data_store::Data for Elem {
fn serialized_len(&self) -> usize {
Node::serialized_len(
&self.key,
&self.vector,
&self.labels.0,
self.metadata.as_ref(),
)
}
impl data_store::IntoBuffer for Elem {
fn serialize_into<W: io::Write>(self, w: W) -> io::Result<()> {
Node::serialize_into(
w,
Expand Down Expand Up @@ -441,7 +433,7 @@ impl AsRef<DataPoint> for DataPoint {

impl DataPoint {
pub fn stored_len(&self) -> Option<u64> {
if data_store::number_of_written_slots(&self.nodes) == 0 {
if data_store::stored_elements(&self.nodes) == 0 {
return None;
}
let node = data_store::get_value(Node, &self.nodes, 0);
Expand All @@ -454,11 +446,18 @@ impl DataPoint {
self.journal
}
pub fn get_keys<Dlog: DeleteLog>(&self, delete_log: &Dlog) -> Vec<String> {
data_store::get_keys(Node, &self.nodes)
.filter(|k| !delete_log.is_deleted(k))
.map(String::from_utf8_lossy)
.map(|s| s.to_string())
.collect()
let node_storage = &self.nodes;
let length = data_store::stored_elements(node_storage);
let data_iterator = (0..length).map(|i| data_store::get_value(Node, node_storage, i));
let mut keys = Vec::new();
for data in data_iterator {
if !delete_log.is_deleted(data) {
let raw_key = Node.get_key(data);
let string_key = String::from_utf8_lossy(raw_key).to_string();
keys.push(string_key);
}
}
keys
}

#[allow(clippy::too_many_arguments)]
Expand All @@ -481,7 +480,7 @@ impl DataPoint {
min_score,
);

let no_nodes = data_store::number_of_written_slots(&self.nodes);
let no_nodes = data_store::stored_elements(&self.nodes);

let filter = FormulaFilter::new(
filter,
Expand Down Expand Up @@ -542,7 +541,7 @@ impl DataPoint {
.collect();
data_store::merge(&mut nodes, &node_producers)?;
let nodes = unsafe { Mmap::map(&nodes)? };
let no_nodes = data_store::number_of_written_slots(&nodes);
let no_nodes = data_store::stored_elements(&nodes);

// Creating the FSTs with the new nodes
let (label_index, key_index) = if channel == Channel::EXPERIMENTAL {
Expand Down Expand Up @@ -651,7 +650,7 @@ impl DataPoint {
root_dir: &path::Path,
nodes: &[u8],
) -> VectorR<(Option<LabelIndex>, Option<KeyIndex>)> {
let no_nodes = data_store::number_of_written_slots(nodes);
let no_nodes = data_store::stored_elements(nodes);

// building the KeyIndex and LabelIndex FSTs
let fst_dir = root_dir.join(file_names::FST);
Expand Down Expand Up @@ -731,7 +730,7 @@ impl DataPoint {
// Nodes are stored on disk and mmaped.
data_store::create_key_value(&mut nodesf, elems)?;
let nodes = unsafe { Mmap::map(&nodesf)? };
let no_nodes = data_store::number_of_written_slots(&nodes);
let no_nodes = data_store::stored_elements(&nodes);

// Creating the FSTs
let (label_index, key_index) = if channel == Channel::EXPERIMENTAL {
Expand Down
9 changes: 2 additions & 7 deletions nucliadb_vectors/src/data_point/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

use std::io;

use crate::data_types::data_store::Slot;
use crate::data_types::data_store::Interpreter;
use crate::data_types::trie;
use crate::data_types::usize_utils::*;

Expand Down Expand Up @@ -163,11 +163,7 @@ impl Node {
trie::has_word(&x[xlabel_ptr..], label)
}
}
impl Slot for Node {
fn cmp_keys(&self, x: &[u8], key: &[u8]) -> std::cmp::Ordering {
let xkey = self.get_key(x);
xkey.cmp(key)
}
impl Interpreter for Node {
fn get_key<'a>(&self, x: &'a [u8]) -> &'a [u8] {
Self::key(x)
}
Expand Down Expand Up @@ -269,7 +265,6 @@ mod tests {
assert_eq!(Node::vector(&buf[node2..]), vector2);
assert!(Node.keep_in_merge(&buf[node1..]));
assert!(Node.keep_in_merge(&buf[node2..]));
assert_eq!(Node.cmp_slot(&buf[node1..], &buf[node2..]), key1.cmp(key2));
assert_eq!(Node::metadata(&buf[node1..]), metadata1);
assert_eq!(Node::metadata(&buf[node2..]), metadata2);
assert_eq!(
Expand Down
Loading

0 comments on commit 0dce135

Please sign in to comment.