Skip to content

Commit

Permalink
fix hnsw param use
Browse files Browse the repository at this point in the history
  • Loading branch information
hermeGarcia committed Jan 8, 2024
1 parent 4a7bb24 commit eae6515
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 20 deletions.
17 changes: 7 additions & 10 deletions nucliadb_vectors/src/data_point/disk_hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ impl<'a> Iterator for EdgeIter<'a> {
let edge = f32_from_le_bytes(&buf[crnt..(crnt + EDGE_LEN)]);
crnt += EDGE_LEN;
self.crnt = crnt;
Some((Address(node), Edge { dist: edge }))
Some((Address(node), edge))
}
}
}
Expand All @@ -138,7 +138,7 @@ impl DiskHnsw {
length += USIZE_LEN;
for (cnx, edge) in hnsw.get_layer(layer).get_out_edges(node) {
buf.write_all(&cnx.0.to_le_bytes())?;
buf.write_all(&edge.dist.to_le_bytes())?;
buf.write_all(&edge.to_le_bytes())?;
length += CNX_LEN;
}
}
Expand Down Expand Up @@ -242,9 +242,9 @@ mod tests {
fn hnsw_test() {
let no_nodes = 3;
let cnx0 = vec![
vec![(Address(1), Edge { dist: 1.0 })],
vec![(Address(2), Edge { dist: 2.0 })],
vec![(Address(3), Edge { dist: 3.0 })],
vec![(Address(1), 1.0)],
vec![(Address(2), 2.0)],
vec![(Address(3), 3.0)],
];
let layer0 = RAMLayer {
out: cnx0
Expand All @@ -253,18 +253,15 @@ mod tests {
.map(|(i, c)| (Address(i), c.clone()))
.collect(),
};
let cnx1 = vec![
vec![(Address(1), Edge { dist: 4.0 })],
vec![(Address(2), Edge { dist: 5.0 })],
];
let cnx1 = vec![vec![(Address(1), 4.0)], vec![(Address(2), 5.0)]];
let layer1 = RAMLayer {
out: cnx1
.iter()
.enumerate()
.map(|(i, c)| (Address(i), c.clone()))
.collect(),
};
let cnx2 = vec![vec![(Address(1), Edge { dist: 6.0 })]];
let cnx2 = vec![vec![(Address(1), 6.0)]];
let layer2 = RAMLayer {
out: cnx2
.iter()
Expand Down
11 changes: 6 additions & 5 deletions nucliadb_vectors/src/data_point/ops_hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
k_neighbours: usize,
mut candidates: Vec<(Address, Edge)>,
) -> Vec<(Address, Edge)> {
candidates.sort_unstable_by_key(|(n, d)| std::cmp::Reverse(Cnx(*n, d.dist)));
candidates.sort_unstable_by_key(|(n, d)| std::cmp::Reverse(Cnx(*n, *d)));
candidates.dedup_by_key(|(addr, _)| *addr);
candidates.truncate(k_neighbours);
candidates
Expand Down Expand Up @@ -152,7 +152,7 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
}
Some((down, _)) => {
let mut sorted_out: Vec<_> = layer.get_out_edges(down).collect();
sorted_out.sort_by(|a, b| b.1.dist.total_cmp(&a.1.dist));
sorted_out.sort_by(|a, b| b.1.total_cmp(&a.1));
sorted_out.into_iter().for_each(|(new_candidate, _)| {
if !visited_nodes.contains(&new_candidate) {
candidates.push_back(new_candidate);
Expand Down Expand Up @@ -216,14 +216,15 @@ impl<'a, DR: DataRetriever> HnswOps<'a, DR> {
) -> Vec<Address> {
use params::*;
let neighbours = self.layer_search::<&RAMLayer>(x, layer, ef_construction(), entry_points);
let neighbours = self.select_neighbours_heuristic(m_max(), neighbours);
let mut needs_repair = HashSet::new();
let mut result = Vec::with_capacity(neighbours.len());
layer.add_node(x);
for (y, dist) in neighbours.iter().copied() {
result.push(y);
layer.add_edge(x, Edge { dist }, y);
layer.add_edge(y, Edge { dist }, x);
if layer.no_out_edges(y) > 2 * m_max() {
layer.add_edge(x, dist, y);
layer.add_edge(y, dist, x);
if layer.no_out_edges(y) > m_max() {
needs_repair.insert(y);
}
}
Expand Down
2 changes: 1 addition & 1 deletion nucliadb_vectors/src/data_point/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub fn level_factor() -> f64 {

/// Upper limit to the number of out-edges an embedding can have.
pub const fn m_max() -> usize {
30
60
}

/// Number of bi-directional links created for every new element.
Expand Down
5 changes: 1 addition & 4 deletions nucliadb_vectors/src/data_point/ram_hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ pub struct EntryPoint {
pub layer: usize,
}

#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct Edge {
pub dist: f32,
}
pub type Edge = f32;

#[derive(Default, Clone)]
pub struct RAMLayer {
Expand Down

3 comments on commit eae6515

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: eae6515 Previous: 5a633b0 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13169.686276223609 iter/sec (stddev: 9.63872080549913e-7) 12745.686329086004 iter/sec (stddev: 1.7317806991721728e-7) 0.97

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: eae6515 Previous: 5a633b0 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13067.814493243523 iter/sec (stddev: 8.919025985563626e-7) 12745.686329086004 iter/sec (stddev: 1.7317806991721728e-7) 0.98

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: eae6515 Previous: 5a633b0 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13375.408949204444 iter/sec (stddev: 0.0000028379529676458134) 12745.686329086004 iter/sec (stddev: 1.7317806991721728e-7) 0.95

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.