Skip to content

Commit

Permalink
Symmetrize digest hash function
Browse files Browse the repository at this point in the history
  • Loading branch information
Itolstoganov committed Feb 5, 2024
1 parent 2438de6 commit c4a7f61
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ void StrobemerIndex::assign_randstrobes(size_t ref_index, size_t offset) {
chunk.push_back(randstrobe);
}
for (auto randstrobe : chunk) {
RefRandstrobe::packed_t packed = ref_index << 8;
RefRandstrobe::packed_t packed = (ref_index << 9) | (randstrobe.is_first_main << 8);
packed = packed + (randstrobe.strobe2_pos - randstrobe.strobe1_pos);
randstrobes[offset++] = RefRandstrobe{randstrobe.hash, randstrobe.strobe1_pos, packed};
}
Expand Down
2 changes: 1 addition & 1 deletion src/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ struct StrobemerIndex {
}

// Returns the "first strobe is main" flag of the entry randstrobes[position].
// NOTE(review): this page renders the diff without +/- markers, so the two
// consecutive return statements below are the deleted and the added line of
// this hunk (the file header reports "1 addition & 1 deletion"). Presumably
// the first return is the removed one and the second is the replacement --
// confirm against the repository.
bool is_first_strobe_main(bucket_index_t position) const {
// Variant that reads the flag from the lowest bit of the stored hash.
return (randstrobes[position].hash & 1) == 1;
// Variant that asks the stored entry itself through its is_first_main() accessor.
return randstrobes[position].is_first_main();
}

bool is_filtered(bucket_index_t position) const {
Expand Down
3 changes: 2 additions & 1 deletion src/nam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ std::pair<float, std::vector<Nam>> find_nams(
}
nr_good_hits++;
add_to_hits_per_ref(hits_per_ref[q.is_reverse], q.start, q.end, index, position, false);
} else {
}
else {
size_t partial_pos = index.partial_find(q.hash);
if (partial_pos != index.end()) {
total_hits++;
Expand Down
26 changes: 15 additions & 11 deletions src/randstrobes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ static inline randstrobe_hash_t randstrobe_hash(syncmer_hash_t hash1, syncmer_ha
}

// Combines two syncmer hashes into a single digest value.
// Every return below starts from one hash with its low `digest_size` bits
// cleared and XORs in the other hash shifted right by (64 - digest_size).
// The comparison picks which hash supplies the cleared-low-bits part: the
// smaller one does, so swapping the arguments selects the same pair of roles.
//
// NOTE(review): this page renders the diff without +/- markers, so removed and
// added lines are interleaved here (which is why the braces look unbalanced).
// Presumably the `<=` branch and the returns that force the lowest bit
// (`| 1` / `& ~1`) are the removed lines, and the `<` branch with the plain
// returns is what the commit adds -- confirm against the repository.
// NOTE(review): assumes syncmer_hash_t is 64 bits wide -- TODO confirm. Under
// that assumption digest_size == 0 gives a shift count of 64 and
// digest_size >= 64 gives an out-of-range `>> digest_size`; both are undefined
// behaviour in C++, so verify that callers never pass such values.
static inline digest_hash_t digest_hash(syncmer_hash_t hash1, syncmer_hash_t hash2, size_t digest_size) {
if (hash1 <= hash2) {
// This variant additionally forces the lowest bit of the result to 1.
return (((hash1 >> digest_size) << digest_size) ^ (hash2 >> (64 - digest_size))) | 1;
if (hash1 < hash2) {
// This variant leaves the lowest bit untouched.
return ((hash1 >> digest_size) << digest_size) ^ (hash2 >> (64 - digest_size));
}
// This variant additionally forces the lowest bit of the result to 0.
return (((hash2 >> digest_size) << digest_size) ^ (hash1 >> (64 - digest_size))) & ~1;
// This variant leaves the lowest bit untouched.
return ((hash2 >> digest_size) << digest_size) ^ (hash1 >> (64 - digest_size));
}

std::ostream& operator<<(std::ostream& os, const Syncmer& syncmer) {
Expand Down Expand Up @@ -138,7 +138,8 @@ std::vector<Syncmer> canonical_syncmers(
}

// Writes a textual description of `randstrobe` to `os` and returns the same
// stream so that calls can be chained.
// NOTE(review): this page renders the diff without +/- markers. The first
// `os << ...` statement is presumably the removed one-line version; the
// two-line statement after it prints the same fields plus is_first_main --
// confirm against the repository.
std::ostream& operator<<(std::ostream& os, const Randstrobe& randstrobe) {
// Variant printing hash, strobe1_pos and strobe2_pos only.
os << "Randstrobe(hash=" << randstrobe.hash << ", strobe1_pos=" << randstrobe.strobe1_pos << ", strobe2_pos=" << randstrobe.strobe2_pos << ")";
// Variant that also prints the is_first_main member.
os << "Randstrobe(hash=" << randstrobe.hash << ", strobe1_pos=" << randstrobe.strobe1_pos << ", strobe2_pos="
<< randstrobe.strobe2_pos << ", is_first_main=" << randstrobe.is_first_main << ")";
return os;
}

Expand Down Expand Up @@ -175,9 +176,11 @@ Randstrobe RandstrobeIterator::get(unsigned int strobe1_index) const {
}

// return Randstrobe{randstrobe_hash(strobe1.hash, strobe2.hash), static_cast<uint32_t>(strobe1.position), static_cast<uint32_t>(strobe2.position)};
bool is_first_main = strobe1.hash < strobe2.hash;
return Randstrobe{digest_hash(strobe1.hash, strobe2.hash, digest),
static_cast<uint32_t>(strobe1.position),
static_cast<uint32_t>(strobe2.position)};
static_cast<uint32_t>(strobe2.position),
is_first_main};
}

Randstrobe RandstrobeGenerator::next() {
Expand Down Expand Up @@ -209,9 +212,11 @@ Randstrobe RandstrobeGenerator::next() {
}
syncmers.pop_front();
// return Randstrobe{randstrobe_hash(strobe1.hash, strobe2.hash), static_cast<uint32_t>(strobe1.position), static_cast<uint32_t>(strobe2.position)};
bool is_first_main = strobe1.hash < strobe2.hash;
return Randstrobe{digest_hash(strobe1.hash, strobe2.hash, digest),
static_cast<uint32_t>(strobe1.position),
static_cast<uint32_t>(strobe2.position)};
static_cast<uint32_t>(strobe2.position),
is_first_main};
}

/*
Expand All @@ -233,12 +238,11 @@ QueryRandstrobeVector randstrobes_query(const std::string_view seq, const IndexP
RandstrobeIterator randstrobe_fwd_iter{syncmers, parameters.randstrobe};
while (randstrobe_fwd_iter.has_next()) {
auto randstrobe = randstrobe_fwd_iter.next();
bool is_first_main = (randstrobe.hash & 1) == 1;
uint partial_start = is_first_main ? randstrobe.strobe1_pos : randstrobe.strobe2_pos;
uint partial_start = randstrobe.is_first_main ? randstrobe.strobe1_pos : randstrobe.strobe2_pos;
randstrobes.push_back(
QueryRandstrobe {
randstrobe.hash, randstrobe.strobe1_pos, randstrobe.strobe2_pos + parameters.syncmer.k,
partial_start, partial_start + parameters.syncmer.k, false
partial_start, partial_start + parameters.syncmer.k, randstrobe.is_first_main, false
}
);
}
Expand All @@ -259,12 +263,12 @@ QueryRandstrobeVector randstrobes_query(const std::string_view seq, const IndexP
RandstrobeIterator randstrobe_rc_iter{syncmers, parameters.randstrobe};
while (randstrobe_rc_iter.has_next()) {
auto randstrobe = randstrobe_rc_iter.next();
bool is_first_main = (randstrobe.hash & 1) == 1;
bool is_first_main = randstrobe.is_first_main;
uint partial_start = is_first_main ? randstrobe.strobe1_pos : randstrobe.strobe2_pos;
randstrobes.push_back(
QueryRandstrobe {
randstrobe.hash, randstrobe.strobe1_pos, randstrobe.strobe2_pos + parameters.syncmer.k,
partial_start, partial_start + parameters.syncmer.k, true
partial_start, partial_start + parameters.syncmer.k, randstrobe.is_first_main, true
}
);
}
Expand Down
10 changes: 8 additions & 2 deletions src/randstrobes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ struct RefRandstrobe {
return hash < other.hash;
}

// Returns true when the bit of m_packed at bit position `bit_alloc` is set.
bool is_first_main() const {
    const auto flag_bit = (m_packed >> bit_alloc) & 1;
    return flag_bit != 0;
}

// Returns the reference index stored in the upper bits of m_packed.
// NOTE(review): this page renders the diff without +/- markers, so the two
// consecutive return statements are the deleted and the added line of the
// hunk. Presumably the shift by `bit_alloc` is the removed line and the shift
// by `bit_alloc + 1` is its replacement, which skips one more low bit than
// before -- confirm against the repository.
int reference_index() const {
// Variant that drops the low `bit_alloc` bits.
return m_packed >> bit_alloc;
// Variant that drops the low `bit_alloc + 1` bits.
return m_packed >> (bit_alloc + 1);
}

int strobe2_offset() const {
Expand All @@ -54,6 +58,7 @@ struct QueryRandstrobe {
unsigned int end;
unsigned int partial_start;
unsigned int partial_end;
bool is_first_main;
bool is_reverse;
};

Expand All @@ -67,6 +72,7 @@ struct Randstrobe {
randstrobe_hash_t hash;
unsigned int strobe1_pos;
unsigned int strobe2_pos;
bool is_first_main;

bool operator==(const Randstrobe& other) const {
return hash == other.hash && strobe1_pos == other.strobe1_pos && strobe2_pos == other.strobe2_pos;
Expand Down Expand Up @@ -175,7 +181,7 @@ class RandstrobeGenerator {
{ }

Randstrobe next();
// Returns a Randstrobe built from all-zero initializers.
// NOTE(review): this page renders the diff without +/- markers, so both the
// deleted and the added definition of end() appear. Presumably the
// three-value initializer is the removed line and the four-value one (with
// the extra trailing `false`) is its replacement -- confirm against the
// repository.
Randstrobe end() const { return Randstrobe{0, 0, 0}; }
Randstrobe end() const { return Randstrobe{0, 0, 0, false}; }

private:
SyncmerIterator syncmer_iterator;
Expand Down

0 comments on commit c4a7f61

Please sign in to comment.