From 886ecc5a48b455d271ad3bb66257f5de4a274550 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 30 Aug 2023 14:52:49 -0700 Subject: [PATCH] better help; avoid recalc threshold --- Cargo.toml | 2 +- src/lib.rs | 7 ++----- src/python/pyo3_branchwater/__init__.py | 3 ++- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1d441084..49a2b9db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] pyo3 = { version = "0.18.1", features = ["extension-module", "anyhow"] } rayon = "1.5.1" serde = { version = "1.0.136", features = ["derive"] } -sourmash = { git = "https://github.com/sourmash-bio/sourmash", branch = "ntp/mastiff_roaring_prot" } +sourmash = { git = "https://github.com/sourmash-bio/sourmash", "rev" = "ff1092f8f366339caa59d7203f623813228f4356" } clap = { version = "3.1.0", features = ["derive"] } serde_json = "1.0.79" niffler = "2.4.0" diff --git a/src/lib.rs b/src/lib.rs index a74b5411..596e91d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -862,7 +862,6 @@ fn mastiff_manysearch>( queries_file: P, index: P, template: Sketch, - threshold_bp: usize, minimum_containment: f64, output: Option

, ) -> Result<(), Box> { @@ -923,10 +922,9 @@ fn mastiff_manysearch>( if let Ok(query_sig) = Signature::from_path(filename) { if let Some(query) = prepare_query(&query_sig, &template) { let query_size = query.minhash.size() as f64; - let threshold = threshold_bp / query.minhash.scaled() as usize; // search mastiff db let counter = db.counter_for_query(&query.minhash); - let matches = db.matches_from_counter(counter, threshold); + let matches = db.matches_from_counter(counter, minimum_containment as usize); // filter the matches for containment @@ -1142,8 +1140,7 @@ fn do_manysearch(querylist_path: String, // if siglist_path is revindex, run mastiff_manysearch; otherwise run manysearch if is_revindex_database(siglist_path.as_ref()) { let template = build_template(ksize, scaled); - let threshold_bp = (threshold * scaled as f64) as usize; - match mastiff_manysearch(querylist_path, siglist_path, template, threshold_bp, threshold, output_path) { + match mastiff_manysearch(querylist_path, siglist_path, template, threshold, output_path) { Ok(_) => Ok(0), Err(e) => { eprintln!("Error: {e}"); diff --git a/src/python/pyo3_branchwater/__init__.py b/src/python/pyo3_branchwater/__init__.py index 307d040a..77bd4f16 100755 --- a/src/python/pyo3_branchwater/__init__.py +++ b/src/python/pyo3_branchwater/__init__.py @@ -77,7 +77,8 @@ class Branchwater_Fastgather(CommandLinePlugin): def __init__(self, p): super().__init__(p) p.add_argument('query_sig', help="metagenome sketch") - p.add_argument('against_paths', help="a text file containing paths to .sig/.sig.gz files") + p.add_argument('against_paths', help="a text file containing paths to .sig/.sig.gz files \ + OR a branchwater indexed database generated with 'sourmash scripts index'") p.add_argument('-o', '--output-gather', required=True, help="save gather output (minimum metagenome cover) to this file") p.add_argument('--output-prefetch',