From 5c9d0ea89c59062a99771f5f8193d4b52ed05461 Mon Sep 17 00:00:00 2001 From: Mikhail Karasikov Date: Wed, 13 Oct 2021 16:55:49 +0200 Subject: [PATCH] test query on single column --- metagraph/integration_tests/base.py | 5 +- metagraph/integration_tests/test_query.py | 118 ++++++++++++++++++ .../multi_brwt/brwt_builders.cpp | 4 +- .../annotation/test_annotated_dbg_helpers.cpp | 3 +- .../tests/annotation/test_annotation.hpp | 4 +- 5 files changed, 126 insertions(+), 8 deletions(-) diff --git a/metagraph/integration_tests/base.py b/metagraph/integration_tests/base.py index f56a20b174..1f0037edd2 100644 --- a/metagraph/integration_tests/base.py +++ b/metagraph/integration_tests/base.py @@ -104,7 +104,8 @@ def _clean(graph, output, extra_params=''): @staticmethod def _annotate_graph(input, graph_path, output, anno_repr, - separate=False, no_fork_opt=False, no_anchor_opt=False): + separate=False, no_fork_opt=False, no_anchor_opt=False, + anno_type='header'): target_anno = anno_repr noswap = anno_repr.endswith('_noswap') @@ -121,7 +122,7 @@ def _annotate_graph(input, graph_path, output, anno_repr, target_anno = anno_repr anno_repr = 'row' - command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-header \ + command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-{anno_type}\ -i {graph_path} --anno-type {anno_repr} \ -o {output} {input}' diff --git a/metagraph/integration_tests/test_query.py b/metagraph/integration_tests/test_query.py index f3ceddbed5..db8292c280 100644 --- a/metagraph/integration_tests/test_query.py +++ b/metagraph/integration_tests/test_query.py @@ -9,6 +9,7 @@ import numpy as np from helpers import get_test_class_name from base import TestingBase, METAGRAPH, TEST_DATA_DIR, graph_file_extension +import hashlib """Test graph construction""" @@ -536,6 +537,123 @@ def test_query_coordinates(self): self.assertEqual(len(res.stdout), 687712) +@parameterized_class(('graph_repr', 'anno_repr'), + input_values=product( + [repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)], + ANNO_TYPES + ['row_diff_brwt_separate', + 'row_diff_brwt_no_fork_opt', + 'row_diff_brwt_no_anchor_opt'] + ) + product(['succinct_bloom', 'succinct_mask'], ['flat']), + class_name_func=get_test_class_name +) +class TestQuery1Column(TestingBase): + @classmethod + def setUpClass(cls): + cls.tempdir = TemporaryDirectory() + + cls.with_bloom = False + if cls.graph_repr == 'succinct_bloom': + cls.graph_repr = 'succinct' + cls.with_bloom = True + + cls.mask_dummy = False + if cls.graph_repr == 'succinct_mask': + cls.graph_repr = 'succinct' + cls.mask_dummy = True + + construct_command = '{exe} build {mask_dummy} -p {num_threads} \ + --graph {repr} -k 20 -o {outfile} {input}'.format( + exe=METAGRAPH, + mask_dummy='--mask-dummy' if cls.mask_dummy else '', + num_threads=NUM_THREADS, + repr=cls.graph_repr, + outfile=cls.tempdir.name + '/graph', + input=TEST_DATA_DIR + '/transcripts_100.fa' + ) + + res = subprocess.run([construct_command], shell=True) + assert(res.returncode == 0) + + stats_command = '{exe} stats {graph}'.format( + exe=METAGRAPH, + graph=cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr], + ) + res = subprocess.run(stats_command.split(), stdout=PIPE) + assert(res.returncode == 0) + params_str = res.stdout.decode().split('\n')[2:] + assert('k: 20' == params_str[0]) + if cls.graph_repr != 'succinct' or cls.mask_dummy: + assert('nodes (k): 46960' == params_str[1]) + assert('mode: basic' == params_str[2]) + + if cls.with_bloom: + convert_command = '{exe} transform -o {outfile} --initialize-bloom {bloom_param} {input}'.format( + exe=METAGRAPH, + outfile=cls.tempdir.name + '/graph', + bloom_param='--bloom-fpp 0.1', + input=cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr], + ) + res = subprocess.run([convert_command], shell=True) + assert(res.returncode == 0) + + def check_suffix(anno_repr, suffix): + match = anno_repr.endswith(suffix) + if match: + anno_repr = anno_repr[:-len(suffix)] + return anno_repr, match + + cls.anno_repr, separate = check_suffix(cls.anno_repr, '_separate') + cls.anno_repr, no_fork_opt = check_suffix(cls.anno_repr, '_no_fork_opt') + cls.anno_repr, no_anchor_opt = check_suffix(cls.anno_repr, '_no_anchor_opt') + + cls._annotate_graph( + TEST_DATA_DIR + '/transcripts_100.fa', + cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr], + cls.tempdir.name + '/annotation', + cls.anno_repr, + separate, + no_fork_opt, + no_anchor_opt, + anno_type='label 1' + ) + + # check annotation + anno_stats_command = '{exe} stats -a {annotation}'.format( + exe=METAGRAPH, + annotation=cls.tempdir.name + '/annotation' + anno_file_extension[cls.anno_repr], + ) + res = subprocess.run(anno_stats_command.split(), stdout=PIPE) + assert(res.returncode == 0) + params_str = res.stdout.decode().split('\n')[2:] + assert('labels: 1' == params_str[0]) + if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy): + assert('objects: 46960' == params_str[1]) + + if cls.anno_repr.endswith('_noswap'): + cls.anno_repr = cls.anno_repr[:-len('_noswap')] + + assert('representation: ' + cls.anno_repr == params_str[3]) + + def test_query(self): + query_command = f'{METAGRAPH} query \ + -i {self.tempdir.name}/graph{graph_file_extension[self.graph_repr]} \ + -a {self.tempdir.name}/annotation{anno_file_extension[self.anno_repr]} \ + --discovery-fraction 1.0 \ + {TEST_DATA_DIR}/transcripts_1000.fa' + res = subprocess.run(query_command.split(), stdout=PIPE) + self.assertEqual(res.returncode, 0) + self.assertEqual(hashlib.sha224(res.stdout).hexdigest(), '254d173abb255a81a4ab8a685201a73de8dbad4546c378e0a645d454') + + query_command = f'{METAGRAPH} query --count-labels \ + -i {self.tempdir.name}/graph{graph_file_extension[self.graph_repr]} \ + -a {self.tempdir.name}/annotation{anno_file_extension[self.anno_repr]} \ + --discovery-fraction 1.0 \ + {TEST_DATA_DIR}/transcripts_1000.fa' + res = subprocess.run(query_command.split(), stdout=PIPE) + self.assertEqual(res.returncode, 0) + self.assertEqual(hashlib.sha224(res.stdout).hexdigest(), '1bd6c24373812064c3e17e73533de7b1e30baa3cca3a64b460e83cb4') + + @parameterized_class(('graph_repr', 'anno_repr'), input_values=product( [repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)], diff --git a/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp b/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp index c7e6a5acdc..9bc78e4f2b 100644 --- a/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp +++ b/metagraph/src/annotation/binary_matrix/multi_brwt/brwt_builders.cpp @@ -18,7 +18,7 @@ using mtg::common::logger; BRWTBottomUpBuilder::Partitioner BRWTBottomUpBuilder::get_basic_partitioner(size_t arity) { - assert(arity > 1u); + assert(arity > 0u); return [arity](const VectorPtrs &vectors) { if (!vectors.size()) @@ -201,7 +201,7 @@ BRWT BRWTBottomUpBuilder::build( size_t num_threads) { if (!linkage.size()) { - logger->trace("Passed no linkage rules. Assembling Multi-BRWT without internal nodes..."); + logger->warn("Passed no linkage rules. Assembling Multi-BRWT without internal nodes..."); std::vector> columns; diff --git a/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp b/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp index 7412d4f39d..5e2e620d50 100644 --- a/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp +++ b/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp @@ -30,7 +30,7 @@ build_anno_graph(uint64_t k, anno_graph->annotate_sequence(std::string(sequences[i]), { labels[i] }); } - if (!std::is_same>::value) + if (!std::is_same>::value) { anno_graph = std::make_unique( graph, std::unique_ptr( @@ -41,6 +41,7 @@ build_anno_graph(uint64_t k, )) ) ); + } return anno_graph; } diff --git a/metagraph/tests/annotation/test_annotation.hpp b/metagraph/tests/annotation/test_annotation.hpp index a81ae5073c..ae7da0bb5f 100644 --- a/metagraph/tests/annotation/test_annotation.hpp +++ b/metagraph/tests/annotation/test_annotation.hpp @@ -44,9 +44,7 @@ class AnnotatorTest : public ::testing::Test { virtual void set(annot::ColumnCompressed<>&& column_annotator) { if constexpr(std::is_same_v) { - annotation = annot::convert_to_simple_BRWT( - std::move(column_annotator) - ); + annotation = annot::convert_to_simple_BRWT(std::move(column_annotator)); } else if constexpr(std::is_same_v>) { annotation.reset(new annot::RowCompressed<>(column_annotator.num_objects()));