Skip to content

Commit

Permalink
test query on single column
Browse files Browse the repository at this point in the history
  • Loading branch information
karasikov committed Oct 14, 2021
1 parent 281b796 commit 5c9d0ea
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 8 deletions.
5 changes: 3 additions & 2 deletions metagraph/integration_tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def _clean(graph, output, extra_params=''):

@staticmethod
def _annotate_graph(input, graph_path, output, anno_repr,
separate=False, no_fork_opt=False, no_anchor_opt=False):
separate=False, no_fork_opt=False, no_anchor_opt=False,
anno_type='header'):
target_anno = anno_repr

noswap = anno_repr.endswith('_noswap')
Expand All @@ -121,7 +122,7 @@ def _annotate_graph(input, graph_path, output, anno_repr,
target_anno = anno_repr
anno_repr = 'row'

command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-header \
command = f'{METAGRAPH} annotate -p {NUM_THREADS} --anno-{anno_type}\
-i {graph_path} --anno-type {anno_repr} \
-o {output} {input}'

Expand Down
118 changes: 118 additions & 0 deletions metagraph/integration_tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
from helpers import get_test_class_name
from base import TestingBase, METAGRAPH, TEST_DATA_DIR, graph_file_extension
import hashlib


"""Test graph construction"""
Expand Down Expand Up @@ -536,6 +537,123 @@ def test_query_coordinates(self):
self.assertEqual(len(res.stdout), 687712)


# Query tests against an annotation that holds exactly one label (one column).
# The class is instantiated once per (graph_repr, anno_repr) pair listed below.
# NOTE(review): `product(...) + product(...)` only works if the `product` in
# scope returns a list (itertools.product objects do not support `+`) --
# confirm which `product` this module uses.
@parameterized_class(('graph_repr', 'anno_repr'),
    input_values=product(
        [repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)],
        ANNO_TYPES + ['row_diff_brwt_separate',
                      'row_diff_brwt_no_fork_opt',
                      'row_diff_brwt_no_anchor_opt']
    ) + product(['succinct_bloom', 'succinct_mask'], ['flat']),
    class_name_func=get_test_class_name
)
class TestQuery1Column(TestingBase):
    """Build a k=20 graph from transcripts_100.fa, annotate it with the single
    label passed via `anno_type='label 1'`, and check query output checksums.
    """

    @classmethod
    def setUpClass(cls):
        """Build the graph and its single-label annotation once per class.

        Everything is written into `cls.tempdir`. The `stats` output of both
        the graph and the annotation is checked before any test runs.
        """
        cls.tempdir = TemporaryDirectory()

        # 'succinct_bloom' and 'succinct_mask' are pseudo graph types: both are
        # built as 'succinct', with a flag recording the extra step to apply.
        cls.with_bloom = False
        if cls.graph_repr == 'succinct_bloom':
            cls.graph_repr = 'succinct'
            cls.with_bloom = True

        cls.mask_dummy = False
        if cls.graph_repr == 'succinct_mask':
            cls.graph_repr = 'succinct'
            cls.mask_dummy = True

        # The continuation line below is part of the string literal, so it is
        # deliberately left unindented.
        construct_command = '{exe} build {mask_dummy} -p {num_threads} \
--graph {repr} -k 20 -o {outfile} {input}'.format(
            exe=METAGRAPH,
            mask_dummy='--mask-dummy' if cls.mask_dummy else '',
            num_threads=NUM_THREADS,
            repr=cls.graph_repr,
            outfile=cls.tempdir.name + '/graph',
            input=TEST_DATA_DIR + '/transcripts_100.fa'
        )

        res = subprocess.run([construct_command], shell=True)
        assert(res.returncode == 0)

        # Sanity-check the built graph through `stats`.
        stats_command = '{exe} stats {graph}'.format(
            exe=METAGRAPH,
            graph=cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr],
        )
        res = subprocess.run(stats_command.split(), stdout=PIPE)
        assert(res.returncode == 0)
        # The first two lines of the stats output are skipped.
        params_str = res.stdout.decode().split('\n')[2:]
        assert('k: 20' == params_str[0])
        # The node count is not checked for an unmasked succinct graph.
        # NOTE(review): presumably because it then includes dummy k-mers -- confirm.
        if cls.graph_repr != 'succinct' or cls.mask_dummy:
            assert('nodes (k): 46960' == params_str[1])
        assert('mode: basic' == params_str[2])

        if cls.with_bloom:
            # Run `transform --initialize-bloom` on the built graph with
            # --bloom-fpp 0.1, writing to the same output prefix.
            convert_command = '{exe} transform -o {outfile} --initialize-bloom {bloom_param} {input}'.format(
                exe=METAGRAPH,
                outfile=cls.tempdir.name + '/graph',
                bloom_param='--bloom-fpp 0.1',
                input=cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr],
            )
            res = subprocess.run([convert_command], shell=True)
            assert(res.returncode == 0)

        def check_suffix(anno_repr, suffix):
            """Return (anno_repr without `suffix`, True) if it ends with
            `suffix`, otherwise (anno_repr unchanged, False)."""
            match = anno_repr.endswith(suffix)
            if match:
                anno_repr = anno_repr[:-len(suffix)]
            return anno_repr, match

        # Peel the option suffixes off the parameterized annotation name and
        # turn each one into a boolean flag for _annotate_graph.
        cls.anno_repr, separate = check_suffix(cls.anno_repr, '_separate')
        cls.anno_repr, no_fork_opt = check_suffix(cls.anno_repr, '_no_fork_opt')
        cls.anno_repr, no_anchor_opt = check_suffix(cls.anno_repr, '_no_anchor_opt')

        cls._annotate_graph(
            TEST_DATA_DIR + '/transcripts_100.fa',
            cls.tempdir.name + '/graph' + graph_file_extension[cls.graph_repr],
            cls.tempdir.name + '/annotation',
            cls.anno_repr,
            separate,
            no_fork_opt,
            no_anchor_opt,
            # The stats check below expects exactly one label from this.
            # NOTE(review): presumably expands to `--anno-label 1` -- confirm in base.py.
            anno_type='label 1'
        )

        # check annotation
        anno_stats_command = '{exe} stats -a {annotation}'.format(
            exe=METAGRAPH,
            annotation=cls.tempdir.name + '/annotation' + anno_file_extension[cls.anno_repr],
        )
        res = subprocess.run(anno_stats_command.split(), stdout=PIPE)
        assert(res.returncode == 0)
        # As with the graph stats, the first two output lines are skipped.
        params_str = res.stdout.decode().split('\n')[2:]
        assert('labels: 1' == params_str[0])
        # The object count is not checked for 'hashfast' or unmasked succinct graphs.
        # NOTE(review): the reason for the 'hashfast' exclusion is not visible here -- confirm.
        if cls.graph_repr != 'hashfast' and (cls.graph_repr != 'succinct' or cls.mask_dummy):
            assert('objects: 46960' == params_str[1])

        # `stats` is compared against the representation name without '_noswap'.
        if cls.anno_repr.endswith('_noswap'):
            cls.anno_repr = cls.anno_repr[:-len('_noswap')]

        assert('representation: ' + cls.anno_repr == params_str[3])

    def test_query(self):
        # Query transcripts_1000.fa with --discovery-fraction 1.0 and compare
        # the SHA-224 of stdout with a fixed expected digest.
        # The continuation lines are part of the f-string literal, so they are
        # left unindented.
        query_command = f'{METAGRAPH} query \
-i {self.tempdir.name}/graph{graph_file_extension[self.graph_repr]} \
-a {self.tempdir.name}/annotation{anno_file_extension[self.anno_repr]} \
--discovery-fraction 1.0 \
{TEST_DATA_DIR}/transcripts_1000.fa'
        res = subprocess.run(query_command.split(), stdout=PIPE)
        self.assertEqual(res.returncode, 0)
        self.assertEqual(hashlib.sha224(res.stdout).hexdigest(), '254d173abb255a81a4ab8a685201a73de8dbad4546c378e0a645d454')

        # Same query with --count-labels; it has its own expected digest.
        query_command = f'{METAGRAPH} query --count-labels \
-i {self.tempdir.name}/graph{graph_file_extension[self.graph_repr]} \
-a {self.tempdir.name}/annotation{anno_file_extension[self.anno_repr]} \
--discovery-fraction 1.0 \
{TEST_DATA_DIR}/transcripts_1000.fa'
        res = subprocess.run(query_command.split(), stdout=PIPE)
        self.assertEqual(res.returncode, 0)
        self.assertEqual(hashlib.sha224(res.stdout).hexdigest(), '1bd6c24373812064c3e17e73533de7b1e30baa3cca3a64b460e83cb4')


@parameterized_class(('graph_repr', 'anno_repr'),
input_values=product(
[repr for repr in GRAPH_TYPES if not (repr == 'bitmap' and PROTEIN_MODE)],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ using mtg::common::logger;

BRWTBottomUpBuilder::Partitioner
BRWTBottomUpBuilder::get_basic_partitioner(size_t arity) {
assert(arity > 1u);
assert(arity > 0u);

return [arity](const VectorPtrs &vectors) {
if (!vectors.size())
Expand Down Expand Up @@ -201,7 +201,7 @@ BRWT BRWTBottomUpBuilder::build(
size_t num_threads) {

if (!linkage.size()) {
logger->trace("Passed no linkage rules. Assembling Multi-BRWT without internal nodes...");
logger->warn("Passed no linkage rules. Assembling Multi-BRWT without internal nodes...");

std::vector<std::unique_ptr<bit_vector>> columns;

Expand Down
3 changes: 2 additions & 1 deletion metagraph/tests/annotation/test_annotated_dbg_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ build_anno_graph(uint64_t k,
anno_graph->annotate_sequence(std::string(sequences[i]), { labels[i] });
}

if (!std::is_same<Annotation, annot::ColumnCompressed<>>::value)
if (!std::is_same<Annotation, annot::ColumnCompressed<>>::value) {
anno_graph = std::make_unique<AnnotatedDBG>(
graph,
std::unique_ptr<AnnotatedDBG::Annotator>(
Expand All @@ -41,6 +41,7 @@ build_anno_graph(uint64_t k,
))
)
);
}

return anno_graph;
}
Expand Down
4 changes: 1 addition & 3 deletions metagraph/tests/annotation/test_annotation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ class AnnotatorTest : public ::testing::Test {

virtual void set(annot::ColumnCompressed<>&& column_annotator) {
if constexpr(std::is_same_v<Annotator, annot::MultiBRWTAnnotator>) {
annotation = annot::convert_to_simple_BRWT(
std::move(column_annotator)
);
annotation = annot::convert_to_simple_BRWT(std::move(column_annotator));

} else if constexpr(std::is_same_v<Annotator, annot::RowCompressed<>>) {
annotation.reset(new annot::RowCompressed<>(column_annotator.num_objects()));
Expand Down

0 comments on commit 5c9d0ea

Please sign in to comment.