diff --git a/MANIFEST.in b/MANIFEST.in index 2141ac2..6d155ad 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,7 +4,7 @@ graft src include README.rst include LICENSE.txt include tox.ini .travis.yml -include src/longbow/preconfigured_models/**/*.json +include src/longbow/models/*.json prune **/.hypothesis diff --git a/src/longbow/preconfigured_models/__init__.py b/src/longbow/models/__init__.py similarity index 100% rename from src/longbow/preconfigured_models/__init__.py rename to src/longbow/models/__init__.py diff --git a/src/longbow/models/bulk_10x5p.json b/src/longbow/models/bulk_10x5p.json new file mode 100644 index 0000000..8493146 --- /dev/null +++ b/src/longbow/models/bulk_10x5p.json @@ -0,0 +1,59 @@ +{ + "cdna": { + "description": "bulk 10x 5' kit", + "version": "3.0.0", + "structure": [ + "5p_Adapter", + "UMI", + "SLS", + "cDNA", + "Poly_A", + "sample_index", + "3p_Adapter" + ], + "adapters": { + "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", + "UMI": { + "FixedLengthRandomBases": 10 + }, + "SLS": "TTTCTTATATGGG", + "cDNA": "random", + "Poly_A": { + "HomopolymerRepeat": [ + "A", + 30 + ] + }, + "sample_index": { + "FixedLengthRandomBases": 10 + }, + "3p_Adapter": "CTCTGCGTTGATACCACTGCTT" + }, + "named_random_segments": [ + "UMI", + "cDNA", + "sample_index" + ], + "coding_region": "cDNA", + "annotation_segments": { + "UMI": [ + [ + "ZU", + "XU" + ], + [ + "XM", + "XU" + ] + ], + "sample_index": [ + [ + "id", + "ip" + ] + ] + }, + "deprecated": false, + "name": "bulk_10x5p" + } +} \ No newline at end of file diff --git a/src/longbow/models/bulk_teloprimeV2.json b/src/longbow/models/bulk_teloprimeV2.json new file mode 100644 index 0000000..9b0004b --- /dev/null +++ b/src/longbow/models/bulk_teloprimeV2.json @@ -0,0 +1,42 @@ +{ + "cdna": { + "description": "Lexogen TeloPrime V2 kit", + "version": "3.0.0", + "structure": [ + "TPV2_adapter", + "cDNA", + "Poly_A", + "idx", + "rev_bind" + ], + "adapters": { + "TPV2_adapter": "CTACACGACGCTCTTCCGATCTTGGATTGATATGTAATACGACTCACTATAG", + "cDNA": "random", + "Poly_A": { + "HomopolymerRepeat": [ + "A", + 30 + ] + }, + "idx": { + "FixedLengthRandomBases": 10 + }, + "rev_bind": "CTCTGCGTTGATACCACTGCTT" + }, + "named_random_segments": [ + "idx", + "cDNA" + ], + "coding_region": "cDNA", + "annotation_segments": { + "idx": [ + [ + "BC", + "XB" + ] + ] + }, + "deprecated": false, + "name": "bulk_teloprimeV2" + } +} \ No newline at end of file diff --git a/src/longbow/models/isoseq.json b/src/longbow/models/isoseq.json new file mode 100644 index 0000000..2b050fd --- /dev/null +++ b/src/longbow/models/isoseq.json @@ -0,0 +1,16 @@ +{ + "array": { + "description": "PacBio IsoSeq model", + "version": "3.0.0", + "structure": [ + "V", + "M" + ], + "adapters": { + "V": "TCTACACGACGCTCTTCCGATCT", + "M": "GTACTCTGCGTTGATACCACTGCTT" + }, + "deprecated": false, + "name": "isoseq" + } +} \ No newline at end of file diff --git a/src/longbow/models/mas_10.json b/src/longbow/models/mas_10.json new file mode 100644 index 0000000..1e01048 --- /dev/null +++ b/src/longbow/models/mas_10.json @@ -0,0 +1,34 @@ +{ + "array": { + "description": "10-element MAS-ISO-seq array", + "version": "3.0.0", + "structure": [ + "Q", + "C", + "M", + "I", + "O", + "J", + "B", + "D", + "K", + "H", + "R" + ], + "adapters": { + "Q": "AAGCACCATAATGTGT", + "C": "ACTCTGTCAGGTCCGA", + "M": "ACCTAGATCAGAGCCT", + "I": "AGTGCGTTGCGAATTG", + "O": "AAGTCACCGGCACCTT", + "J": "AATTGCGTAGTTGGCC", + "B": "ACTTGTAAGCTGTCTA", + "D": "ACCTCCTCCTCCAGAA", + "K": "ACACTTGGTCGCAATC", + "H": "ATGTTGAATCCTAGCG", + "R": "AACCGGACACACTTAG" + }, + "deprecated": false, + "name": "mas_10" + } +} \ No newline at end of file diff --git a/src/longbow/models/mas_15.json b/src/longbow/models/mas_15.json new file mode 100644 index 0000000..0536574 --- /dev/null +++ b/src/longbow/models/mas_15.json @@ -0,0 +1,44 @@ +{ + "array": { + "description": "15-element MAS-ISO-seq array", + "version": "3.0.0", + "structure": [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P" + ], + "adapters": { + "A": "AGCTTACTTGTGAAGA", + "B": "ACTTGTAAGCTGTCTA", + "C": "ACTCTGTCAGGTCCGA", + "D": "ACCTCCTCCTCCAGAA", + "E": "AACCGGACACACTTAG", + "F": "AGAGTCCAATTCGCAG", + "G": "AATCAAGGCTTAACGG", + "H": "ATGTTGAATCCTAGCG", + "I": "AGTGCGTTGCGAATTG", + "J": "AATTGCGTAGTTGGCC", + "K": "ACACTTGGTCGCAATC", + "L": "AGTAAGCCTTCGTGTC", + "M": "ACCTAGATCAGAGCCT", + "N": "AGGTATGCCGGTTAAG", + "O": "AAGTCACCGGCACCTT", + "P": "ATGAAGTGGCTCGAGA" + }, + "deprecated": false, + "name": "mas_15" + } +} \ No newline at end of file diff --git a/src/longbow/models/mas_16.json b/src/longbow/models/mas_16.json new file mode 100644 index 0000000..c262301 --- /dev/null +++ b/src/longbow/models/mas_16.json @@ -0,0 +1,46 @@ +{ + "array": { + "description": "16-element MAS-ISO-seq array", + "version": "3.0.0", + "structure": [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q" + ], + "adapters": { + "A": "AGCTTACTTGTGAAGA", + "B": "ACTTGTAAGCTGTCTA", + "C": "ACTCTGTCAGGTCCGA", + "D": "ACCTCCTCCTCCAGAA", + "E": "AACCGGACACACTTAG", + "F": "AGAGTCCAATTCGCAG", + "G": "AATCAAGGCTTAACGG", + "H": "ATGTTGAATCCTAGCG", + "I": "AGTGCGTTGCGAATTG", + "J": "AATTGCGTAGTTGGCC", + "K": "ACACTTGGTCGCAATC", + "L": "AGTAAGCCTTCGTGTC", + "M": "ACCTAGATCAGAGCCT", + "N": "AGGTATGCCGGTTAAG", + "O": "AAGTCACCGGCACCTT", + "P": "ATGAAGTGGCTCGAGA", + "Q": "AGTAGCTGTGTGCA" + }, + "deprecated": false, + "name": "mas_16" + } +} \ No newline at end of file diff --git a/src/longbow/models/sc_10x3p.json b/src/longbow/models/sc_10x3p.json new file mode 100644 index 0000000..afa4c2b --- /dev/null +++ b/src/longbow/models/sc_10x3p.json @@ -0,0 +1,61 @@ +{ + "cdna": { + "description": "single-cell 10x 3' kit", + "version": "3.0.0", + "structure": [ + "5p_Adapter", + "CBC", + "UMI", + "Poly_T", + "cDNA", + "3p_Adapter" + ], + "adapters": { + "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", + "CBC": { + "FixedLengthRandomBases": 16 + }, + "UMI": { + "FixedLengthRandomBases": 12 + }, + "Poly_T": { + "HomopolymerRepeat": [ + "T", + 30 + ] + }, + "cDNA": "random", + "3p_Adapter": "CCCATGTACTCTGCGTTGATACCACTGCTT" + }, + "named_random_segments": [ + "CBC", + "UMI", + "cDNA" + ], + "coding_region": "cDNA", + "annotation_segments": { + "UMI": [ + [ + "ZU", + "XU" + ], + [ + "XM", + "XU" + ] + ], + "CBC": [ + [ + "CR", + "XB" + ], + [ + "XC", + "XB" + ] + ] + }, + "deprecated": false, + "name": "sc_10x3p" + } +} \ No newline at end of file diff --git a/src/longbow/models/sc_10x5p.json b/src/longbow/models/sc_10x5p.json new file mode 100644 index 0000000..35bf797 --- /dev/null +++ b/src/longbow/models/sc_10x5p.json @@ -0,0 +1,63 @@ +{ + "cdna": { + "description": "single-cell 10x 5' kit", + "version": "3.0.0", + "structure": [ + "5p_Adapter", + "CBC", + "UMI", + "SLS", + "cDNA", + "Poly_A", + "3p_Adapter" + ], + "adapters": { + "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", + "CBC": { + "FixedLengthRandomBases": 16 + }, + "UMI": { + "FixedLengthRandomBases": 10 + }, + "SLS": "TTTCTTATATGGG", + "cDNA": "random", + "Poly_A": { + "HomopolymerRepeat": [ + "A", + 30 + ] + }, + "3p_Adapter": "GTACTCTGCGTTGATACCACTGCTT" + }, + "named_random_segments": [ + "CBC", + "UMI", + "cDNA" + ], + "coding_region": "cDNA", + "annotation_segments": { + "UMI": [ + [ + "ZU", + "XU" + ], + [ + "XM", + "XU" + ] + ], + "CBC": [ + [ + "CR", + "XB" + ], + [ + "XC", + "XB" + ] + ] + }, + "deprecated": false, + "name": "sc_10x5p" + } +} \ No newline at end of file diff --git a/src/longbow/models/spatial_slideseq.json b/src/longbow/models/spatial_slideseq.json new file mode 100644 index 0000000..5597265 --- /dev/null +++ b/src/longbow/models/spatial_slideseq.json @@ -0,0 +1,70 @@ +{ + "cdna": { + "description": "Slide-seq protocol", + "version": "3.0.0", + "structure": [ + "5p_Adapter", + "SBC2", + "SLS2", + "SBC1", + "UMI", + "Poly_T", + "cDNA", + "3p_Adapter" + ], + "adapters": { + "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", + "SBC2": { + "FixedLengthRandomBases": 8 + }, + "SLS2": "TCTTCAGCGTTCCCGAGA", + "SBC1": { + "FixedLengthRandomBases": 6 + }, + "UMI": { + "FixedLengthRandomBases": 9 + }, + "Poly_T": { + "HomopolymerRepeat": [ + "T", + 30 + ] + }, + "cDNA": "random", + "3p_Adapter": "CCCATGTACTCTGCGTTGATACCACTGCTT" + }, + "named_random_segments": [ + "UMI", + "SBC2", + "SBC1", + "cDNA" + ], + "coding_region": "cDNA", + "annotation_segments": { + "UMI": [ + [ + "ZU", + "XU" + ], + [ + "XM", + "XU" + ] + ], + "SBC1": [ + [ + "X1", + "XP" + ] + ], + "SBC2": [ + [ + "X2", + "XR" + ] + ] + }, + "deprecated": false, + "name": "spatial_slideseq" + } +} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/array/isoseq.json b/src/longbow/preconfigured_models/array/isoseq.json deleted file mode 100644 index a6321b3..0000000 --- a/src/longbow/preconfigured_models/array/isoseq.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "description": "PacBio IsoSeq model", - "version": "3.0.0", - "structure": [ - "V", - "M" - ], - "adapters": { - "V": "TCTACACGACGCTCTTCCGATCT", - "M": "GTACTCTGCGTTGATACCACTGCTT" - }, - "deprecated": false, - "name": "isoseq" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/array/mas_10.json b/src/longbow/preconfigured_models/array/mas_10.json deleted file mode 100644 index 456079f..0000000 --- a/src/longbow/preconfigured_models/array/mas_10.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "description": "10-element MAS-ISO-seq array", - "version": "3.0.0", - "structure": [ - "Q", - "C", - "M", - "I", - "O", - "J", - "B", - "D", - "K", - "H", - "R" - ], - "adapters": { - "Q": "AAGCACCATAATGTGT", - "C": "ACTCTGTCAGGTCCGA", - "M": "ACCTAGATCAGAGCCT", - "I": "AGTGCGTTGCGAATTG", - "O": "AAGTCACCGGCACCTT", - "J": "AATTGCGTAGTTGGCC", - "B": "ACTTGTAAGCTGTCTA", - "D": "ACCTCCTCCTCCAGAA", - "K": "ACACTTGGTCGCAATC", - "H": "ATGTTGAATCCTAGCG", - "R": "AACCGGACACACTTAG" - }, - "deprecated": false, - "name": "mas_10" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/array/mas_15.json b/src/longbow/preconfigured_models/array/mas_15.json deleted file mode 100644 index b1aae4f..0000000 --- a/src/longbow/preconfigured_models/array/mas_15.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "description": "15-element MAS-ISO-seq array", - "version": "3.0.0", - "structure": [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P" - ], - "adapters": { - "A": "AGCTTACTTGTGAAGA", - "B": "ACTTGTAAGCTGTCTA", - "C": "ACTCTGTCAGGTCCGA", - "D": "ACCTCCTCCTCCAGAA", - "E": "AACCGGACACACTTAG", - "F": "AGAGTCCAATTCGCAG", - "G": "AATCAAGGCTTAACGG", - "H": "ATGTTGAATCCTAGCG", - "I": "AGTGCGTTGCGAATTG", - "J": "AATTGCGTAGTTGGCC", - "K": "ACACTTGGTCGCAATC", - "L": "AGTAAGCCTTCGTGTC", - "M": "ACCTAGATCAGAGCCT", - "N": "AGGTATGCCGGTTAAG", - "O": "AAGTCACCGGCACCTT", - "P": "ATGAAGTGGCTCGAGA" - }, - "deprecated": false, - "name": "mas_15" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/array/mas_16.json b/src/longbow/preconfigured_models/array/mas_16.json deleted file mode 100644 index 0b9ef39..0000000 --- a/src/longbow/preconfigured_models/array/mas_16.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "description": "16-element MAS-ISO-seq array", - "version": "3.0.0", - "structure": [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q" - ], - "adapters": { - "A": "AGCTTACTTGTGAAGA", - "B": "ACTTGTAAGCTGTCTA", - "C": "ACTCTGTCAGGTCCGA", - "D": "ACCTCCTCCTCCAGAA", - "E": "AACCGGACACACTTAG", - "F": "AGAGTCCAATTCGCAG", - "G": "AATCAAGGCTTAACGG", - "H": "ATGTTGAATCCTAGCG", - "I": "AGTGCGTTGCGAATTG", - "J": "AATTGCGTAGTTGGCC", - "K": "ACACTTGGTCGCAATC", - "L": "AGTAAGCCTTCGTGTC", - "M": "ACCTAGATCAGAGCCT", - "N": "AGGTATGCCGGTTAAG", - "O": "AAGTCACCGGCACCTT", - "P": "ATGAAGTGGCTCGAGA", - "Q": "AGTAGCTGTGTGCA" - }, - "deprecated": false, - "name": "mas_16" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/cdna/bulk_10x5p.json b/src/longbow/preconfigured_models/cdna/bulk_10x5p.json deleted file mode 100644 index 16b50d0..0000000 --- a/src/longbow/preconfigured_models/cdna/bulk_10x5p.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "description": "bulk 10x 5' kit", - "version": "3.0.0", - "structure": [ - "5p_Adapter", - "UMI", - "SLS", - "cDNA", - "Poly_A", - "sample_index", - "3p_Adapter" - ], - "adapters": { - "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", - "UMI": { - "FixedLengthRandomBases": 10 - }, - "SLS": "TTTCTTATATGGG", - "cDNA": "random", - "Poly_A": { - "HomopolymerRepeat": [ - "A", - 30 - ] - }, - "sample_index": { - "FixedLengthRandomBases": 10 - }, - "3p_Adapter": "CTCTGCGTTGATACCACTGCTT" - }, - "named_random_segments": [ - "UMI", - "cDNA", - "sample_index" - ], - "coding_region": "cDNA", - "annotation_segments": { - "UMI": [ - [ - "ZU", - "XU" - ], - [ - "XM", - "XU" - ] - ], - "sample_index": [ - [ - "id", - "ip" - ] - ] - }, - "deprecated": false, - "name": "bulk_10x5p" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/cdna/bulk_teloprimeV2.json b/src/longbow/preconfigured_models/cdna/bulk_teloprimeV2.json deleted file mode 100644 index f3d02b4..0000000 --- a/src/longbow/preconfigured_models/cdna/bulk_teloprimeV2.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "description": "Lexogen TeloPrime V2 kit", - "version": "3.0.0", - "structure": [ - "TPV2_adapter", - "cDNA", - "Poly_A", - "idx", - "rev_bind" - ], - "adapters": { - "TPV2_adapter": "CTACACGACGCTCTTCCGATCTTGGATTGATATGTAATACGACTCACTATAG", - "cDNA": "random", - "Poly_A": { - "HomopolymerRepeat": [ - "A", - 30 - ] - }, - "idx": { - "FixedLengthRandomBases": 10 - }, - "rev_bind": "CTCTGCGTTGATACCACTGCTT" - }, - "named_random_segments": [ - "idx", - "cDNA" - ], - "coding_region": "cDNA", - "annotation_segments": { - "idx": [ - [ - "BC", - "XB" - ] - ] - }, - "deprecated": false, - "name": "bulk_teloprimeV2" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/cdna/sc_10x3p.json b/src/longbow/preconfigured_models/cdna/sc_10x3p.json deleted file mode 100644 index 04f5105..0000000 --- a/src/longbow/preconfigured_models/cdna/sc_10x3p.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "description": "single-cell 10x 3' kit", - "version": "3.0.0", - "structure": [ - "5p_Adapter", - "CBC", - "UMI", - "Poly_T", - "cDNA", - "3p_Adapter" - ], - "adapters": { - "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", - "CBC": { - "FixedLengthRandomBases": 16 - }, - "UMI": { - "FixedLengthRandomBases": 12 - }, - "Poly_T": { - "HomopolymerRepeat": [ - "T", - 30 - ] - }, - "cDNA": "random", - "3p_Adapter": "CCCATGTACTCTGCGTTGATACCACTGCTT" - }, - "named_random_segments": [ - "CBC", - "UMI", - "cDNA" - ], - "coding_region": "cDNA", - "annotation_segments": { - "UMI": [ - [ - "ZU", - "XU" - ], - [ - "XM", - "XU" - ] - ], - "CBC": [ - [ - "CR", - "XB" - ], - [ - "XC", - "XB" - ] - ] - }, - "deprecated": false, - "name": "sc_10x3p" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/cdna/sc_10x5p.json b/src/longbow/preconfigured_models/cdna/sc_10x5p.json deleted file mode 100644 index f21576a..0000000 --- a/src/longbow/preconfigured_models/cdna/sc_10x5p.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "description": "single-cell 10x 5' kit", - "version": "3.0.0", - "structure": [ - "5p_Adapter", - "CBC", - "UMI", - "SLS", - "cDNA", - "Poly_A", - "3p_Adapter" - ], - "adapters": { - "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", - "CBC": { - "FixedLengthRandomBases": 16 - }, - "UMI": { - "FixedLengthRandomBases": 10 - }, - "SLS": "TTTCTTATATGGG", - "cDNA": "random", - "Poly_A": { - "HomopolymerRepeat": [ - "A", - 30 - ] - }, - "3p_Adapter": "GTACTCTGCGTTGATACCACTGCTT" - }, - "named_random_segments": [ - "CBC", - "UMI", - "cDNA" - ], - "coding_region": "cDNA", - "annotation_segments": { - "UMI": [ - [ - "ZU", - "XU" - ], - [ - "XM", - "XU" - ] - ], - "CBC": [ - [ - "CR", - "XB" - ], - [ - "XC", - "XB" - ] - ] - }, - "deprecated": false, - "name": "sc_10x5p" -} \ No newline at end of file diff --git a/src/longbow/preconfigured_models/cdna/spatial_slideseq.json b/src/longbow/preconfigured_models/cdna/spatial_slideseq.json deleted file mode 100644 index 3ace8e6..0000000 --- a/src/longbow/preconfigured_models/cdna/spatial_slideseq.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "description": "Slide-seq protocol", - "version": "3.0.0", - "structure": [ - "5p_Adapter", - "SBC2", - "SLS2", - "SBC1", - "UMI", - "Poly_T", - "cDNA", - "3p_Adapter" - ], - "adapters": { - "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", - "SBC2": { - "FixedLengthRandomBases": 8 - }, - "SLS2": "TCTTCAGCGTTCCCGAGA", - "SBC1": { - "FixedLengthRandomBases": 6 - }, - "UMI": { - "FixedLengthRandomBases": 9 - }, - "Poly_T": { - "HomopolymerRepeat": [ - "T", - 30 - ] - }, - "cDNA": "random", - "3p_Adapter": "CCCATGTACTCTGCGTTGATACCACTGCTT" - }, - "named_random_segments": [ - "UMI", - "SBC2", - "SBC1", - "cDNA" - ], - "coding_region": "cDNA", - "annotation_segments": { - "UMI": [ - [ - "ZU", - "XU" - ], - [ - "XM", - "XU" - ] - ], - "SBC1": [ - [ - "X1", - "XP" - ] - ], - "SBC2": [ - [ - "X2", - "XR" - ] - ] - }, - "deprecated": false, - "name": "spatial_slideseq" -} \ No newline at end of file diff --git a/src/longbow/utils/model_utils.py b/src/longbow/utils/model_utils.py index c67b68f..8248002 100644 --- a/src/longbow/utils/model_utils.py +++ b/src/longbow/utils/model_utils.py @@ -23,21 +23,19 @@ starts_with_number_re = re.compile(r"^\d") -def load_preconfigured_models(): - pre_configured_models = {"array": {}, "cdna": {}} +def load_models(): + models = {"array": {}, "cdna": {}} - with importlib.resources.path("longbow", "preconfigured_models") as model_dir: - for json_file in (model_dir / "array").glob("*json"): + with importlib.resources.path("longbow", "models") as model_dir: + for json_file in model_dir.glob("*json"): with json_file.open() as fh: m = json.load(fh) - pre_configured_models["array"][m["name"]] = m + if "array" in m: + models["array"][m["array"]["name"]] = m["array"] + if "cdna" in m: + models["cdna"][m["cdna"]["name"]] = m["cdna"] - for json_file in (model_dir / "cdna").glob("*json"): - with json_file.open() as fh: - m = json.load(fh) - pre_configured_models["cdna"][m["name"]] = m - - return pre_configured_models + return models class ModelBuilder: @@ -79,7 +77,7 @@ class ModelBuilder: SUDDEN_END_PROB = 0.01 MATCH_END_PROB = 0.1 - pre_configured_models = load_preconfigured_models() + pre_configured_models = load_models() @staticmethod def make_global_alignment_model(target, name=None):