From 253b10c2d04f3db5740fdd5dc9cd8324a4f4c381 Mon Sep 17 00:00:00 2001 From: "Alex H. Wagner, PhD" Date: Tue, 6 Feb 2024 13:53:47 -0500 Subject: [PATCH] Issue #334: Normalization (#337) * add normalization example to test notebook * update notebook metadata * a few simple behavior tests * Add keys to ReferenceLengthExpression and LiteralSequenceExpression * Make LiteralSequenceExpression not-identifiable * remove unnecessary / unused code * addresses https://github.com/ga4gh/vrs-python/pull/338#issuecomment-1924896008 * Fix ReferenceLengthExpression tests in test_allele_translator * linewise diff for test_annotate_vcf_grch38_noattrs * Update test_vcf_expected_output_no_vrs_attrs.vcf.gz ReferenceLengthExpression * Fix test_annotate_vcf_grch38_attrs * Fix test_annotate_vcf_grch38_attrs_altsonly --------- Co-authored-by: Kyle Ferriter --- notebooks/testingstuff.ipynb | 322 ++++++++++++++++-- src/ga4gh/core/__init__.py | 2 +- src/ga4gh/core/_internal/enderef.py | 6 +- src/ga4gh/core/_internal/identifiers.py | 46 +-- src/ga4gh/core/_internal/pydantic.py | 10 +- src/ga4gh/vrs/_internal/models.py | 27 +- .../test_vcf_expected_altsonly_output.vcf.gz | Bin 4441 -> 4513 bytes .../data/test_vcf_expected_output.vcf.gz | Bin 4703 -> 4759 bytes ...st_vcf_expected_output_no_vrs_attrs.vcf.gz | Bin 4490 -> 4530 bytes tests/extras/test_allele_translator.py | 11 +- tests/extras/test_vcf_annotation.py | 18 +- 11 files changed, 359 insertions(+), 83 deletions(-) diff --git a/notebooks/testingstuff.ipynb b/notebooks/testingstuff.ipynb index 28ebb9da..f3677c18 100644 --- a/notebooks/testingstuff.ipynb +++ b/notebooks/testingstuff.ipynb @@ -2,8 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.659786Z", + "start_time": "2024-01-30T20:55:12.375781Z" + } + }, "outputs": [], "source": [ "import json\n", @@ -33,7 +38,9 @@ " replace_with_digest\n", ")\n", "def pretty_print(d: dict):\n", - " print(json.dumps(d, indent=2))" + " print(json.dumps(d, indent=2))\n", + "def pydantic_dict(o):\n", + " return o.dict(exclude_none=True)" ] }, { @@ -57,9 +64,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.664116Z", + "start_time": "2024-01-30T20:55:12.662083Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id=None label=None description=None extensions=None digest=None type='Allele' expressions=None location=SequenceLocation(id=None, label=None, description=None, extensions=None, digest=None, type='SequenceLocation', sequenceReference=None, start=55181319, end=55181320) state=LiteralSequenceExpression(id=None, label=None, description=None, extensions=None, digest=None, type='LiteralSequenceExpression', sequence=SequenceString(root='T'))\n" + ] + } + ], "source": [ "allele_dict = {\n", " 'location': {\n", @@ -99,9 +119,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.669479Z", + "start_time": "2024-01-30T20:55:12.664423Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"location\": {\n", + " \"type\": \"SequenceLocation\",\n", + " \"start\": 55181319,\n", + " \"end\": 55181320,\n", + " \"sequenceReference\": null,\n", + " \"digest\": \"5mvu29n_A07DBCqsGyrjk2NUknOhkVZS\"\n", + " },\n", + " \"state\": {\n", + " \"type\": \"LiteralSequenceExpression\",\n", + " \"sequence\": \"T\"\n", + " },\n", + " \"type\": \"Allele\",\n", + " \"digest\": \"eahJQ_NsonA4qMlGbBrFEJBlIMUenRLI\"\n", + "}\n" + ] + } + ], "source": [ "allele_identified = identify_all(allele)\n", "pretty_print(allele_identified)" @@ -109,9 +156,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.684277Z", + "start_time": "2024-01-30T20:55:12.672225Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Location serialized: b'{\"end\":55181320,\"sequenceReference\":null,\"start\":55181319,\"type\":\"SequenceLocation\"}'\n", + "Location digest: 5mvu29n_A07DBCqsGyrjk2NUknOhkVZS\n", + "Location digest: 5mvu29n_A07DBCqsGyrjk2NUknOhkVZS\n", + "Allele serialized: b'{\"location\":\"5mvu29n_A07DBCqsGyrjk2NUknOhkVZS\",\"state\":{\"sequence\":\"T\",\"type\":\"LiteralSequenceExpression\"},\"type\":\"Allele\"}'\n", + "Allele digest: eahJQ_NsonA4qMlGbBrFEJBlIMUenRLI\n", + "Allele digest: eahJQ_NsonA4qMlGbBrFEJBlIMUenRLI\n" + ] + } + ], "source": [ "location_serialized = ga4gh_serialize(allele.location)\n", "print(\"Location serialized: \" + str(location_serialized))\n", @@ -126,9 +191,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.709066Z", + "start_time": "2024-01-30T20:55:12.675406Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average duration: 7.207540911622346e-05\n" + ] + } + ], "source": [ "ct = 100\n", "start = timer()\n", @@ -140,9 +218,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.710289Z", + "start_time": "2024-01-30T20:55:12.703490Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average duration: 0.00015179874957539142\n" + ] + } + ], "source": [ "# genotype \n", "# https://www.ncbi.nlm.nih.gov/clinvar/variation/431013/\n", @@ -225,9 +316,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.740335Z", + "start_time": "2024-01-30T20:55:12.737050Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average duration: 0.0002980487502645701\n" + ] + } + ], "source": [ "genotype_431013_dict = {\n", " \"type\": \"Genotype\",\n", @@ -261,8 +365,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.832604Z", + "start_time": "2024-01-30T20:55:12.740570Z" + } + }, "outputs": [], "source": [ "import ga4gh.vrs.extras.translator\n", @@ -272,35 +381,176 @@ "importlib.reload(ga4gh.vrs.dataproxy)\n", "importlib.reload(ga4gh.vrs)\n", "\n", + "from ga4gh.vrs import models\n", + "\n", "from ga4gh.vrs.extras.translator import Translator\n", "from ga4gh.vrs.dataproxy import SeqRepoDataProxy\n", "from biocommons.seqrepo import SeqRepo\n", "\n", "\n", - "data_proxy = SeqRepoDataProxy(SeqRepo(\"/Users/kferrite/dev/biocommons.seqrepo/seqrepo/2021-01-29\"))\n", - "translator = Translator(data_proxy=data_proxy)\n", - "# translator._from_beacon(\"13 : 32936732 G > C\")\n", - "# data_proxy.get_metadata('GRCh38:13')\n", - "spdi_383650 = 'NC_000009.12:128325834:C:T'\n", - "translator._from_spdi(spdi_383650)" + "data_proxy = SeqRepoDataProxy(SeqRepo(\"/usr/local/share/seqrepo/2021-01-29\"))\n", + "translator = Translator(data_proxy=data_proxy)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2024-01-30T20:55:12.836510Z", + "start_time": "2024-01-30T20:55:12.833916Z" + } + }, "outputs": [], "source": [ - "pretty_print(allele_280320.model_dump(exclude_none=True))\n", - "ga4gh.vrs.normalize(allele_280320, data_proxy=data_proxy)" + "expansion_allele = models.Allele(\n", + " expressions=[\n", + " models.Expression(\n", + " syntax='spdi',\n", + " value='NC_000001.11:40819438:CTCCTCCT:CTCCTCCTCCT'\n", + " )\n", + " ],\n", + " location=models.SequenceLocation(\n", + " sequenceReference=models.SequenceReference(\n", + " refgetAccession='SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO',\n", + " residueAlphabet='na',\n", + " id='NC_000001.11:',\n", + " ),\n", + " start=40819438,\n", + " end=40819438),\n", + " state=models.LiteralSequenceExpression(\n", + " sequence='CTC'\n", + " )\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# This is expected to normalize to a fully-justified allele with a ReferenceLengthExpression\n", + "normalized = ga4gh.vrs.normalize(expansion_allele, data_proxy=data_proxy)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-30T20:55:35.795398Z", + "start_time": "2024-01-30T20:55:35.785972Z" + } + }, + "execution_count": 12 + }, + { + "cell_type": "code", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"type\": \"Allele\",\n", + " \"expressions\": [\n", + " {\n", + " \"syntax\": \"spdi\",\n", + " \"value\": \"NC_000001.11:40819438:CTCCTCCT:CTCCTCCTCCT\"\n", + " }\n", + " ],\n", + " \"location\": {\n", + " \"type\": \"SequenceLocation\",\n", + " \"sequenceReference\": {\n", + " \"id\": \"NC_000001.11:\",\n", + " \"type\": \"SequenceReference\",\n", + " \"refgetAccession\": \"SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO\",\n", + " \"residueAlphabet\": \"na\"\n", + " },\n", + " \"start\": 40819438,\n", + " \"end\": 40819446\n", + " },\n", + " \"state\": {\n", + " \"type\": \"ReferenceLengthExpression\",\n", + " \"length\": 11,\n", + " \"sequence\": \"CTCCTCCTCCT\",\n", + " \"repeatSubunitLength\": 3\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "pretty_print(pydantic_dict(normalized))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-30T20:55:44.585796Z", + "start_time": "2024-01-30T20:55:44.579823Z" + } + }, + "execution_count": 13 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "'ga4gh:VA.L1iW0hEkuerURCtRni6HdEnEIdLoGvog'" + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ga4gh_identify(normalized)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-30T21:11:45.983540Z", + "start_time": "2024-01-30T21:11:45.980918Z" + } + }, + "execution_count": 24 + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "# Descriptive properties do not affect digest\n", + "normalized.label = \"test\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-30T21:13:11.467490Z", + "start_time": "2024-01-30T21:13:11.466190Z" + } + }, + "execution_count": 26 + }, + { + "cell_type": "code", + "outputs": [ + { + "data": { + "text/plain": "'ga4gh:VA.L1iW0hEkuerURCtRni6HdEnEIdLoGvog'" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ga4gh_identify(normalized)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-01-30T21:13:31.090021Z", + "start_time": "2024-01-30T21:13:31.086682Z" + } + }, + "execution_count": 28 } ], "metadata": { diff --git a/src/ga4gh/core/__init__.py b/src/ga4gh/core/__init__.py index c2de1300..f013a087 100644 --- a/src/ga4gh/core/__init__.py +++ b/src/ga4gh/core/__init__.py @@ -13,7 +13,7 @@ parse_ga4gh_identifier, GA4GHComputeIdentifierWhen, use_ga4gh_compute_identifier_when ) from ._internal.pydantic import ( - is_pydantic_instance, is_curie_type, is_identifiable, is_literal, pydantic_copy + is_pydantic_instance, is_curie_type, is_ga4gh_identifiable, is_literal, pydantic_copy ) from ._internal import models as core_models diff --git a/src/ga4gh/core/_internal/enderef.py b/src/ga4gh/core/_internal/enderef.py index 1f4c4001..35090de0 100644 --- a/src/ga4gh/core/_internal/enderef.py +++ b/src/ga4gh/core/_internal/enderef.py @@ -15,7 +15,7 @@ is_pydantic_instance, is_list, is_curie_type, - is_identifiable, + is_ga4gh_identifiable, get_pydantic_root, pydantic_copy) @@ -60,7 +60,7 @@ def _enref(o): if not is_pydantic_instance(o): raise ValueError("Called ga4gh_enref() with non-pydantic instance") - if not is_identifiable(o): + if not is_ga4gh_identifiable(o): raise ValueError("Called ga4gh_enref() with non-identifiable object") # in-place replacement on object copy @@ -101,7 +101,7 @@ def _deref(o): if not is_pydantic_instance(o): raise ValueError("Called ga4gh_deref() with non-non-pydantic instance") - if not is_identifiable(o): + if not is_ga4gh_identifiable(o): raise ValueError("Called ga4gh_deref() with non-identifiable object") # in-place replacement on object copy diff --git a/src/ga4gh/core/_internal/identifiers.py b/src/ga4gh/core/_internal/identifiers.py index bdf1b0b3..160ec0f2 100644 --- a/src/ga4gh/core/_internal/identifiers.py +++ b/src/ga4gh/core/_internal/identifiers.py @@ -28,7 +28,7 @@ from .pydantic import ( is_pydantic_instance, is_curie_type, - is_identifiable, + is_ga4gh_identifiable, getattr_in, get_pydantic_root, is_pydantic_custom_type @@ -143,7 +143,7 @@ def ga4gh_identify(vro): 'ga4gh:VSL.u5fspwVbQ79QkX6GHLF8tXPCAXFJqRPx' """ - if is_identifiable(vro): + if is_ga4gh_identifiable(vro): when_rule = ga4gh_compute_identifier_when.get(GA4GHComputeIdentifierWhen.ALWAYS) do_compute = False ir = None @@ -281,7 +281,7 @@ def identify_all( if is_pydantic_custom_type(input_obj): val = export_pydantic_model(input_obj) - if isinstance(val, str) and is_curie_type(val) and is_ga4gh_identifier(val): + if isinstance(val, str) and is_ga4gh_identifier(val): val = parse_ga4gh_identifier(val)["digest"] output_obj = val elif is_pydantic_instance(input_obj): @@ -307,7 +307,7 @@ def identify_all( # Assumes any obj with 'digest' should be collapsed. collapsed_output_obj = collapse_identifiable_values(output_obj) # Add a digest to the output if it is identifiable - if is_identifiable(input_obj): + if is_ga4gh_identifiable(input_obj): # Compute digest for updated object, not re-running compaction output_obj["digest"] = ga4gh_digest(collapsed_output_obj, do_compact=False) else: @@ -317,22 +317,22 @@ def identify_all( return output_obj -def scrape_model_metadata(obj, meta={}) -> dict: - """ - For a Pydantic object obj, pull out .ga4gh.identifiable - and .ga4gh.keys and put them in meta keyed by the class name of obj - """ - assert isinstance(obj, BaseModel) - name = type(obj).__name__ - if is_pydantic_custom_str_type(obj): - meta[name] = {"identifiable": False, "keys": None} - else: - meta[name] = {} - identifiable = getattr_in(obj, ["ga4gh", "identifiable"]) - if identifiable: - meta[name]["identifiable"] = identifiable - keys = getattr_in(obj, ["ga4gh", "keys"]) - if keys and len(keys) > 0: - meta[name]["keys"] = keys - # TODO recurse into fields - return meta +# def scrape_model_metadata(obj, meta={}) -> dict: +# """ +# For a Pydantic object obj, pull out .ga4gh.identifiable +# and .ga4gh.keys and put them in meta keyed by the class name of obj +# """ +# assert isinstance(obj, BaseModel) +# name = type(obj).__name__ +# if is_pydantic_custom_str_type(obj): +# meta[name] = {"identifiable": False, "keys": None} +# else: +# meta[name] = {} +# identifiable = getattr_in(obj, ["ga4gh", "identifiable"]) +# if identifiable: +# meta[name]["identifiable"] = identifiable +# keys = getattr_in(obj, ["ga4gh", "keys"]) +# if keys and len(keys) > 0: +# meta[name]["keys"] = keys +# # TODO recurse into fields +# return meta diff --git a/src/ga4gh/core/_internal/pydantic.py b/src/ga4gh/core/_internal/pydantic.py index 473e6c76..85e70192 100644 --- a/src/ga4gh/core/_internal/pydantic.py +++ b/src/ga4gh/core/_internal/pydantic.py @@ -21,15 +21,15 @@ def getattr_in(obj, names) -> Any: return v -def is_identifiable(o: Any) -> bool: +def is_ga4gh_identifiable(o: Any) -> bool: """ - Determine if object is identifiable. An object is considered identifiable if - contains a `ga4gh_digest` attribute + Determine if object is GA4GH identifiable. An object is considered + GA4GH identifiable if it contains a `ga4gh_prefix` attribute :param o: Object - :return: `True` if `o` has `ga4gh_digest` attribute. `False` otherwise. + :return: `True` if `o` has `ga4gh_prefix` attribute. `False` otherwise. """ - return getattr_in(o, ['ga4gh', 'identifiable']) + return bool(getattr_in(o, ['ga4gh', 'prefix'])) def is_literal(o: Any) -> bool: diff --git a/src/ga4gh/vrs/_internal/models.py b/src/ga4gh/vrs/_internal/models.py index e928473d..6a8767b5 100644 --- a/src/ga4gh/vrs/_internal/models.py +++ b/src/ga4gh/vrs/_internal/models.py @@ -26,7 +26,7 @@ from pydantic import BaseModel, ConfigDict, Field, RootModel, constr from ga4gh.core._internal.pydantic import ( - is_identifiable, + is_ga4gh_identifiable, getattr_in ) from ga4gh.core._internal.models import IRI, _Entity @@ -91,7 +91,7 @@ def pydantic_class_refatt_map(): # Types directly reffable reffable_classes = list(filter( lambda c: ('id' in c.model_fields - and is_identifiable(c)), + and is_ga4gh_identifiable(c)), model_classes )) # Types reffable because they are a union of reffable types @@ -172,16 +172,17 @@ class _ValueObject(_Entity): description='A sha512t24u digest created using the VRS Computed Identifier algorithm.', ) + class ga4gh: + keys: List[str] + class _Ga4ghIdentifiableObject(_ValueObject): """A contextual value object for which a GA4GH computed identifier can be created.""" type: str - class ga4gh: - identifiable = True + class ga4gh(_ValueObject.ga4gh): prefix: str - keys: List[str] class Expression(BaseModel): @@ -239,7 +240,7 @@ class SequenceReference(_ValueObject): ) residueAlphabet: Optional[ResidueAlphabet] = None - class ga4gh: + class ga4gh(_ValueObject.ga4gh): assigned: bool = Field( True, description='This special property indicates that the `digest` field follows an alternate convention and is expected to have the value assigned following that convention. For SequenceReference, it is expected the digest will be the refget accession value without the `SQ.` prefix.' @@ -262,6 +263,13 @@ class ReferenceLengthExpression(_ValueObject): None, description='The number of residues in the repeat subunit.' ) + class ga4gh(_ValueObject.ga4gh): + keys = [ + 'length', + 'repeatSubunitLength', + 'type' + ] + class LiteralSequenceExpression(_ValueObject): """An explicit expression of a Sequence.""" @@ -271,6 +279,12 @@ class LiteralSequenceExpression(_ValueObject): ) sequence: SequenceString = Field(..., description='the literal sequence') + class ga4gh(_ValueObject.ga4gh): + keys = [ + 'sequence', + 'type' + ] + class SequenceLocation(_Ga4ghIdentifiableObject): """A `Location` defined by an interval on a referenced `Sequence`.""" @@ -407,7 +421,6 @@ class GenotypeMember(_ValueObject): ) class ga4gh(_Ga4ghIdentifiableObject.ga4gh): - identifiable = False keys = [ 'type', 'count', diff --git a/tests/extras/data/test_vcf_expected_altsonly_output.vcf.gz b/tests/extras/data/test_vcf_expected_altsonly_output.vcf.gz index f1f0b18a6f2b1239bbcf50b48eba0311e6f5052b..cf73bc98d0fd1b36aec4db19d5ad5b843df599bc 100644 GIT binary patch literal 4513 zcmV;S5nk>eiwFb&00000{{{d;LjnM~482_2ZsSH0eRjTr@Vx9|Jx1TViH!qzJu{9G zPiA~cumJ)Xv_x54NTfqjo_H7g?_14V7eytjjBApI1d+s3r|WX6x=HiZt3|d-7e%>F z>XRSO-aV;f@ztwW?=CKH-(H`5eevex1q|hRdrfc(PL6o z>HP2;xW)s$N$T`OFm9L=3O+l#Jb9We+8;-+v*>m7y72#c{jcOHX(6M-#U`J5S#H)zes_L-wv_HJFYc06eV63P>bc6QyDF`Z7fDqo-%Kz?z!W@|TEs8MlszVo zorty81ehvdGQkyqz8q5zn2u}d1O-nsU|Qywjx7PE9WZq)6=2GjW4ZxTX)c|S=H;02 zfGM4fmF4#3n4ASrX=6*0^qdD!&73hLpaL-!R!c?@dLC+tD`}JzjsP?i6SGW7%PBxZ zH8CE?oI8r;59Jgy2}z|0s-6$!L$ZsG9omwqpmefN)&Va z-T5VB3^Hej2`ys+N2%5XgTfYDCIpKu#R#RlRqW7?SQv^aCGJBp)j!3UArPm*!gLK1 zb{UG$!c_jCupLF&^(Mqr|76TGg*x?>y(?oGlN2KqA5u2nk+Pg5WqTV!O!Y%>&M7v4 zV59TZ4k?cnO+31F0NxH8T#?K-!#bw@LOM<{k_8jfWyqN1C?c5H(J`!WB)xmEF>M*J z36vDU(7lNrQmC9z6dO&`+t@J-Iw3CvQor(k2MR^ z8u1;&r{l38UB*;OIFn8iNJ+4XPXB@p&tVp`JSviI3`f7fXuP0CB>FMVB3H#t%(U$L}8D>W`>Zq!Y&a;5ynw* zEKrK#+#o>5$RUj47HoiaBNZW(RfR=FNI_WXN{pHTWeZa{u?5M2Wne8v`b6q$)WTv% z>T2A9JIY236$1Wj+mWXvMNbc|jY=@hH zfISeUPY}2wx!2W0Ydq%&k~@C`G>fsYQrw&mtViv@5%x#Ta3EufgN48v7o=o_sjc~F zM?-QuAYhO5&v8NNZrp6Np;Qu$hbUl8aGNJUBPB7UEkQG+e_ConjnboGSdc=GdZeQq z;kCfEPAb}3V)5e?PiF*Zi4 zkLB=A3>-qr8uuAR@tpAht=JcXmUwHxC?{+wQ452eDdkDF!Hw53LgkYNADubMEw42A z&;@2dN@x^DCn*mvq;+4cWb9a;D1D7vIv1q34dT(cts~stNj&xWrw}PwW98$N=e9D& z8sZ#f^5J0tB!p6)VpQm{B_)mhVI67i;agUU^0ZlwHZzvuNs7Q4LrX^K5Ih6J@I>ls zeB+Vgk>fxbM=>UC1P&iHQ{HuycuSxJ<$%DBfSxU4{>Z9Q=ShAd}d*Yo^I(L+a28-6pm=y*gjb~Ivuw$Cz^4( z2d77SQW&ub-6HVaX<%+BPbsa!+N!Oh`17g291E3ZM4haQC$4$OLyE^&gIDmDnlsWy zX-9{*#*se8WfiI+#T~_i!vPzG(-=j0JI^XS%E!)1LOHZp4r59B?1)uZSF|H}9DD%R zqmvBhD7&WBqtb_!Q0Ae=H##lJ-_ZqZRGmh|luzbajSq>TB~hk|Mztx86_h%z4et1w za+I|i)u!cwXuj%Xd~_~qN!|?*Y7>u~&XD}gNnmcC!Ydf8F|^&yjRp#?2+lU%{^Q5M z7v`(aNGMXoK0vJ{FDXW)Jz09iUAO8#ip*Z zx&Jn8zD({{X;h{U08&MO?!S0$0l<)dM^2L01I@}qJSAo3KWxnX4%hol{iTQkG$QJM zQYGtjpt(&dtYnk-HpZ;ZdPYrX!mnDYFYrgWXqnbyoA1UpJ}u#YqNI7vQI*4!PWsyH@v-@R}R-TM^ZkEr} z)gBD1Vx4Y9FbQ-TlxUTd4{6!vGiUdY?E=7OfY6r#T!R48Q#t|>Dfb(ub%f{>VFoU6@Z`CX><-TTW2#*d5&_e`^&ZrGU~(VW=E|tMuu%z=e1l*B*{|8L@Y(haJ9gXl zi>I_)C6AB5*c0~*ywXOwFoFv^?QQw@^1H7kQmRhK!ls7Im_M;6r@o z=_Ax(wOOx$crq&sP)ARw_sCI{{yJN2=AP;ME7n|8jm{i7r@kmF#yf=dP8m?JDBk0wj z)4GXN9C_)cI&3n;OC&J1*l=2L3a7I9FLbdlu!1i(Gs+Lj}B+4f&}(>opNZS2ASGV^J;O@G{;d%DL{Px+yR$O`nqq#G{V32;?> z0jxx4HruS~7T^z18m{bZ-$PvdyDdjbIndm)o0RuiT_!cm zvU|kq`iHksw!nki98a`u_pXY*4qNEPA21o^X*%B-1Ay}W@)S3D_KQ!qZh+Xs4c*Y) zn)H`%{OHv(@n!5^2kIR#Y?2^M=eItS^GN({_%2_dVaN+Qi5~Nd-}g@l2uke?=}f5ha&Ka+&Ske z@_D(XZJk@%ib#EdRB2wp2ylLT6m6Q8(G2{!uQhOK0&TCew!M_oH>a1E{c5iN&uY41 zz42kSYbjIo7envb4KxbXsr-1$i(+)UhbWu{ANGOTv8ui7eZ1KxCqBZS4|b4#N1Au@ zNTY83D7YB5&_kvEcztts+IZ*g;?2+%Pk%@0n&enNZhSuZ0ieznnSXz;DZ8o#NwrS`;Ila8?0=0wV+p;XmJ#^#3Z?&S-|2isW zvrXA-4I94g)Z>X=4786L%k9sh+gH$!rnYbDq^$SxjSq@nYBt|^CxP~7dE$44o674! zE4%$X6;%!Fm-bser)Qo5=ojVs>wC2#=+W9Qq@}Mn#nYXytqxb-j03K z!*LtIdoVP|=T6X?n28zVBps6wa@*4T92d`f<}oiZ3H zl6F-Z-qE-B@SHeyd^?@gzHn5$73Wk!~hL}r2~ZEf^-w>5F5Q)j$% zlL*^R;J0Ts!&5P~C(AH4;vgae4G%{)o&Yy^@yswZEys0|HMvSnl$(i9tFN<|#FIc>AXfRc^c26D7F29cL5O#Ei(_JqrNnxctARqPB!sUr;rbvgJb9@sNlt|9}yZnO`}+Qz;h#+8ZfTqlE8O)iUV0cmH_G{V?2*Ep^;Sz5~v z`R%RcBumu_GiCd#l#x7R_|(cp;00meu8FDGu#<)r8jcs@Xo|;)oghfgV>bvnkWW!b zuZu@G#AZ^AoNca>PS^}TNO5?r&DASA6fPgE`^&TQS9ZQq*6g6S()Abbn%SkeaZ^}K z&l`Ak!4NMBuSG$q;)Gbm2;BT z>fBT^Txe|^*ZM)Ddug=5UWxb9UO$sBv)+8|rN*k?<7Yn^hn(7)HJrWl&M%cflb;!G zd9OTjx@~pO9`*kE`MT^h<+JOQoR^%IQOQT4a)=iN76?4y+5m`YicOv?L4`1*T7{E4 z3Fjt^1ySy&h`vLSU9)*QSHkb3dW+n=y?J_eAs^*?GTDFe>Hj4(uZB-5J9B%FXPwKt zzW$=qi}6Em5S#koPPq!WcbXYN*z-)(l|)w)Mj2tgnQjAUl4B9#0Mq0}4$?VcR)8!@ zbFCz6HR%`O|AFv-hPC)?9Mw3C<<2Eu^rBmPI(X3V4c6f-(HDLC=27Rl(sp#irLdTt zmvSq0ESd5&5>bNzh~q_?5dck-jVXj|j*%m~0to4pqf+||RB=Rm9QR8bREAew%%HNQ zcLDfV6|=X={ZvhJdWizw1@<6_%Dv`=0wJQZIwl=WS*Aykl4XwlWa&H46hu&S!+wr!<4hXMzl z(&!tVZYQ}k!;x3+V0!Pr8@A;jn$;$J0%)PJ$@xKI<))7F?D1o7h@=aqQ2Y{RG!&pc60gyNpnqC??>$ z{zdn0dXt@OJrAts*7HdJ_56c(=Vg%Avzu)=)yUSypGKS6-+r|I@8y6H+PNHd|NX4< z?`OyVc?MY1C`^KR>eFh(;gm#B_)v(sd9pof1>JTmUA2I;JWyg``~B+UExsY>6nyMpxkjC6w*^# zfl8um8wHdZo-#*rf-v>8$|WkeEjUvMpj9#vhY0Ra4A3f@2z6ab6~^$(baDv?QVE2t zr)4@R&ZHv*MZhf6NfGYABw_ejrjt}Qb!>`|p{&d%Tfi@BBV;HmlS$YPab1O=POQu& zNKdMSjX=;}D;2egAW%V{j`_ZH(_zdJ6an&8>834hj1(IGe9j!mbNh~tH(of4R#l0T z1f!0YCY(JJoDq(pt%U`(KM3a73e$wUXTr9XP4FyGvS&dMNN_|8bl(DF6mtx$flZKB z2)SRH;0L=z2pEumAw=;OzO3qlAmr@@5eE<&J0 zW`aOsvmFzRxDG}OCG$}%O51R!brQq z1l!j??h=WiPDN%nF_sXH(L&LZvTg%qDGFr^8;tGi2cwi?EC9hq9;*f^cLmOT#6r&}pOzaPXyC_=`m5GLm;V^|#yGtAUB?A_L9Ah$MVWL3_g<=9@ zp=lR38k#``mNB+*s8du8xtHZsN$FxN8l7Sw`u3ODU5QqZ27w5#H1% z5C@|?RW|n33YJ|MeP?B2Ur4CqIu6EeYiXlF5N(Mfx|NN6DG{I~s59Qsd@AZP)M4yP z2_;-{1SzR(H1vipcMycVGO?fJ@EyiR(fizjXy~h;WFgGOFJWea6Ue?&qL_n_*cG%e zpdlA9p?L)jtq4LMSJ2Xc78nvyLNhbgfQ`_*R@kJ%*4u!H(A0pY&=6xn+h(u@Ci|$Y z5gRoi#yq+%f@)RB)C@JpSn^lcPKC`K+eK6nVMYWNGl*^rGbGFwi1R3@DNq7q-=Kna zjero0Td)^7-xu?E?BJ?V3sgp5}ToHSXN%Mnm(b%+9+YJLJhT9!IVN2 zRl!VJa~soZVT77PzljQN+cjOqq=;=s+rez0gzeNrf(XqGuU&?6hp)m`W-(x}2EvSq z0WMJPbrsOk>~k2(oxc*AxTdgT+?=nxu9X9YSRW-ZC(^~(Sg5=<6Ot1|YD-#M(V*N8 zsIY7Lrzpb=w^?kYjj1HkY@)zxgxfq7w4o$6YDtj9&_Br~qDIMDGt5w-Yc*0~w(ydf zxsEE@lAGd3F`mw-ppAgIE~?W;JY0(McvgumgxN9>#C-rMOkI_z3{mFBh(Og_Sh`GUK76WQa)IHGJ#t7b9J@5`%!CdTrJjf$^Mi2`$KXgXZSafM9mmoEs$!R;HLc*|u4D6+u)!ZktHy#f*dTBt_-53C#&+K+GN( znkUp)n>QXY9yzX@HfPaAEdDrnUT5QCyF!Sy-cy43vQaZxaR_O?gAD^l`H-$LyX5) ztEiYuYD!QGrBmCywH4}PT#hhuNKl3G;BbX)Os8!N%-ea6Fk5+7IUG?A9a9cnj{59~ zBTQY93gvO|5?t1HGL*usnvSfcK8ysDhtj;!>7e`^U4?CAr_EfZW2RD!la5c za+BK*!_;wUn-yPTwz7^ia?_yp`7B_3eJ z)_ssHtYi_y*4z)H#0MX}%d9;mTnr{G14@2A8b$U9F!sWvOp6b)Aegyb_;#&ilvsEzFF3$|kY0rYh zx(8>b`KfT#D##|`W&>gM!UrpJJs)-h>3s;`YT_+D@cIDP^u&aEV$&ZK1Qz|ujM$8$s8Rf8yMb>pZYxQ=I`m;B=Ek`x0EKSM5GgB={oEMp`JCb)J-qRyjw<7Pb0Mp zc~1rJIb3rCg`iM_TJn&is8$D6!=d6YLL=juZ! zRP9>{P*=q##4-o^>S;!E0K%iK?F6F9wmzUuHwIMOIKcmlK+|y7{d##W-0kc~N;fS? zR-g`gd2^X{fXnC}#0tu)H&2I|wqCEUG8x;|4jqVFsPX13IJ7_h*fB08lz!+Qn@c z+-kbz4a5Oq$eZrWq`rKsTd&L$oyPigpgI8MA_=BxFw#s8E%v|V;c`etKjIm=SzP%6z`dgW2tQXVo49fJDMvq7A_xqj6f<*n84)>j!;2UiCvB?MwH1HZ2)x!*ABL6wddc?B&*WhtfOg zU0f8Sx&DpOR%B zSo6UOQm;tEydO!`t!M>r$|ZD}sc)}`<6at_@%c$v6?mEomXV4Ramloh!8Lk#vXe;7%6sEjSqU^L$x|scx(NKTWyBOsY)!^glW)p1=(2aAw z)UteiwiQjM+fBMOO#RKe9_`q9>2^D{oShEUK8AXXvhccl%(m1J{$M&=T z^7_rI*5#XF>-?m3eR|e{n{5Tmi@^Nxp z$U0nNcD?$Ni|ave^mFa>lefnW>x}NpVCdgoo^nDy)lP5s2cdq=__BQ-g{PmQmERsl z%V0Jw7yTr`i}{%~BkGA{c{I8?e)`_{Zb9~9-$W3AK;nELDEs#)vmR_u7EY3wfM zTyxTxUPxaxhLj}qqn`z8)x=>9B%G! zA2#z_;omNq8a@9!xH^3`8$|D}rpJ|7boFHO<01pokK(u@c6ZbojE*jcM<@etjx!kTGjoTHKVCE_|i8`Ci z*?yc^<=j^klXZ$2I4RMZDVNS|MI_^*n~m>JyazvSr2!W zj2%(xz^u|EMca-%;?gb&>WWn1My*l5*B|uzqrs@x8}+}6ttC@6a}Hu_vo_T@T(Q;u zGF!irqdPKMk-d?;-^|x<(aZE`?tAUarg1zqGalpVN}RmA-;A7bUnH+WZ?0aGMR3L@ zVtINuz1m*ad8+AS@)SPTcv>e_|BFPGR2{jZ?JA(F-KN|*a+ItQm5`LGP2`jZku#gd zilXvK6x}05is>9qSD@e5vbWOa(zyQgvj0W;9!y!p>i>se^QX%XL$O$%{puQW{rZ7z@tQ_42Fwj{2zEAF`BS~0JVd3gm z_W*%={ZXs;Cx!nXDE!^XTII$U+4zB;Zaz=$PbTk%A6BpBix(sE%l%pg-jKattUmQk zHfD6GxR`#fX=~?8VyYNDqBd_+sgCp$kc>fNN{GnGD4{DLglResTL05-q9fg*l!fhEgFUq5vb z+;s>gE+ZYiz-d3e+Sw&jr(M!%>VN#Sd3XKc?BnP0ho3Ls1}w49#*>FBA-Ca&v(Thd zr=3r^vBW9AtF3c7?MJFK{X5g`q|^!rzpH;;*JPFJx?1EKm&|{1>W{;zbbqdU`e_Zz z7=LCwVp8tO^sFefvgOZFI>Gu5<2s6iBz@-SZ~1hqzp}nnO#P49!#L44oLP^Q%6wV! z8$6=66d8}&mwz(p56z^eQs>^0u`~Yc-KkfT4{Cey;&gmF-i(rqQ7^F7_;tHus#8AU fJpVRTr-B64ndvS}`-^3-nN|MR}kLs-;^ diff --git a/tests/extras/data/test_vcf_expected_output.vcf.gz b/tests/extras/data/test_vcf_expected_output.vcf.gz index e6d1e7d367730c219db9b89fb24ece4d06d9e2a5..7317e933209673047086195a110a06b63c778265 100644 GIT binary patch literal 4759 zcmV;I5@_uoiwFb&00000{{{d;LjnN3482@yZ`{Te{jB^7LjKaCvM}!l8U;`#TM;V9 z*26&q1Q4iQu9n3vxgoi7)S~~rcles&l5RKFweulCcFA+k+{d|hhSa`#wMbXVA}`i) zb@KDs+b40%zI*lR?d8?oo12sGFV9atoZj9ZohRk2NHB|$6363F4OY9OseBWTvqW9BTVKo8MRz8`*KX)VNz?D zl~N*L3Xe$`6#)8jOxa@^D!E}8JXMdWi6PQz1WesyO3MXc@|Ra*SZe z&cuWi3d01(MpJb*CWb)+mNAZTsP4Lk9920rQd*2nqwW?m)?8+YK?z7_gO5d*QYIzF zB+`u{d}>Z08lydV8)LPCZ5PJe=}nAL=jzYV=N^Clms>66T_#W zmZ2_VEG3k1!x5y!+nAUQE!POb?oEt~96rN1DEg(fASUK2Xjurc_?R^&IDzWxC5mZ; z!uHT2fQDMYgytR^MiGQM_Runb78nxIL$lD=fQ>M_dTipc-7z2{G!39B48)kwDs;BM z%swgyiH+)zAdc1|sFp{jp{qH@mfvG*kIl8Rh$In0}=;7Sd26*EOt=xvQTKnbOXEf09c?jgo_5n4v-sdZfV|;Ux=e z9aXd?55d|r6{jw_1Hp$BLhJ^2aqDvRf(D*COjAssJd#xG23E1 z=<(3u34=tfMiVHX8y^s%b&%Jb63nSmngCk}f!YjgcvPe?&MQ6bplIA%sF=_}cT|#K zI>ZF^sHnu$J{!~{Cm1g#dgvgtAr~Q>e#s z;Z6)3g33DVGXmp1;~rX&uLjM-tpUNDusIJ}80<_jPqJ0mcnv{RKCZ%}GlRM1f!o3Bq1HmJE5KC%*+r_Fq@nb8<;Qh3%OG$)u231?s!o=|-q zK6u1<<=B%Bqi9hZfeDYAF`qi}a7!Q<=7BBOVZl(0Kc(}~VLMk66`BoF54GnG4i+%a zg1HW#)I)Q}gbp7b8`K98;USK;7V~_Fg=ZED>Ft*GwcWw9L1vJ)jm?{tgVS+sDbkG7 zzBoPDlfsCN=oS_}od)I#^ORDHP+O%I7=J$HnTJ9p2~sC(;~CaGboOagt{UP%In}g zxE!2h7=zh0wH%Z_tOPR;W%!^|qx>D6#}2AfF^lAY8lj~rVMGmYGr(IE~-&}8^G6QIC2_=@;4`*xq9+H!Jw7G?QUi;P*8zzw(;g~ zKYP9~Uwu*f1wUJJalJ~)vRcNOCc~evx!ta#d=Xt-M~l3O=5ZARMj3rdt7TLz(=vKY zvb=iUB#}di97Lqa;SPShS~Wjwlg#66K2OW|u}G5f59Q3@!Ww@WKP3&m`zY#ej(|hH zt;%%nzD?>c{P*N1KOwTJ?A zK-67Q#_Ob~xl78gWSez1hOEvyMs;ZXRSne__`~0{Osb*Hw?iABmheAOT>s2bnmsli z0nfmFbztIXnLaLEF1ghY2-cV7Yscg$f}3XvWL=eHwP)P@@cpME&mMQ#{kBa?N5(lf z&F0B!4~AvFPIe*~dpb2rw2F(zq-gS)l7~&R0Pq zvui(pN{Usy*#Ki_s{9 zx5Lw2=V*RQat~?zh1}{isU@NgP~`e>oMtX+aj%orXKlw3Q?}g~&h(^1osB*CU!^V$cj*t?b4T}h>?t?2AX$Mv7`MY^GXbvh zPasy%R^!Kdnf2)P?z)k&UHyPWNN3ws)d2hfO8uRk?FWd9`)$jCQuZ`=?8e1IS`~2x zv+N%6y7~AfN*CeaHV-G-rhAupSBDLB?GKoYvLu;L#sHw4zZ}JFmj33_tsNlta6>zE zHzwWfTQ_<&Ok5eep96Ic=r>6ari(k5$yvny(tq7AshdaQpffB!;%vTH9U9>G|o^Rkxa(|FfEQ zSg(CpPAz4O{$l8<-9W8St;!F#yg`ia_7H_L@53%o6RXcBEN5 zkJRecje?7Q3*A@h&o{UCr?q$PFVD*ze@-f0ogC}U*5#8M0IGD6y5A3WGmy)ii&Js& zV+6QSgX_c=#)@*2XLa6Jd2`uay_x;JNkjL^&6~INh;e##*9L5Y>zks;i#_z?(rvk- z#9bZbv)Q(&w}>_GX8LgiFMIk&HG}4I==vq}rLpbXDlV#heB^@TmYmMF&RNg{(kynn z!)@twp^@Kiu8PVU_FMTco8zw_0nl&6A7W4?`{pTVB8(U-s0e5uZc;5Pzlg8J_w2`; z_wNoqyuUrTJU@VI4&eKP>yM{b2hEYffm_c(Q`-miuVa7rUtBbUJ-b@~03VA81ONa4 z009360763o0K5h5*jaO;NE`t0XZKV1vQ@iVt<%k+K*|S1C0?TumK-k>V2eZq1Ux_e zHh3ULGs(F-;#dss8={bjO;8h%azdrNR`-1*xDGSXdZ}ic$z%BcAx-JgQN5 z$FgV4aa2R_2@DOZ?)Z0OBQ_!&Cw&gZQx|C(`g3PF?~Q;YDTtk!It6Fuon^T0VaTC) z8qyodqT0=MZ)m4E=1vqjo}CP;-8%|i44X8Bn_PDw$sT!5!v$^MYi|5G28!=GW2IST z%^;o7FkM`e6g@wSery7y>GUYM+VcTPBXerKn>cQK<(^6Poe`Q&92zkm*VNJ9&AYKP znK;(cjYDiZzE_*tG*^zWJzfTp9{C~ZsLyd|<1ujk_1p*o!*pCH-jd1gw2`8Vu8Uo) zw;NH-HW+&eSoSr{y!(Rfm*^Rkd#|>eUye&Vy6WL3Lcxy>PwQhW3KN z3zy>5(oTM8_Q$gS_+X_|D6tWe6-rppApK0Mo8EX|$)uIcv2RS4PXcN|R)wJ=G=_rM z5PLoHDaa26_yq}N0kMn#0Ze}o1%I;#}zZsoq2%63ASQ8Mvz=bt{<@AFhn7(E*{|k8*$ciy1z>@ zVBh^FVZFh-yN<8YlYtFGr5ngQzM^$hdg-?qf7Qzf5Bus&>DmF`c{ZOqrTUJUZR9P} zYi%@bJ@3=!gcD&55BAV9#OXCkt1FcpV-bk?ulH0l6eS>3B_NQ%Ay8!nnys8{^Gupl z)q*OCk}OH8tcs#4eG2*sd4ldk(66V%9RCb@<$dU%8tlED6Hl)9)j8rS$a9q5CC(yF;2FSk1bPJ^x++vTHb)%7 z6!|sQ?1xzI0c5z@AHw~HQ+m(p$DtFrpLL(T5KfaPSNGDp@IQ?GU)-wv6mh_;y=K>F zLTjd!C{LlqIzLv|h#H`2qqLdTvpCocUlnHRwq8n&&SP6!Yt+JeZuKHV>pjX{pM4t# zJP(eA%UFOA919oma6JG}8US21*||_8w9 z#@eDy4PG>k&F#EXSTxxC${eTHm3?(CrxSmm{9YR8C?p{AmOv~gP&A1VitNK7WDby7 zvXg<3N+`>PzrZ!h=z{X)2@vJcUO)4pyuvq&_qoH~#h(7Vj(#6M$$>X0ZeqH+wZ4}j zMCJ6jm-H^1#=J~Ub~Eh7OV3%AA%Z*w%gH$q+&&cEgs3qcR0};lg`udFK=lG-xvZSj<2k-DHiq zL!nISZ|pA67b@96vq^3x-`Q`TKR3zqe3D+T{$Jvk$e%}KGYtK(*4zZ-%!kLt5KnN3 zJqs5jyjZ3MvFLln{aH~+7r1D|#lVl8m^?~U{Nu3P-0zfBJTCp8MROS!C;rkK7jL|z z4+pVtI6@Vc**gNmUy^=mNmnzPM>JX!{eB#6S+tM+VWz*ULAjE#jC$Nu=$uW*e5I`R^~#R#>DWCD(L&RTb)o9tv` zFEI9uy-0o8`|RC$8Km**W)+SUva$3>(Q^Fve;EJsx=RRcUi4c3dDZ;StAqc21z4jf zOoB=3(`dS+=FAT#$+X3UbS+yjW-3?a!`WkNx|)08pnKJsayba2fj3JAUg*sp;~*Zy ze$u@0;>7!P3zL3BuIVGMA{}w%|-6fR@Qb93r?wF+j^~BGh#$l^DY>(#a(pNF@-mo)+mOIkOxg zC<11YPLgm3CJDpOBAqO0Q^%$V8On-mvIYF1HbRE7BAJBk5Z9Fm>com%g7lkLdgBX1g{B` zgHi!<3u4;>Md?~tZo%3PlTD#BLNrm6*A{e27=8SpdqxNW=G2(5xQh@dk(nTn*lfcD zBd&wdLRsrk)M$sxY>Y0&&8K2p{{$g6f;bfxw#UF>l_3x zL!I)>u461A9HWJz1!dhD%2E`{<~A7H)(=K0#aIA>jdiSQq}&xa^RXTW;MLfm0;Rtx z#<5*5xTF{@S!rT>7~DnKiYQIgR1Aknl-ga`*e)5c2;>-(VdW-jq)LuY!_=Fc-gtnF&rH`wEF-4nks=&_aWTT)>3p zB{Z}m2zgvWTN<>$kca}B>9GcEgxi=Os6%s zF})UAs5$hTsNl9;(N#=}*m|@b%mzx>PBkQm&|LG{Whi&}N^GeY0|sj#%$R840_9#; z4sGdu4nw)~S3nb27gmg$^QG67av%}wqeb+IbTKv-O0V^V! z#dYzc7*A)E&{{xT7u9Jk9xg?BJgdMKLT?!e;y!>SbX~Pj86x$K5rL|!Qg5?ej2k^A zw7$b&p=P7hluxw}h|rG4YfcGfS7}KNTL^(#3|xJy$i~>OEZx?UMm>d!39VH}+aj0& zky<|5)WXz0SIb9EFdj@Sp*3a$7cn=u^kP753v~~)vN3|XmCbd<=9ndnb8Q-#jk+zT zuf!lAs9x)JMqoT=TtEx*!=SmoG$5EAHs@LigOw@fPPVNVUP%y@kK6jznZ#W3a$DbY zfgTVOTAN16F*h%`BY#-RI8}MVjJ00UDMP(&;8yl+CF1gq>s_CE3K7$5UHK^HzAblk z4UrO)e0sA05`rmDu9oPogX**%4@aVMPrqd)Fn61IWijJmJV{Y{twVEy84$e(hUN)1 z*7}V{j7N@3r}bHMQ44|8x0*5UI&ytUAQLM+_q4mS%rG2HMg>{ zfVmgU9sN!{6n9K${pPVmy%C{r;yA90xj)48JqsJ@>6Z0lyOn!`Od>5CvnMMnyW@^a zk$Rl>sVTDI+P*xS8Exs?29@m?*=GyQ*Sw?jq*DurRVlt`3eTdwsEVQ ztTYr_{=*Ys9%Bf2rVm&Q%BG{#=y0Y+@x z2g%e(ra^2>{4h!$7rvno3VwA&01T;jZ% z_&QuAKgv;n)*Mx*#NOO5+}w1k9LXxoZB)JL6OO%!c-+R@)d#s z&i*3A4WHe3vJAHTZKK1VC0%Xv9x0(t)joHnSQ7l_G>vy7WiMTDiA)v+iG>JV~O z`(6T6Rq+L}TmyacG^05H;ZfE$0#RpMeV|O&22|PD!T+;B(QwoKYI)AxZEi0UevoSD@s|o zxnb8^4ufRrCD6<6Fke@{9UH-o-nfnRjy9{_ag^s_2A#$OI-}6{$F*|+P%2)^#Z?&G zD!Q#3h#kVPZn`s*>hhIpy)sW!8mrfVssI#=B$%ddUokl}*nbxv7emVX5l_p_{MzSz zPum_`=e=#$OOMi)a+hJK5LAbr4dUee)p@(WZmoV=U!_^?Uhbrn5S(uA{_V<}1<7N* zNf-+S7}pJD8~w%5b*q6?q1Gy2UGi#k)ZalAI%N#=iK-dZPWCQ(yKv$H z)_kynR4dYO-H)W|mbZd8#S*&6)a$F>pq)l%aC#JP#IsiEQgX}}E5#?(03^Xppxz&> zdLYG|Zd-I;8-QzMxJqoHt%w&c>!*wr%$kZ<*{8BQ-f{$iaA} zf(11o2tBnrT*XQkGWpH>DkDx|e^&g$G5!dA0R2h+!yF|3t~>=zgcf5483E-(b*gbv z4&q1o+j)KU{=9MVzSlTCYQQ-S_`GrXTl=h$Z7DR=cp6!5H`2Fb|81mRi`tOPwnd}a z=rsBbi!@Z>Y7}d|7CWdcW?PheG%vi-VGteUN&@HTD;af4X9^M>tmbD|W~bAm)|cUc zkuSr8T=wA-y(?CioLzO>{kN6VkFF1D)*-FeLC?RvIOc>rR8BWL-B7)Td}f|T;qgN> z_sw233&y=;#~LmU^5(MD=?$t@21VZq9~G`tj_Mbv4@L3mfl~4sOm+C6Z01_3RXf;d zDqGVDm(AFpj8}_z5JMgG$Ah`QoIvvjz3SXsEWBZ$N-P*VsA^_w;)%(0|GM22UiahU z%|Q;#l7cYfB@0EBg*r=~XJo;dlA2bP zni@_@(0Zz-oG1yXU39bb{h{~h$F*Fp`0|Ie-SA~64%V|bW#=Xv*EdLjW3Qumw1{Sp zz{PklN$lZb5X=|;@&=aalSJ+P4&qs~u*~%(J+%`GkDW7n5}kVC)2DkL(s4X} z*Oxkl6~lyUW<)DVoc>JH!?s%RH&&{dZISdn7vjbnScP zVBI)gnpDqG{OaNEJ)d4vK1ybfkMAxAytnss{OARIV|(w;o@^oOlK#*?;XAQaC&`z&2C4Z zPR?TQ`s_pZZDeidf$N(2H=DUYh?#%0kq`9=K+^<}j_O<(XuEp#DR<<)Mc6t4gsa|S z1oUrr`i=Hq3Gn}r06&cCWoCVq%kSrJUfZMT!kKyZWNKX=cKYXww}Zp;!=vu~;o+B( z%b)4-@@aT~H2l!}Jbz~$9`?y^_X{iVdhFA5{?O66oDz$h8NW&%r$5`-;fkgM|IFss zWLz@3PoYXsOYW-&Ng0DaoDi`lvIU(CAxu%Z+xVwhM`gM}<;@i!t&+BW%}1*&+?u>^ zTkLK0^xw7g+kGWxZjs_yGS#-$+5e6zUGAx~n?rBjgq7VFUp>4M+%*X$E+b8~4m6)% z6h2GDYw?EeEk4ZNcH@hyFcBXXGdjL{96j*xaqn(5Gn*w9NB)cFUtoi)0_MA_#0oFF6(8$(``{@L$+q{b?4wCf2ufMOS zTmBd882QxyBYPq$T7_fdnIgSji2VVNsBMXiN9M&}O!`wZskUK!qQkS~%h54Q9!%I4mYfirmW?&SIKvs|4W9uIB@%YJg!ZwIyE006!!O(OsR diff --git a/tests/extras/data/test_vcf_expected_output_no_vrs_attrs.vcf.gz b/tests/extras/data/test_vcf_expected_output_no_vrs_attrs.vcf.gz index b5e17a0236bb77f129525304d092a041a0abe4da..e886f1b94748c17fda4afffc3b163b59a9b9beba 100644 GIT binary patch literal 4530 zcmV;j5l!wNiwFb&00000{{{d;LjnMH482@yZ`{Te{jB^7LjKaCvM}!%qEP@*Pa%NaVR^?&I7$LyE6nEs|Bd$ntem zo_sue`=pM=cduT(y}Y`6b93_j<@w2n)7#sl^SGGh$)-%Q^yKh#wF=;_GR})&6BR`~ zKl}mi@j&NM8J`Ho4Rb=l&(5w+o@R^s;^1`>ybfMx{$H>E89hZcWN^6HrZbNmDFK*73(%q-oeY7g?qcmDQ7fEqn#O3iKD$3}G5vJ%d1rMbb;ma{)i^)SL zLaj9crs^@7;0i!rj;T9L$F+2Vf~V;)EptqVmH^Xsm^zdSFy+fJU5BYOm(ED@a?G&9 zlum}qa{F>j&U#R3LraqMocExbIb%pb1!5|!mW&|uysss$q)}2h0?@vgm}N>@P666i z6XRjXxuaNqS56_5kW`AG>UmdAP6}<6;slsoIXNaR1c`!YS5Dd)ZVe}>P|;PBQSc8p z1QjZ}Vp7JkFmwd!B)VFH^5jk#f&~3XucC%21|9U}n7?#1tq@jm0?2nAO`{D(3Eltw zQdp>S^ThA-D4l2PzC~$9(MLNIc4R^dCMmSFwcy5!5Y{+K6XD2&G0reL3!IHCCukO_rnsv=-FZ0wRQ93L@~$T zUtBT9Aaf>6Xc-bXO4TM96t>tgAy{ZBMkw8^VnRDYVJN1Qunomn{}f|}K%6=Y<26Xw zWhg=mWBG%^b`)jTnh<0ClOfX->eN>DRK_wUDMl#XrEEBnvYaGkTN^@*^+RyZDK>y$ zqw&>*l!uBY9?d!cZ^8ywB=b$Tj&Z+`j#G?ey@~NMWJq!p(VLj)7*;rv-rd<4w+z?> zN{V3U*2IJqDkl`hM$@!5CWb)=mMM;LxNW1|BRhM}|s!rq%07diZl;-Ki4#)6udtDt2O#Ny|)HX#{Q zUnfz*5)^h1tq?TS0wuKUpt?`^ANS^$4&@9BlO7U>MXFX^Kj<7#!h65Q=94z##aY0H(nA(~T zb~Gf91A6R%{y8ov-Hn@#Hk3-D@el>92_EzG&`3!PX-m)y>7SOGP^0u<7#5_^gC6N9 zM|dr8t&@tjmRS5a#oHM@G;&B7l9ERL2sz2?Ssk`gcw}IN=Kxw^UDc!+;_$(UBGpxg z$Lx^eK~E2jPZ%_5H9DkxetaN=wkWS9XOvT=b_iQ3McNEPd{ks8&MSM`sA&9KsFcvC zI~vU>9pX?u8m=j|FGTey8O4ib(WGafgM$&$qil&4TT-?#DNz`O zAw8DEJ27wwDQnzk6vcbS9kgQK3|iu?0i&F-r9>?ZcBYgk*#~G))-ncN{8SX7=|ZO zU*iXl6t5ijq;V8O(njF$Q8VRJM~SxtN>CoyN{b7IQ~W7i4~^To(WKB~kb0y&cW|&k zc@``!eo_z3oe~;9Ja(iHBJd%O4MWQFA%V{<4AI*y&1<`ZXM@5KZ5!)1D+j0JHsnMz zPV?gQU{4AoHlbStemV`z4dp4NRajfKRTO_d)icLJr5RBtYvPG(9`cal_0`@hcuUP0 zX`{4*!&~D>U*ob0)sW(j;>F<}8->#tMfo_-Dm==E&PqZ#v{(*9N&4=HRajTFBY7RX z1J{F-4Cg4jrqzSehm}y~p~eq7Ey>@}_1LI7jR+~<%(EIF5<^R(Ocjl4QyMEMbzB?V z@ipZrYc;A(%LUPV)yVkZT-1_$8=$XEJaRfi@;4_vbMw@Hg25U?+uht?px}z&Y~#(} zKK6WJzWS>4L!Yg=xL(CYQ7)s@vf<}zZnx_oTLc%^!6M6pc~nM#Q3Ripav7A%qzE44 zG%KGsao`aG4-r^)xPu?9R`q9X;(3(L=SdMg=5aj!P|gf4tnio7Q(WWw9|i5r0dUB+ zWs%JNZ{zBh(ZebZ^7s)ziU834FP>`vFyz0H6QyNGv+xj4QJ(l8R_6W(*N1KSt%w3N zBI++GqIKNS+$GhoWSh1&hOEw7MpbC~t7@t*@JD~sGA@TU-wthjUc&zbQT3UlBz>$s z0-k~Us=!3SGI?D3T=J_Q5UeiC)}F~x05{KK$htC4E6@1*;rE~OEPdQ%_h}m!o{V>H zlFsAR9t?|Y9q&Xi>giM{!79ohEd0(3Sz*>N74#)|*uVcfta)e?32D z+XBM805HD{Z)cbE>@iDM&wDWSmj!1T;KyYgTtLj$$;?xpqnz{1k}~)VWM<8!aMwDi zgRscKtx@_M)YO}0Pb7T|4qQJ(s|Z-{LCpsy^?|MK44NA&l|aeY2?mh;o*fIHZ9lMM zw`)Ipit|;p*#Kj2L+#0kc5CXL^zb+J^@;s*TPd{d}gSS?bfNE5wRWg;H!c zn=G#?F%Zr2)=@cIR{So1di%a&cm4rn@aU)Q`XSCw_|YBI&BTrY2y2n7LNTHSH@vLl=dz3fL{PM-T>2hwkgjUtQHUWmJ_yS)x zK)*Z8WCXxH-r9~MrfmCPc+-;(^)~k4f0g(&+@(Kk&n?~Kv8VjdLSzN{VAKqk^#r)e zK7&|6Ta6y8WmcotyX#uUcJ%`iA(?GgWexBEh03avF%gbieMF)XpOjs?GM^FLSTP9$NCzn0n8s z8Y%Z#hIhem>GdMszQ1{Qde@9rf7o1wtX^F2rIg^Diy;0Ssiln3Ukp998>kejQTgGP7scpq4^cSleb@$SVpV(D`*6EYPJDnpAM7Cgjx=rN zkxJd#QE<_1p}R``cyoJyT6yRG^1Rsb=cLkA$+6ySeLndCpiCBt|9-Hafqdp%oT`f- z1HcVxTrajTRur2ot@6Ii>dWq`&FpV=8v38yym?!V7^hcvO~59&zRB|}-$Oqx{gx|; z{nbG>n{D%Ii&*ilryo!7vZH@gF{m$xu3tc3^0v=@ym|lb;KTddgUj;+;CBGr4z7Pb zy*jA3hX=q>PyGp{TCH*%!7wn001A02m}BC000301^_}s0stNb?blnAqDB+| z;AitId`VT3LetGf0m}zKWmH@lAXC0z&}G1;(qH|)%f)JRa8 z_c7xbsxnxnqywL^^GVpOxJVMeKuJ~n<+ZydB{Whu$OdggR~u1%)KJM+=h#$5e z*H83VmGJcm>{?PrB&uv5=zd*0B4c)Qxi_KHL+52w|}LA>uz=3@A8Z`DMVl@+!p zvs{mpr0kUy?`z;s$TnF>ku1PK@q{?|#VYXNG)ZJ=GCf=d=sh3ug3r+N*1j7USm`JW zTS!zfgV#nc9|GLFOp)6zyNu?`1hxRFYS@@C-Pv+yO!5_%w zPTZ5yV5ezYe?r`HNa8pPVLJ;UWNC=CDVCsGCyfkQw-fcCzYw{L{a|HI%d6W=c33B&^ii z@4wvZgwimaG6*dI5eiWZ6%{KduXKlFOOm3m1Ak8u) z#?Tqf76yqn+oq{AlmHYgs~}Pzg7_M;k|XtxIKBf=T(kXgni;jvN-dM_Y4ZHjop6)t zhk5>s_w*k^F1Zc*W1|nOrP9Cyh2VH*u5KYdg-)-rTk@sco&2JZjyHI2^hOUuZL8rc z>uDe*h9*6Tx>5d2~o+_j~~GT%uU}1FMn{q|5Hu-p|fxV0ARST8d#yb%J*evRiym!+y{AN~qEK!iI19#H|5HL7s z=7kD&y=uAYNFztE5j}WgF_C9MNoHoO7zGxpCup@U4!C+4)L%;%wg_HnD)&|gXiDj_*T++O7ix}o2P9r^>Q~&?~ literal 4490 zcmV;55q0h#iwFokTZCl*19W9`bYFI3W?yA^aAjk3Wn^D(b#!obbYE_7Uv_eHUtx4~ za&s)z0QaQjQ&ynA1Hyr%(3`Ampc?u7l9Oj^|S^-MeZZ-HC1Xm!Ez* zIzH(gUhe(t_+an6)$KM9{A4%}W@!*ddv97{Xu+t|pC{JLOA>$d=4TkAfeyUX-(!T@ zL@@!MwNLi$hS&MU)>dF`SzEFGZR?A7=jD*ro9jh1)X3J{AI9_1-~Mj>|Jx2Bbo;!! z_wP5`|9-RkpErOtjH5IdXFd%lbJ`63Xq-;=m{6{?1!GO^%5)Gu?oAd`FY0$L+Y_$( zQQY^!wC_b;_?QGqKk?J;YcEN?pEodBiOHzTC39bo$yb=vRm_!AB4COVlQ1d(^!1ps zz*JOn#W4J|3rvS7B3%ao(#$2m%%K^_ZUvHyy?tK@lKN3pZ_PW28|3{V{VO&+R+fpL@|L zo|Yv_5{x=pm~b{sa7H+WwyrFw%}FrFR+uK-4HLGlY=UQjk_`)jK!PJ$pqmyLqnLAG zHEe>kLdgBX1g{B`gHi!j7R06niqf^Pl?AyOCYwTMglM8BuO)O!7=3)db3zCK=G2&w z+(ihK$W0JPY_?{C5!b35cS4**}e}WJjL7WN;n{(i>%MgebHsucz zTVbT#$^_fgKkgEVp-!vJu461A9HWJz1!dhD%2E`{u52*2sUM6|im?F%8_QVLNVzL; z)?+yjz^k!A1xkNY%ww}&a78g%veLxnG`Nd$6j7R}sTdAZD7Cw=u~{-;6UZ?ZL#|BJ zNTEyP|)JGR$zB#YWngN+QiB3cNCUk zBNb)~FPWL^sG=>oDSi~=>5LNE2#D*VI&H+mr6`YQ71%gC74~MR0g&X0<{>p=24N2ab8)vZ6u9; z3KbLDsE)QIm;q5nKH5}bYM*Q5BPSRSCYI0!vw@442V7<`pteLk1Fam4pdMv&Q?WT_ z3FF*YMrNZP%b7bd2nedzW}Oij&lwlcg8VROZf*?-W{1tWQNmznig}W4n}t^qMCIeQ zd32^Qx4hgo4_%-K#Dq4MQE|+}3+|{NmNHINo-ku=mUPNcZyUIkb6bVDz2j!rr=LQ^ z^x9NDig|9!T~kAp!X%&BEP#Yy%9Cp(y6d1iZN|eY2N5`P=1cC#5S_iW-jKFd5$y> ziJ>H6QbiiM$!&*W>bSJciZ3x+Sw|YV=}?B$UoB<4axUtiyc?j*O|#`xHpMP<=4x0FB z)#g#v#{C5TVtLtXj)G{MM+71RKO?rUiqCKCmwJqbxFd zJbZsYkE8Lrcee{a(az}L2GPh5U*KU9PyMwO^h$R!6f5-RV}G9WnUcXQp8&)R5LRgb zLp9^9U^)u}7zq=IpB~5YB7tST1~5GhZD-x-;W&=M#}|02FSFV);QOg>bzse=!BD$A zGB~F_3sUPIoEa`Jg;CQWUxe#9gw=~4t=#qHv};K3Ljb1(FZ96c7r3SuCejO=jkJ~n zGnFvNR|p0;`!BN1G~0eA+iKl@eCN+YZ#DyuwUKA+Cqe3G8Utp*1R zd9!yMr9LpE>S-@dy$}{3)@S6;AP``?kB-;V!^CKA4-6wgR+c9_NPCp3VKs7bQVxk z)Zn{%CnJ!r!qM&A(QZ12>8<&xuhVY+p3V&d?>l|VXp%`pwm_Dy!(I^SRr3nn^wQkB zFdljuskxH(Oz^f~%rz8(l^QIyZbK@XT6zMF0N|P3G|M!&95gisHB19wm!8XZb(#gI zzLO_Kmj1YvSep1|mi&IMB}>K3^yhm#EOcoly6-US>20AUxXIS|I9d>o2tF+JI^sFYx~)&@^0kzgnJG z?rv{JN;fS?R-g`g%jPog0K@nm#0tu)H_oP+wO*$exs0vr2M{3`F2Xbi_y(7Xkt^E+ zSQq_n%ZgGKZm!w&=7S)ednxp?FIcb3Uk?<8L3VGf^T!J)gwU+2v`gw`{F`x4z1-+PQdr-hRv`$V|v$_0@*({s&EQU$lR5sCH4PCbz$P{X+^3^S`u|~ZYh(f!J;cB63 zX7wU_=iQfZ;vDvTu!Gb)(rDR_Wa_qR1+R-Obdjl7m)(9Vi%$ReAX$rNtrfu)?&0y zddmf?@%3;%kLNGYk7K>%vV48D6%U7tdA3E&{LZ@{?cj0YelznRzZ|N50(D8-Zoj=e zJ8hhwbsNVA4e+l4zBMj>X`M9k?O{Xv*2veXk-h2oFGKx`%Z6gMEgIX6cB9vjq@hKq zQS|M5Y`3y8*`wrB^V}Qm2l4KT6>yE&mFi2{6G*pUF+I6#w%c7QZw7rvZU(!n6ow&Y zhogs_Ty|Q$_m#^Ju6ApB%DuNi*S|eK>QDXo7z#P`9aGrIdxJpBB^cYS@_%paiScCTsBf`4sZ9JU%UHmv$*oi8Ey5W`TgzVe0(eX+mNZ=)>G%=@O0FPKVA$Qo!-uQ zcjw@ICsRZCeW!ELApfL0+dEW>9nvJ(zm$&7>-ooB^7Ke{OsH8tm?ZK=cOlSW58um@&R3FCmlQ5pi=5k0^pTGH+ z{>=B%^2$k$q6g%cKg+}Zfg3)(i+lX~Y)t0e_p{EEYhS(V@kM+j;=7ZQhkfrZ7)*}7 z1VVjkpV;I0*o&S%-}8u$lF5gj`Wg=fV>^^Ou!M9+(WWDJxU>s4x+2rEUZdA;wL9&0 zuhVO_dhKtbWC}Z~R()Y@Tb@O;_ z8azkJ%ZIx&KDnZNn1+v!A1?a5yY+PV;n(|j;_&9ozAZ^P6$c;h=RK$27U^l^jnxM- z369u6gok&-i^b*FNGYk>aYfTrKux555s^y}8ayN}0L_gCz8`1$B0@vcrjcHYObp1Cw*u5xr)XCR~cMUC81 zJCd*^bA_wlxdOaxwR?@$pUnLqF!zUXv`mSwlJIQ$?yWtX%$(4>Clh(G-|n5x-uL%U z_YXSv`};Q~3BS<8`P1P3VDPc~W%@zx@At?r_cIxIUG{l0eQ1A8J{6-o)aFep)sB8@ zjWOsk2@y*kN~rmSFwMkH0-PAst;l4j*{*xOKM(o7*LmhW^dN{Nv@9qnoGxm-pva0ZZ+p{@`&)$ZhoH zD0)uLnC=dxRx|ij{^`0Vja=8&f>yiN{gXt091>-RCwqqvGuVRoBjY)M#J^o+|IxDI z$jY}aJJ|y3+d#883DWGrkiRXLTm6-_t<}>1J%3^%UPL47tBUknJfgM~Iggs>e=_M0 z&7|7i>WL0c(wl=rma2#NY7)LBC;R=aM}Bp3elD+`N-=WpNZ;u{d3Wk`@I@_7_7D5F c{dq4v>9qn|^*=PXznw<^1?!S4=KpK}0O|wIy8r+H diff --git a/tests/extras/test_allele_translator.py b/tests/extras/test_allele_translator.py index 4e7b0fa6..a249cea0 100644 --- a/tests/extras/test_allele_translator.py +++ b/tests/extras/test_allele_translator.py @@ -111,7 +111,7 @@ def tlr(rest_dataproxy): insertion_output = { "location": { "end": 20003010, - "start":20003010, + "start": 20003010, "sequenceReference": { "refgetAccession": "SQ._0wi-qoDrvram155UmcSC-zA5ZK4fpLT", "type": "SequenceReference" @@ -265,6 +265,7 @@ def test_to_spdi(tlr): assert 1 == len(to_spdi) assert spdiexpr == to_spdi[0] + hgvs_tests = ( ("NC_000013.11:g.32936732=", { "id": "ga4gh:VA.GuDPEe-WojSx4b4DxupN3si1poaR61qL", @@ -303,7 +304,7 @@ def test_to_spdi(tlr): "type": "Allele" }), ("NC_000007.14:g.55181220del", { - "id": "ga4gh:VA.BNV6SfAuqDYKTTRknLcS-QuTryF5rSBi", + "id": "ga4gh:VA.wlYnlMsWc0ZTPZb-nQv2dXHbFcXa6J9u", "location": { "id": "ga4gh:SL.hnIOG_kul0Lf3mO1ddTRFb0GbQhtQ19t", "end": 55181220, @@ -341,7 +342,7 @@ def test_to_spdi(tlr): "type": "Allele" }), ("NC_000013.11:g.32331093_32331094dup", { - "id": "ga4gh:VA.g-q4OzcyYFC5eVQFSrbXwgJScSREvrY-", + "id": "ga4gh:VA.x5iNzjjXbb1-wWTBLMBcicYlCMwYoedq", "location": { "id": "ga4gh:SL.PJ8lHWhAMNRSrxHvkarfDjRWxF-GwaJ_", "end": 32331094, @@ -361,7 +362,7 @@ def test_to_spdi(tlr): "type": "Allele" }), ("NC_000013.11:g.32316467dup", { - "id": "ga4gh:VA._KlbF6GZCbuLxbL9z4hZE3oZSLzBHstS", + "id": "ga4gh:VA.ZAyA7Mmd7ERWN6CEd6muxn2mk_gTvEvF", "location": { "id": "ga4gh:SL.LURTeRdwh5bQf_QqPBoaA--MECYmrY5U", "end": 32316467, @@ -434,7 +435,7 @@ def test_hgvs(tlr, hgvsexpr, expected): def test_to_hgvs_invalid(tlr): # IRI is passed - iri_vo = models.Allele( + iri_vo = models.Allele( **{ "location": { "end": 1263, diff --git a/tests/extras/test_vcf_annotation.py b/tests/extras/test_vcf_annotation.py index c39ffe64..1b134794 100644 --- a/tests/extras/test_vcf_annotation.py +++ b/tests/extras/test_vcf_annotation.py @@ -8,10 +8,12 @@ TEST_DATA_DIR = "tests/extras/data" + @pytest.fixture def vcf_annotator(): return VCFAnnotator("rest") + @pytest.mark.vcr def test_annotate_vcf_grch38_noattrs(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False @@ -26,12 +28,14 @@ def test_annotate_vcf_grch38_noattrs(vcf_annotator, vcr_cassette): out_vcf_lines = out_vcf.readlines() with gzip.open(expected_vcf_no_vrs_attrs, "rt") as expected_output: expected_output_lines = expected_output.readlines() - assert out_vcf_lines == expected_output_lines + for actual_line, expected_line in zip(out_vcf_lines, expected_output_lines): + assert actual_line == expected_line assert os.path.exists(output_vrs_pkl) assert vcr_cassette.all_played os.remove(output_vcf) os.remove(output_vrs_pkl) + @pytest.mark.vcr def test_annotate_vcf_grch38_attrs(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False @@ -46,12 +50,14 @@ def test_annotate_vcf_grch38_attrs(vcf_annotator, vcr_cassette): out_vcf_lines = out_vcf.readlines() with gzip.open(expected_vcf, "rt") as expected_output: expected_output_lines = expected_output.readlines() - assert out_vcf_lines == expected_output_lines + for actual_line, expected_line in zip(out_vcf_lines, expected_output_lines): + assert actual_line == expected_line assert os.path.exists(output_vrs_pkl) assert vcr_cassette.all_played os.remove(output_vcf) os.remove(output_vrs_pkl) + @pytest.mark.vcr def test_annotate_vcf_grch38_attrs_altsonly(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False @@ -66,12 +72,14 @@ def test_annotate_vcf_grch38_attrs_altsonly(vcf_annotator, vcr_cassette): out_vcf_lines = out_vcf.readlines() with gzip.open(expected_altsonly_vcf, "rt") as expected_output: expected_output_lines = expected_output.readlines() - assert out_vcf_lines == expected_output_lines + for actual_line, expected_line in zip(out_vcf_lines, expected_output_lines): + assert actual_line == expected_line assert os.path.exists(output_vrs_pkl) assert vcr_cassette.all_played os.remove(output_vcf) os.remove(output_vrs_pkl) + @pytest.mark.vcr def test_annotate_vcf_grch37_attrs(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False @@ -92,6 +100,7 @@ def test_annotate_vcf_grch37_attrs(vcf_annotator, vcr_cassette): os.remove(output_vcf) os.remove(output_vrs_pkl) + @pytest.mark.vcr def test_annotate_vcf_pickle_only(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False @@ -106,6 +115,7 @@ def test_annotate_vcf_pickle_only(vcf_annotator, vcr_cassette): assert vcr_cassette.all_played os.remove(output_vrs_pkl) + @pytest.mark.vcr def test_annotate_vcf_vcf_only(vcf_annotator, vcr_cassette): vcr_cassette.allow_playback_repeats = False @@ -125,6 +135,7 @@ def test_annotate_vcf_vcf_only(vcf_annotator, vcr_cassette): assert not os.path.exists(output_vrs_pkl) os.remove(output_vcf) + def test_annotate_vcf_input_validation(vcf_annotator): input_vcf = f"{TEST_DATA_DIR}/test_vcf_input.vcf" @@ -132,6 +143,7 @@ def test_annotate_vcf_input_validation(vcf_annotator): vcf_annotator.annotate(input_vcf) assert str(e.value) == "Must provide one of: `vcf_out` or `vrs_pickle_out`" + @pytest.mark.vcr def test_get_vrs_object_invalid_input(vcf_annotator, caplog): """Test that _get_vrs_object method works as expected with invalid input"""