Skip to content

Commit

Permalink
fix: restrict calls to reference contigs (#101)
Browse files Browse the repository at this point in the history
* fix: remove renaming to empty string

* fix: add rule to restrict callsets to contigs in reference (needed for wgs callsets where no intersection with target region is possible).

* fix: add parentheses to catch error log of both commands

Co-authored-by: Johannes Köster <[email protected]>

* fix: switch back to only tools.yaml and increase all package versions

---------

Co-authored-by: Johannes Köster <[email protected]>
  • Loading branch information
BiancaStoecker and johanneskoester authored Sep 12, 2024
1 parent 20a469a commit 0413580
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 267 deletions.
10 changes: 5 additions & 5 deletions workflow/envs/tools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ channels:
- bioconda
- nodefaults
dependencies:
- bcftools =1.14
- samtools =1.14
- curl =7
- bedtools =2.30
- ucsc-liftover =377
- bcftools =1.20
- samtools =1.20
- curl =8
- bedtools =2.31
- ucsc-liftover =469
261 changes: 0 additions & 261 deletions workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt
Original file line number Diff line number Diff line change
@@ -1,44 +1,9 @@
chr1 1
chr10 10
chr10_GL383545v1_alt
chr10_GL383546v1_alt
chr10_KI270824v1_alt
chr10_KI270825v1_alt
chr11 11
chr11_GL383547v1_alt
chr11_JH159136v1_alt
chr11_JH159137v1_alt
chr11_KI270721v1_random KI270721.1
chr11_KI270826v1_alt
chr11_KI270827v1_alt
chr11_KI270829v1_alt
chr11_KI270830v1_alt
chr11_KI270831v1_alt
chr11_KI270832v1_alt
chr11_KI270902v1_alt
chr11_KI270903v1_alt
chr11_KI270927v1_alt
chr12 12
chr12_GL383549v1_alt
chr12_GL383550v2_alt
chr12_GL383551v1_alt
chr12_GL383552v1_alt
chr12_GL383553v2_alt
chr12_GL877875v1_alt
chr12_GL877876v1_alt
chr12_KI270833v1_alt
chr12_KI270834v1_alt
chr12_KI270835v1_alt
chr12_KI270836v1_alt
chr12_KI270837v1_alt
chr12_KI270904v1_alt
chr13 13
chr13_KI270838v1_alt
chr13_KI270839v1_alt
chr13_KI270840v1_alt
chr13_KI270841v1_alt
chr13_KI270842v1_alt
chr13_KI270843v1_alt
chr14 14
chr14_GL000009v2_random GL000009.2
chr14_GL000194v1_random GL000194.1
Expand All @@ -48,109 +13,16 @@ chr14_KI270723v1_random KI270723.1
chr14_KI270724v1_random KI270724.1
chr14_KI270725v1_random KI270725.1
chr14_KI270726v1_random KI270726.1
chr14_KI270844v1_alt
chr14_KI270845v1_alt
chr14_KI270846v1_alt
chr14_KI270847v1_alt
chr15 15
chr15_GL383554v1_alt
chr15_GL383555v2_alt
chr15_KI270727v1_random KI270727.1
chr15_KI270848v1_alt
chr15_KI270849v1_alt
chr15_KI270850v1_alt
chr15_KI270851v1_alt
chr15_KI270852v1_alt
chr15_KI270905v1_alt
chr15_KI270906v1_alt
chr16 16
chr16_GL383556v1_alt
chr16_GL383557v1_alt
chr16_KI270728v1_random KI270728.1
chr16_KI270853v1_alt
chr16_KI270854v1_alt
chr16_KI270855v1_alt
chr16_KI270856v1_alt
chr17 17
chr17_GL000205v2_random GL000205.2
chr17_GL000258v2_alt
chr17_GL383563v3_alt
chr17_GL383564v2_alt
chr17_GL383565v1_alt
chr17_GL383566v1_alt
chr17_JH159146v1_alt
chr17_JH159147v1_alt
chr17_JH159148v1_alt
chr17_KI270729v1_random KI270729.1
chr17_KI270730v1_random KI270730.1
chr17_KI270857v1_alt
chr17_KI270858v1_alt
chr17_KI270859v1_alt
chr17_KI270860v1_alt
chr17_KI270861v1_alt
chr17_KI270862v1_alt
chr17_KI270907v1_alt
chr17_KI270908v1_alt
chr17_KI270909v1_alt
chr17_KI270910v1_alt
chr18 18
chr18_GL383567v1_alt
chr18_GL383568v1_alt
chr18_GL383569v1_alt
chr18_GL383570v1_alt
chr18_GL383571v1_alt
chr18_GL383572v1_alt
chr18_KI270863v1_alt
chr18_KI270864v1_alt
chr18_KI270911v1_alt
chr18_KI270912v1_alt
chr19 19
chr19_GL000209v2_alt
chr19_GL383573v1_alt
chr19_GL383574v1_alt
chr19_GL383575v2_alt
chr19_GL383576v1_alt
chr19_GL949746v1_alt
chr19_GL949747v2_alt
chr19_GL949748v2_alt
chr19_GL949749v2_alt
chr19_GL949750v2_alt
chr19_GL949751v2_alt
chr19_GL949752v1_alt
chr19_GL949753v2_alt
chr19_KI270865v1_alt
chr19_KI270866v1_alt
chr19_KI270867v1_alt
chr19_KI270868v1_alt
chr19_KI270882v1_alt
chr19_KI270883v1_alt
chr19_KI270884v1_alt
chr19_KI270885v1_alt
chr19_KI270886v1_alt
chr19_KI270887v1_alt
chr19_KI270888v1_alt
chr19_KI270889v1_alt
chr19_KI270890v1_alt
chr19_KI270891v1_alt
chr19_KI270914v1_alt
chr19_KI270915v1_alt
chr19_KI270916v1_alt
chr19_KI270917v1_alt
chr19_KI270918v1_alt
chr19_KI270919v1_alt
chr19_KI270920v1_alt
chr19_KI270921v1_alt
chr19_KI270922v1_alt
chr19_KI270923v1_alt
chr19_KI270929v1_alt
chr19_KI270930v1_alt
chr19_KI270931v1_alt
chr19_KI270932v1_alt
chr19_KI270933v1_alt
chr19_KI270938v1_alt
chr1_GL383518v1_alt
chr1_GL383519v1_alt
chr1_GL383520v2_alt
chr1_KI270706v1_random KI270706.1
chr1_KI270707v1_random KI270707.1
chr1_KI270708v1_random KI270708.1
Expand All @@ -160,33 +32,10 @@ chr1_KI270711v1_random KI270711.1
chr1_KI270712v1_random KI270712.1
chr1_KI270713v1_random KI270713.1
chr1_KI270714v1_random KI270714.1
chr1_KI270759v1_alt
chr1_KI270760v1_alt
chr1_KI270761v1_alt
chr1_KI270762v1_alt
chr1_KI270763v1_alt
chr1_KI270764v1_alt
chr1_KI270765v1_alt
chr1_KI270766v1_alt
chr1_KI270892v1_alt
chr2 2
chr20 20
chr20_GL383577v2_alt
chr20_KI270869v1_alt
chr20_KI270870v1_alt
chr20_KI270871v1_alt
chr21 21
chr21_GL383578v2_alt
chr21_GL383579v2_alt
chr21_GL383580v2_alt
chr21_GL383581v2_alt
chr21_KI270872v1_alt
chr21_KI270873v1_alt
chr21_KI270874v1_alt
chr22 22
chr22_GL383582v2_alt
chr22_GL383583v2_alt
chr22_KB663609v1_alt
chr22_KI270731v1_random KI270731.1
chr22_KI270732v1_random KI270732.1
chr22_KI270733v1_random KI270733.1
Expand All @@ -196,129 +45,22 @@ chr22_KI270736v1_random KI270736.1
chr22_KI270737v1_random KI270737.1
chr22_KI270738v1_random KI270738.1
chr22_KI270739v1_random KI270739.1
chr22_KI270875v1_alt
chr22_KI270876v1_alt
chr22_KI270877v1_alt
chr22_KI270878v1_alt
chr22_KI270879v1_alt
chr22_KI270928v1_alt
chr2_GL383521v1_alt
chr2_GL383522v1_alt
chr2_GL582966v2_alt
chr2_KI270715v1_random KI270715.1
chr2_KI270716v1_random KI270716.1
chr2_KI270767v1_alt
chr2_KI270768v1_alt
chr2_KI270769v1_alt
chr2_KI270770v1_alt
chr2_KI270771v1_alt
chr2_KI270772v1_alt
chr2_KI270773v1_alt
chr2_KI270774v1_alt
chr2_KI270775v1_alt
chr2_KI270776v1_alt
chr2_KI270893v1_alt
chr2_KI270894v1_alt
chr3 3
chr3_GL000221v1_random GL000221.1
chr3_GL383526v1_alt
chr3_JH636055v2_alt
chr3_KI270777v1_alt
chr3_KI270778v1_alt
chr3_KI270779v1_alt
chr3_KI270780v1_alt
chr3_KI270781v1_alt
chr3_KI270782v1_alt
chr3_KI270783v1_alt
chr3_KI270784v1_alt
chr3_KI270895v1_alt
chr3_KI270924v1_alt
chr3_KI270934v1_alt
chr3_KI270935v1_alt
chr3_KI270936v1_alt
chr3_KI270937v1_alt
chr4 4
chr4_GL000008v2_random GL000008.2
chr4_GL000257v2_alt
chr4_GL383527v1_alt
chr4_GL383528v1_alt
chr4_KI270785v1_alt
chr4_KI270786v1_alt
chr4_KI270787v1_alt
chr4_KI270788v1_alt
chr4_KI270789v1_alt
chr4_KI270790v1_alt
chr4_KI270896v1_alt
chr4_KI270925v1_alt
chr5 5
chr5_GL000208v1_random GL000208.1
chr5_GL339449v2_alt
chr5_GL383530v1_alt
chr5_GL383531v1_alt
chr5_GL383532v1_alt
chr5_GL949742v1_alt
chr5_KI270791v1_alt
chr5_KI270792v1_alt
chr5_KI270793v1_alt
chr5_KI270794v1_alt
chr5_KI270795v1_alt
chr5_KI270796v1_alt
chr5_KI270897v1_alt
chr5_KI270898v1_alt
chr6 6
chr6_GL000250v2_alt
chr6_GL000251v2_alt
chr6_GL000252v2_alt
chr6_GL000253v2_alt
chr6_GL000254v2_alt
chr6_GL000255v2_alt
chr6_GL000256v2_alt
chr6_GL383533v1_alt
chr6_KB021644v2_alt
chr6_KI270758v1_alt
chr6_KI270797v1_alt
chr6_KI270798v1_alt
chr6_KI270799v1_alt
chr6_KI270800v1_alt
chr6_KI270801v1_alt
chr6_KI270802v1_alt
chr7 7
chr7_GL383534v2_alt
chr7_KI270803v1_alt
chr7_KI270804v1_alt
chr7_KI270805v1_alt
chr7_KI270806v1_alt
chr7_KI270807v1_alt
chr7_KI270808v1_alt
chr7_KI270809v1_alt
chr7_KI270899v1_alt
chr8 8
chr8_KI270810v1_alt
chr8_KI270811v1_alt
chr8_KI270812v1_alt
chr8_KI270813v1_alt
chr8_KI270814v1_alt
chr8_KI270815v1_alt
chr8_KI270816v1_alt
chr8_KI270817v1_alt
chr8_KI270818v1_alt
chr8_KI270819v1_alt
chr8_KI270820v1_alt
chr8_KI270821v1_alt
chr8_KI270822v1_alt
chr8_KI270900v1_alt
chr8_KI270901v1_alt
chr8_KI270926v1_alt
chr9 9
chr9_GL383539v1_alt
chr9_GL383540v1_alt
chr9_GL383541v1_alt
chr9_GL383542v1_alt
chr9_KI270717v1_random KI270717.1
chr9_KI270718v1_random KI270718.1
chr9_KI270719v1_random KI270719.1
chr9_KI270720v1_random KI270720.1
chr9_KI270823v1_alt
chrM MT
chrUn_GL000195v1 GL000195.1
chrUn_GL000213v1 GL000213.1
Expand Down Expand Up @@ -448,8 +190,5 @@ chrUn_KI270755v1 KI270755.1
chrUn_KI270756v1 KI270756.1
chrUn_KI270757v1 KI270757.1
chrX X
chrX_KI270880v1_alt
chrX_KI270881v1_alt
chrX_KI270913v1_alt
chrY Y
chrY_KI270740v1_random KI270740.1
18 changes: 17 additions & 1 deletion workflow/rules/eval.smk
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,28 @@ rule intersect_calls_with_target_regions:
"<(bcftools view {input.bcf}) -wa -f 1.0 -header > {output}) 2> {log}"


# Restrict a filtered callset to the contigs listed in the reference index and
# rewrite its contig header lines from that same index.
#
# Inputs:
#   calls       - filtered callset to restrict.
#   calls_index - CSI index of the callset; not referenced in the command, but
#                 `bcftools view --regions` needs an indexed input file.
#   ref_index   - .fai of the reference; column 1 supplies the contig names.
# Output:
#   the restricted callset with contig header lines taken from ref_index.
#
# The contig names are joined with `paste -sd, -` so the region list has no
# trailing comma (which would add an empty region entry), and the command
# substitution is quoted so the names are not subject to word splitting or
# glob expansion by the shell.
#
# The whole pipeline is wrapped in a subshell so stderr of both bcftools
# commands ends up in the log file.
#
# NOTE(review): `bcftools view` is called without -O, so the stream handed to
# `bcftools reheader` (and presumably the file written to {output}) is plain
# VCF text despite the .bcf extension. Left unchanged on purpose, as changing
# the intermediate format may alter how contigs are remapped; confirm that
# downstream rules rely on format auto-detection.
rule restrict_to_reference_contigs:
    input:
        calls="results/filtered-variants/{callset}.bcf",
        calls_index="results/filtered-variants/{callset}.bcf.csi",
        ref_index="resources/reference/genome.fasta.fai",
    output:
        "results/filtered-variants/{callset}_restricted.bcf",
    log:
        "logs/restrict-to-reference-contigs/{callset}.log",
    conda:
        "../envs/tools.yaml"
    shell:
        "(bcftools view --regions \"$(cut -f1 {input.ref_index} | paste -sd, -)\" {input.calls} |"
        " bcftools reheader -f {input.ref_index} > {output}) 2> {log}"


rule normalize_calls:
input:
calls=branch(
intersect_calls,
then="results/normalized-variants/{callset}_intersected.vcf",
otherwise="results/filtered-variants/{callset}.bcf",
otherwise="results/filtered-variants/{callset}_restricted.bcf",
),
ref="resources/reference/genome.fasta",
ref_index="resources/reference/genome.fasta.fai",
Expand Down

0 comments on commit 0413580

Please sign in to comment.