-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of https://github.com/DiltheyLab/graph-genome-w…
- Loading branch information
Showing
22 changed files
with
1,615 additions
and
866 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
|
||
### callsets to be used as input to genotyping tools (multiple ones can be listed)
callsets:
  HGSVC:
    # multi-allelic VCF file
    multi: "reduced-data/Pangenome_graph_freeze3_64haplotypes.vcf.gz"
    # bi-allelic VCF file
    bi: "reduced-data/Callset_freeze3_64haplotypes.vcf.gz"
    # reference genome in FASTA format (and its .fai index)
    reference: "reduced-data/GRCh38_full_analysis_set_plus_decoy_hla.fa"
    reference_fai: "reduced-data/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai"
    leave1out: # samples for the leave-one-out evaluation
      NA24385:
        regions: # options: all, multi, bi
          - all
          - multi
          - bi
        filters: # options: all, typable
          - typable
        vartype: # variant types contained in the callset; options: snp|indels|large-deletion|large-insertion
          - snp
          - indels
          - large-deletion
          - large-insertion
    external: # samples for the external (benchmark-based) evaluation
      NA24385:
        # benchmark dataset used as truth for the external evaluation
        path: "reduced-data/HG002_GRCh38_1_22_v4.2.1_benchmark.vcf.gz"
        # path to BED file with callable regions of the benchmark
        callable_regions: "data/downloaded/vcf/giab/hg38/HG002_GRCh38_1_22_v4.2.1_benchmark.bed"
        vartype: # options: snp-indel, sv
          - snp-indel
          - sv
        regions: # options: all, multi, bi
          - all
        filters: # options: all, typable
          - typable

# read data: TSV listing per sample the pedigree, sex, population and FASTA/FASTQ path.
# Columns: FamilyID SampleID FatherID MotherID Sex Population Superpopulation SampleIllumina
reads: "resources/reads_reduced.tsv"

# PanGenie command. Different versions can be run by listing several commandlines
pangenie: {}

# PanGenie commands to be used for modularised versions of PanGenie
pangenie-modules:
  pangenie.v3: "/usr/local/bin/PanGenie_v3.0.0"

# Downsampling coverages for the leave-one-out experiment. If reads shall not be downsampled, leave empty.
downsampling: []

# Paths to other programs used by the pipeline
programs:
  rtg: "/home/ubuntu/rtg-tools-3.12.1/rtg"
  bwa: "/usr/bin/bwa"
  bayestyper: "/usr/local/bin/bayesTyper"
  bayestyper_tools: "/usr/local/bin/bayesTyperTools"
  graphtyper: "/usr/local/bin/graphtyper"
  kmc: "/usr/bin/kmc"
  truvari: "/usr/local/bin/truvari"

# Other parameters
utils:
  bayestyper_reference_canon: "data/downloaded/bayestyper_utils/bayestyper_GRCh38_bundle_v1.3/GRCh38_canon.fa"
  bayestyper_reference_decoy: "data/downloaded/bayestyper_utils/bayestyper_GRCh38_bundle_v1.3/GRCh38_decoy.fa"
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#FamilyID SampleID FatherID MotherID Sex Population Superpopulation SampleIllumina | ||
NA24385 NA24385 0 0 1 UNKNOWN UNKNOWN reduced-data/NA24385_raw.fastq.gz | ||
1463B NA12878 NA12891 NA12892 2 CEU EUR reduced-data/NA12878_raw.fastq.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,54 @@ | ||
configfile: 'config/config.yaml'
include: 'rules/download-data.smk'
include: 'rules/downsample-reads.smk'
include: 'rules/leave-one-out-experiments.smk'
include: 'rules/preprocessing.smk'
include: 'rules/genotyping.smk'
include: 'rules/evaluation.smk'


def _samples(pipeline):
    """All sample names configured for *pipeline* ('leave1out' or 'external'), across all callsets."""
    return [s for c in config["callsets"].values() for s in c[pipeline]]


def _sample_values(pipeline, key):
    """All values listed under *key* (e.g. 'vartype', 'filters', 'regions') for every sample of *pipeline*, across all callsets."""
    return [v for c in config["callsets"].values() for sample in c[pipeline].values() for v in sample[key]]


# 'full' (no downsampling) plus any additional coverages requested in the config
coverages = ['full'] + config['downsampling']

callsets = list(config["callsets"])
chromosomes = [str(i) for i in range(1, 23)] + ["X"]
# genotypers to run: every configured PanGenie version plus the external tools
versions_to_run = list(config['pangenie']) + list(config['pangenie-modules']) + ['bayestyper', 'graphtyper']

samples_leave1out = _samples("leave1out")
samples_external = _samples("external")
vartypes_leave1out = _sample_values("leave1out", "vartype")
vartypes_external = _sample_values("external", "vartype")
filters_leave1out = _sample_values("leave1out", "filters")
filters_external = _sample_values("external", "filters")
regions_leave1out = _sample_values("leave1out", "regions")
regions_external = _sample_values("external", "regions")

pipelines = ['leave1out', 'external'] ## possibilities: leave1out, external
|
||
# Default target: leave-one-out resource usage and version-comparison plots for every callset.
rule all:
    input:
        expand("results/leave-one-out/{callset}/plots/resources/resources_{callset}-{coverage}.pdf",
               callset=list(config['callsets']), coverage=coverages),
        expand("results/leave-one-out/{callset}/plots/comparison-versions/{metric}/{metric}_{coverage}_{regions}.pdf",
               callset=list(config['callsets']),
               metric=['concordance', 'precision-recall-typable', 'untyped', 'concordance-vs-untyped'],
               coverage=coverages,
               regions=['biallelic', 'multiallelic'])


# Same targets as 'all'; kept as a named entry point to run only the leave-one-out analysis.
# NOTE(review): the merged version carried two 'input:' sections (merge artifact, rejected
# by Snakemake as a duplicate directive); collapsed into a single input section.
rule leave_one_out:
    input:
        expand("results/leave-one-out/{callset}/plots/resources/resources_{callset}-{coverage}.pdf",
               callset=list(config['callsets']), coverage=coverages),
        expand("results/leave-one-out/{callset}/plots/comparison-versions/{metric}/{metric}_{coverage}_{regions}.pdf",
               callset=list(config['callsets']),
               metric=['concordance', 'precision-recall-typable', 'untyped', 'concordance-vs-untyped'],
               coverage=coverages,
               regions=['biallelic', 'multiallelic'])
|
||
|
||
# generate all combinations of desired output files to be produced | ||
def eval_files(wildcards): | ||
filenames = [] | ||
for p in pipelines: | ||
if p == 'leave1out': | ||
regions, samples, filters, vartypes = regions_leave1out, samples_leave1out, filters_leave1out, vartypes_leave1out | ||
if p == 'external': | ||
regions, samples, filters, vartypes = regions_external, samples_external, filters_external, vartypes_external | ||
for c in callsets: | ||
for cov in coverages: | ||
for v in versions_to_run: | ||
for r in regions: | ||
for t in samples: | ||
for f in filters: | ||
for var in vartypes: | ||
if var in ['snp', 'indels', 'snp-indel']: | ||
filenames.append("evaluation/" + c + "/" + t + "/" + p + "/" + v + "-" + cov + "-" + f + "-" + r + "-" + var + "/summary.txt") | ||
if var in ['large-insertion', 'large-deletion', 'sv']: | ||
filenames.append("evaluation/" + c + "/" + t + "/" + p + "/" + v + "-" + cov + "-" + f + "-" + r + "-" + var + "/summary.json") | ||
|
||
return filenames | ||
|
||
# Aggregate target: request every evaluation summary file enumerated by eval_files().
rule evaluation:
    input:
        eval_files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Snakemake profile: default command-line options applied via `snakemake --profile ...`.
cores: 28                              # run up to 28 jobs/threads in parallel
latency-wait: 30                       # seconds to wait for output files to appear (filesystem latency)
keep-going: False                      # stop scheduling new jobs after the first failure
rerun-incomplete: True                 # re-run jobs whose outputs are incomplete
restart-times: 0                       # do not automatically restart failed jobs
max-status-checks-per-second: 0.001    # throttle job-status checks
use-conda: True                        # execute rules inside their conda environments
conda-frontend: conda                  # use plain conda (not mamba) to create the environments
nolock: False                          # keep working-directory locking enabled
configfile: config/config.yaml         # pipeline configuration file used with this profile
|
11 changes: 11 additions & 0 deletions
11
evaluation_pipeline/workflow/profile-reduced-data/config.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Snakemake profile for the reduced test dataset: same defaults as the main profile,
# but pointing at the reduced-data pipeline configuration.
cores: 28                              # run up to 28 jobs/threads in parallel
latency-wait: 30                       # seconds to wait for output files to appear (filesystem latency)
keep-going: False                      # stop scheduling new jobs after the first failure
rerun-incomplete: True                 # re-run jobs whose outputs are incomplete
restart-times: 0                       # do not automatically restart failed jobs
max-status-checks-per-second: 0.001    # throttle job-status checks
use-conda: True                        # execute rules inside their conda environments
conda-frontend: conda                  # use plain conda (not mamba) to create the environments
nolock: False                          # keep working-directory locking enabled
# NOTE(review): extension is .yml here but .yaml in the main profile — confirm the actual file name.
configfile: config/config_reduced-data.yml
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.