Skip to content

Commit

Permalink
feat: AF rna fusion filter possibility
Browse files Browse the repository at this point in the history
  • Loading branch information
jonca79 committed Sep 23, 2024
1 parent d92c538 commit d46e228
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 12 deletions.
2 changes: 1 addition & 1 deletion config/config.data.hg19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ purecn_coverage:

report_fusions:
annotation_bed: "{{PROJECT_DESIGN_DATA}}/GMS560/design/Twist_RNA_fusionpartners.bed"
fp_fusions: "{{PROJECT_DESIGN_DATA}}/GMS560/rna_fusion/filter_rna_fusions_20231024.txt"
fp_fusions: "{{PROJECT_DESIGN_DATA}}/GMS560/rna_fusion/filter_rna_fusions_20240923.txt"

report_gene_fuse:
filter_fusions: "{{PROJECT_DESIGN_DATA}}/GMS560/gene_fuse/filter_fusions_20230214.csv"
Expand Down
2 changes: 1 addition & 1 deletion config/config.data.hg38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ purecn_coverage:

report_fusions:
annotation_bed: "{{PROJECT_DESIGN_DATA}}/GMS560/design/Twist_RNA_fusionpartners_hg38.bed"
fp_fusions: "{{PROJECT_DESIGN_DATA}}/GMS560/rna_fusion/filter_rna_fusions_20231024.txt"
fp_fusions: "{{PROJECT_DESIGN_DATA}}/GMS560/rna_fusion/filter_rna_fusions_20240923.txt"

scarhrd:
reference_name: "grch38"
Expand Down
6 changes: 3 additions & 3 deletions config/references/design_files.hg19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,10 @@
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.1.1/rna_fusion/Twist_RNA_fusionpartners.bed

fp_fusions:
checksum: eca1c1cf1f1e068cbdada20bdb75c392
path: GMS560/rna_fusion/filter_rna_fusions_20231024.txt
checksum: 3886104b8c9c0c73ec8edb40b8d1428d
path: GMS560/rna_fusion/filter_rna_fusions_20240923.txt
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.4.0/rna_fusion/filter_rna_fusions_20231024.txt
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.4.0/rna_fusion/filter_rna_fusions_20240923.txt

references:
design_bed:
Expand Down
6 changes: 3 additions & 3 deletions config/references/design_files.hg38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.5.0/rna_fusion/Twist_RNA_fusionpartners.bed

fp_fusions:
checksum: eca1c1cf1f1e068cbdada20bdb75c392
path: GMS560/rna_fusion/filter_rna_fusions_20231024.txt
checksum: 3886104b8c9c0c73ec8edb40b8d1428d
path: GMS560/rna_fusion/filter_rna_fusions_20240923.txt
type: file
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.5.0/rna_fusion/filter_rna_fusions_20231024.txt
url: https://github.com/genomic-medicine-sweden/Twist_Solid_pipeline_files/raw/v0.5.0/rna_fusion/filter_rna_fusions_20240923.txt

references:
design_bed:
Expand Down
49 changes: 45 additions & 4 deletions workflow/scripts/report_fusions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@
gene2 = columns[1]
read_limit_SF = int(columns[2])
read_limit_FC = int(columns[3])
af_limit = float(columns[4])
if gene2 == "housekeeping":
housekeeping_genes[gene1] = [read_limit_SF, read_limit_FC]
housekeeping_genes[gene1] = [read_limit_SF, read_limit_FC, af_limit, 0]
if gene1 not in artefact_gene_dict:
artefact_gene_dict[gene1] = {}
artefact_gene_dict[gene1][gene2] = [read_limit_SF, read_limit_FC]
artefact_gene_dict[gene1][gene2] = [read_limit_SF, read_limit_FC, af_limit, 0]

# Only keep fusions with one gene that are in the design
design_genes = {}
Expand Down Expand Up @@ -135,6 +136,20 @@
fusion_dict[break_points] = {}
fusion_dict[break_points]["Arriba"] = [gene1, gene2, exon1, exon2, confidence, "", predicted_effect, breakpoint1, breakpoint2,
total_split_reads, discordant_mates, total_supporting_reads]
# dedup coverage for potential filtering in StarFusion and FusionCatcher
exon_coverage1 = 0
exon_coverage2 = 0
max_exon_coverage = 0
for exon in dedup_coverage_list:
if chrom1 == exon[0] and pos1 >= exon[1] and pos1 <= exon[2]:
exon_coverage1 = exon[3]
if chrom2 == exon[0] and pos2 >= exon[1] and pos2 <= exon[2]:
exon_coverage2 = exon[3]
max_exon_coverage = max(exon_coverage1, exon_coverage2)
if gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]:
artefact_gene_dict[gene1][gene2][3] = max_exon_coverage
if gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]:
artefact_gene_dict[gene2][gene1][3] = max_exon_coverage


# Star-fusions
Expand All @@ -161,7 +176,7 @@
continue
if int(Junction_read_count) <= star_fusion_low_support:
continue
# Higher demand of read support for genes with frequent FP, house keeping genes
# Higher demand of read support for genes with frequent FP gene fusions and house keeping genes
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
if int(Junction_read_count) < artefact_gene_dict[gene1][gene2][0]:
continue
Expand All @@ -174,6 +189,19 @@
if gene2 in housekeeping_genes:
if int(Junction_read_count) < housekeeping_genes[gene2][0]:
continue
# Min AF (1%) for frequent FP gene fusions and housekeeping gene
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
if int(Junction_read_count) / artefact_gene_dict[gene1][gene2][3] < 0.01:
continue
if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]):
if int(Junction_read_count) / artefact_gene_dict[gene2][gene1][3] < 0.01:
continue
if gene1 in housekeeping_genes:
if int(Junction_read_count) / housekeeping_genes[gene1][3] < 0.01:
continue
if gene2 in housekeeping_genes:
if int(Junction_read_count) / housekeeping_genes[gene2][3] < 0.01:
continue
breakpoint1 = lline[7][:-2]
breakpoint2 = lline[9][:-2]
FFPM = lline[11]
Expand Down Expand Up @@ -240,7 +268,7 @@
continue
if int(Spanning_reads_unique) <= fusioncatcher_low_support:
continue
# Higher demand of read support for genes with frequent FP, house keeping genes, and pool2 genes without fusion to pool1 gene
# Higher demand of read support for genes with frequent FP gene fusions and house keeping genes
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
if int(Spanning_reads_unique) < artefact_gene_dict[gene1][gene2][1]:
continue
Expand All @@ -253,6 +281,19 @@
if gene2 in housekeeping_genes:
if int(Spanning_reads_unique) < housekeeping_genes[gene2][1]:
continue
# Min AF (1%) for frequent FP gene fusions and housekeeping gene
if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
if int(Spanning_reads_unique) / artefact_gene_dict[gene1][gene2][3] < 0.01:
continue
if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]):
if int(Spanning_reads_unique) / artefact_gene_dict[gene2][gene1][3] < 0.01:
continue
if gene1 in housekeeping_genes:
if int(Spanning_reads_unique) / housekeeping_genes[gene1][3] < 0.01:
continue
if gene2 in housekeeping_genes:
if int(Spanning_reads_unique) / housekeeping_genes[gene2][3] < 0.01:
continue
# Flag fusions annotated that are fusions with very high probability
fp_db = [
"banned", "bodymap2", "cacg", "1000genomes", "conjoing", "cortex", "distance1000bp", "ensembl_fully_overlapping",
Expand Down

0 comments on commit d46e228

Please sign in to comment.