From e87d8984dfe905530aa335cf78da6cf535d6a31c Mon Sep 17 00:00:00 2001 From: pfurio Date: Tue, 21 Apr 2020 10:01:35 +0100 Subject: [PATCH 1/2] models: improve DiseasePanel data model --- .../clinical/interpretation/Cancer.java | 107 ++++++++++++++++ .../interpretation/ClinicalProperty.java | 19 ++- .../clinical/interpretation/DiseasePanel.java | 121 ++++++++++-------- .../clinical/ClinicalVariantCreator.java | 46 +------ 4 files changed, 191 insertions(+), 102 deletions(-) create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Cancer.java diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Cancer.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Cancer.java new file mode 100644 index 000000000..f77d689af --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Cancer.java @@ -0,0 +1,107 @@ +package org.opencb.biodata.models.clinical.interpretation; + +import org.opencb.biodata.models.clinical.interpretation.ClinicalProperty.RoleInCancer; + +import java.util.List; + +public class Cancer { + + private boolean somatic; + private boolean germline; + private RoleInCancer role; + private List tissues; + private List somaticTumourTypes; + private List germlineTumourTypes; + private List fusionPartners; + + public Cancer() { + } + + public Cancer(boolean somatic, boolean germline, RoleInCancer role, List tissues, + List somaticTumourTypes, List germlineTumourTypes, List fusionPartners) { + this.somatic = somatic; + this.germline = germline; + this.role = role; + this.tissues = tissues; + this.somaticTumourTypes = somaticTumourTypes; + this.germlineTumourTypes = germlineTumourTypes; + this.fusionPartners = fusionPartners; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Cancer{"); + sb.append("somatic=").append(somatic); + sb.append(", germline=").append(germline); + 
sb.append(", role=").append(role); + sb.append(", tissues=").append(tissues); + sb.append(", somaticTumourTypes=").append(somaticTumourTypes); + sb.append(", germlineTumourTypes=").append(germlineTumourTypes); + sb.append(", fusionPartners=").append(fusionPartners); + sb.append('}'); + return sb.toString(); + } + + public boolean isSomatic() { + return somatic; + } + + public Cancer setSomatic(boolean somatic) { + this.somatic = somatic; + return this; + } + + public boolean isGermline() { + return germline; + } + + public Cancer setGermline(boolean germline) { + this.germline = germline; + return this; + } + + public RoleInCancer getRole() { + return role; + } + + public Cancer setRole(RoleInCancer role) { + this.role = role; + return this; + } + + public List getTissues() { + return tissues; + } + + public Cancer setTissues(List tissues) { + this.tissues = tissues; + return this; + } + + public List getSomaticTumourTypes() { + return somaticTumourTypes; + } + + public Cancer setSomaticTumourTypes(List somaticTumourTypes) { + this.somaticTumourTypes = somaticTumourTypes; + return this; + } + + public List getGermlineTumourTypes() { + return germlineTumourTypes; + } + + public Cancer setGermlineTumourTypes(List germlineTumourTypes) { + this.germlineTumourTypes = germlineTumourTypes; + return this; + } + + public List getFusionPartners() { + return fusionPartners; + } + + public Cancer setFusionPartners(List fusionPartners) { + this.fusionPartners = fusionPartners; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/ClinicalProperty.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/ClinicalProperty.java index 7e3a4296c..ef2055560 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/ClinicalProperty.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/ClinicalProperty.java @@ -22,22 +22,22 @@ 
public class ClinicalProperty { public enum ModeOfInheritance { - MONOALLELIC, + AUTOSOMAL_DOMINANT, MONOALLELIC_NOT_IMPRINTED, MONOALLELIC_MATERNALLY_IMPRINTED, MONOALLELIC_PATERNALLY_IMPRINTED, - BIALLELIC, + AUTOSOMAL_RECESSIVE, MONOALLELIC_AND_BIALLELIC, MONOALLELIC_AND_MORE_SEVERE_BIALLELIC, - XLINKED_BIALLELIC, - XLINKED_MONOALLELIC, - YLINKED, + X_LINKED_DOMINANT, + X_LINKED_RECESSIVE, + Y_LINKED, MITOCHONDRIAL, // Not modes of inheritance, but... DE_NOVO, COMPOUND_HETEROZYGOUS, - + MENDELIAN_ERROR, UNKNOWN } @@ -46,6 +46,13 @@ public enum Penetrance { INCOMPLETE } + public enum Confidence { + HIGH, + MEDIUM, + LOW, + REJECTED + } + public enum RoleInCancer { ONCOGENE, TUMOR_SUPPRESSOR_GENE, diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/DiseasePanel.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/DiseasePanel.java index ea24693a2..1be33af55 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/DiseasePanel.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/DiseasePanel.java @@ -20,8 +20,9 @@ package org.opencb.biodata.models.clinical.interpretation; import com.fasterxml.jackson.databind.ObjectMapper; -import org.opencb.biodata.models.core.OntologyTermAnnotation; -import org.opencb.biodata.models.clinical.Phenotype; +import org.opencb.biodata.models.clinical.interpretation.ClinicalProperty.Confidence; +import org.opencb.biodata.models.clinical.interpretation.ClinicalProperty.ModeOfInheritance; +import org.opencb.biodata.models.core.OntologyTerm; import org.opencb.biodata.models.core.Xref; import java.io.IOException; @@ -37,7 +38,7 @@ public class DiseasePanel { private String name; private List categories; - private List phenotypes; + private List disorders; private List tags; private List variants; @@ -66,14 +67,14 @@ public DiseasePanel(String id, String name) { this.name = name; } - public 
DiseasePanel(String id, String name, List categories, List phenotypes, + public DiseasePanel(String id, String name, List categories, List disorders, List tags, List variants, List genes, List strs, List regions, Map stats, SourcePanel source, String creationDate, String modificationDate, String description, Map attributes) { this.id = id; this.name = name; this.categories = categories; - this.phenotypes = phenotypes; + this.disorders = disorders; this.tags = tags; this.variants = variants; this.genes = genes; @@ -224,11 +225,12 @@ public static class VariantPanel extends Common { public VariantPanel() { } - public VariantPanel(String id, List xrefs, String modeOfInheritance, Penetrance penetrance, - String confidence, List evidences, List publications, - List phenotypes, List coordinates, String reference, - String alternate) { - super(id, xrefs, modeOfInheritance, penetrance, confidence, evidences, publications, phenotypes, coordinates); + public VariantPanel(String id, List xrefs, ModeOfInheritance modeOfInheritance, Penetrance penetrance, + Confidence confidence, List evidences, List publications, + List phenotypes, List coordinates, Cancer cancer, + String reference, String alternate) { + super(id, xrefs, modeOfInheritance, penetrance, confidence, evidences, publications, phenotypes, + coordinates, cancer); this.reference = reference; this.alternate = alternate; } @@ -325,20 +327,21 @@ public static class Common { protected String id; protected List xrefs; - protected String modeOfInheritance; + protected ModeOfInheritance modeOfInheritance; protected Penetrance penetrance; - protected String confidence; + protected Confidence confidence; protected List evidences; protected List publications; - protected List phenotypes; + protected List phenotypes; protected List coordinates; + protected Cancer cancer; public Common() { } - public Common(String id, List xrefs, String modeOfInheritance, Penetrance penetrance, String confidence, - List evidences, List 
publications, List phenotypes, - List coordinates) { + public Common(String id, List xrefs, ModeOfInheritance modeOfInheritance, Penetrance penetrance, + Confidence confidence, List evidences, List publications, + List phenotypes, List coordinates, Cancer cancer) { this.id = id; this.xrefs = xrefs; this.modeOfInheritance = modeOfInheritance; @@ -348,21 +351,24 @@ public Common(String id, List xrefs, String modeOfInheritance, Penetrance this.publications = publications; this.phenotypes = phenotypes; this.coordinates = coordinates; + this.cancer = cancer; } @Override public String toString() { - return "Common{" + - "id='" + id + '\'' + - ", xrefs=" + xrefs + - ", modeOfInheritance='" + modeOfInheritance + '\'' + - ", penetrance=" + penetrance + - ", confidence='" + confidence + '\'' + - ", evidences=" + evidences + - ", publications=" + publications + - ", phenotypes=" + phenotypes + - ", coordinates=" + coordinates + - '}'; + final StringBuilder sb = new StringBuilder("Common{"); + sb.append("id='").append(id).append('\''); + sb.append(", xrefs=").append(xrefs); + sb.append(", modeOfInheritance=").append(modeOfInheritance); + sb.append(", penetrance=").append(penetrance); + sb.append(", confidence=").append(confidence); + sb.append(", evidences=").append(evidences); + sb.append(", publications=").append(publications); + sb.append(", phenotypes=").append(phenotypes); + sb.append(", coordinates=").append(coordinates); + sb.append(", cancer=").append(cancer); + sb.append('}'); + return sb.toString(); } public String getId() { @@ -374,11 +380,11 @@ public Common setId(String id) { return this; } - public String getModeOfInheritance() { + public ModeOfInheritance getModeOfInheritance() { return modeOfInheritance; } - public Common setModeOfInheritance(String modeOfInheritance) { + public Common setModeOfInheritance(ModeOfInheritance modeOfInheritance) { this.modeOfInheritance = modeOfInheritance; return this; } @@ -392,11 +398,11 @@ public Common 
setPenetrance(Penetrance penetrance) { return this; } - public String getConfidence() { + public Confidence getConfidence() { return confidence; } - public Common setConfidence(String confidence) { + public Common setConfidence(Confidence confidence) { this.confidence = confidence; return this; } @@ -419,11 +425,11 @@ public Common setPublications(List publications) { return this; } - public List getPhenotypes() { + public List getPhenotypes() { return phenotypes; } - public Common setPhenotypes(List phenotypes) { + public Common setPhenotypes(List phenotypes) { this.phenotypes = phenotypes; return this; } @@ -445,6 +451,15 @@ public Common setCoordinates(List coordinates) { this.coordinates = coordinates; return this; } + + public Cancer getCancer() { + return cancer; + } + + public Common setCancer(Cancer cancer) { + this.cancer = cancer; + return this; + } } public static class RegionPanel extends Common { @@ -458,13 +473,13 @@ public static class RegionPanel extends Common { public RegionPanel() { } - public RegionPanel(String name, List xrefs, String modeOfInheritance, Penetrance penetrance, - String confidence, List evidences, List publications, - List phenotypes, List coordinates, String description, - VariantType typeOfVariants, String haploinsufficiencyScore, String triplosensitivityScore, - int requiredOverlapPercentage) { + public RegionPanel(String name, List xrefs, ModeOfInheritance modeOfInheritance, Penetrance penetrance, + Confidence confidence, List evidences, List publications, + List phenotypes, List coordinates, Cancer cancer, + String description, VariantType typeOfVariants, String haploinsufficiencyScore, + String triplosensitivityScore, int requiredOverlapPercentage) { super(name, xrefs, modeOfInheritance, penetrance, confidence, evidences, publications, phenotypes, - coordinates); + coordinates, cancer); this.description = description; this.typeOfVariants = typeOfVariants; this.haploinsufficiencyScore = haploinsufficiencyScore; @@ -554,11 
+569,12 @@ public static class STR extends Common { public STR() { } - public STR(String id, List xrefs, String modeOfInheritance, Penetrance penetrance, String confidence, - List evidences, List publications, List phenotypes, - List coordinates, String repeatedSequence, int normalRepeats, int pathogenicRepeats) { + public STR(String id, List xrefs, ModeOfInheritance modeOfInheritance, Penetrance penetrance, + Confidence confidence, List evidences, List publications, + List phenotypes, List coordinates, Cancer cancer, String repeatedSequence, + int normalRepeats, int pathogenicRepeats) { super(id, xrefs, modeOfInheritance, penetrance, confidence, evidences, publications, phenotypes, - coordinates); + coordinates, cancer); this.repeatedSequence = repeatedSequence; this.normalRepeats = normalRepeats; this.pathogenicRepeats = pathogenicRepeats; @@ -621,11 +637,12 @@ public static class GenePanel extends Common { public GenePanel() { } - public GenePanel(String id, String name, List xrefs, String modeOfInheritance, Penetrance penetrance, - String confidence, List evidences, List publications, - List phenotypes, List coordinates) { + public GenePanel(String id, String name, List xrefs, ModeOfInheritance modeOfInheritance, + Penetrance penetrance, Confidence confidence, List evidences, + List publications, List phenotypes, List coordinates, + Cancer cancer) { super(id, xrefs, modeOfInheritance, penetrance, confidence, evidences, publications, phenotypes, - coordinates); + coordinates, cancer); this.name = name; } @@ -661,7 +678,7 @@ public String toString() { sb.append("id='").append(id).append('\''); sb.append(", name='").append(name).append('\''); sb.append(", categories=").append(categories); - sb.append(", phenotypes=").append(phenotypes); + sb.append(", disorders=").append(disorders); /* label follows the field renamed from phenotypes */ sb.append(", tags=").append(tags); sb.append(", variants=").append(variants); sb.append(", genes=").append(genes); @@ -704,12 +721,12 @@ public DiseasePanel setCategories(List 
categories) { return this; } - public List getPhenotypes() { - return phenotypes; + public List getDisorders() { + return disorders; } - public DiseasePanel setPhenotypes(List phenotypes) { - this.phenotypes = phenotypes; + public DiseasePanel setDisorders(List disorders) { + this.disorders = disorders; return this; } diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/ClinicalVariantCreator.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/ClinicalVariantCreator.java index 3ed472064..e91b4950d 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/ClinicalVariantCreator.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/ClinicalVariantCreator.java @@ -184,12 +184,11 @@ protected Map> getGeneTo // Put gene IDs if (CollectionUtils.isNotEmpty(panel.getGenes())) { for (DiseasePanel.GenePanel panelGene : panel.getGenes()) { - if (StringUtils.isNotEmpty(panelGene.getId()) && StringUtils.isNotEmpty(panelGene.getModeOfInheritance())) { + if (StringUtils.isNotEmpty(panelGene.getId()) && panelGene.getModeOfInheritance() != null) { if (!idToPanelMoiMap.containsKey(panelGene.getId())) { idToPanelMoiMap.put(panelGene.getId(), new HashMap()); } - idToPanelMoiMap.get(panelGene.getId()).put(panel.getId(), - getMoiFromGenePanel(panelGene.getModeOfInheritance())); + idToPanelMoiMap.get(panelGene.getId()).put(panel.getId(), panelGene.getModeOfInheritance()); } } } @@ -352,47 +351,6 @@ protected List createClinicalVariantEvidences(String ti return clinicalVariantEvidences; } - private ClinicalProperty.ModeOfInheritance getMoiFromGenePanel(String inputMoi) { - if (org.apache.commons.lang3.StringUtils.isEmpty(inputMoi)) { - return ModeOfInheritance.UNKNOWN; - } - - String moi = inputMoi.toUpperCase(); - - if (moi.startsWith("BIALLELIC")) { - return ModeOfInheritance.BIALLELIC; - } - if (moi.startsWith("MONOALLELIC")) { - if (moi.contains("NOT")) { - return ModeOfInheritance.MONOALLELIC_NOT_IMPRINTED; - 
} else if (moi.contains("MATERNALLY")) { - return ModeOfInheritance.MONOALLELIC_MATERNALLY_IMPRINTED; - } else if (moi.contains("PATERNALLY")) { - return ModeOfInheritance.MONOALLELIC_PATERNALLY_IMPRINTED; - } else { - return ModeOfInheritance.MONOALLELIC; - } - } - if (moi.startsWith("BOTH")) { - if (moi.contains("SEVERE")) { - return ModeOfInheritance.MONOALLELIC_AND_MORE_SEVERE_BIALLELIC; - } else if (moi.contains("")) { - return ModeOfInheritance.MONOALLELIC_AND_BIALLELIC; - } - } - if (moi.startsWith("MITOCHONDRIAL")) { - return ModeOfInheritance.MITOCHONDRIAL; - } - if (moi.startsWith("X-LINKED")) { - if (moi.contains("BIALLELIC")) { - return ModeOfInheritance.XLINKED_BIALLELIC; - } else { - return ModeOfInheritance.XLINKED_MONOALLELIC; - } - } - return ModeOfInheritance.UNKNOWN; - } - public List groupCHVariants(Map> clinicalVariantMap) { List clinicalVariants = new ArrayList<>(); From 80ece47378c96c85332f1d3aff3e8c6d88ac48d0 Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 27 Apr 2020 12:12:35 +0100 Subject: [PATCH 2/2] tools: add new DiseasePanel parsers, #183 --- .../tools/clinical/DiseasePanelParsers.java | 472 ++++++++++++++++++ 1 file changed, 472 insertions(+) create mode 100644 biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/DiseasePanelParsers.java diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/DiseasePanelParsers.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/DiseasePanelParsers.java new file mode 100644 index 000000000..f3a8b5a65 --- /dev/null +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/clinical/DiseasePanelParsers.java @@ -0,0 +1,472 @@ +package org.opencb.biodata.tools.clinical; + +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.clinical.interpretation.Cancer; +import 
org.opencb.biodata.models.clinical.interpretation.ClinicalProperty; +import org.opencb.biodata.models.clinical.interpretation.DiseasePanel; +import org.opencb.biodata.models.core.OntologyTerm; +import org.opencb.biodata.models.core.Xref; +import org.opencb.commons.utils.FileUtils; +import org.opencb.commons.utils.ListUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; + +public class DiseasePanelParsers { + + protected static Logger logger = LoggerFactory.getLogger(DiseasePanelParsers.class); + + public static DiseasePanel parseCensus(Path censusTsvFile) throws IOException { + Set myKeys = new HashSet<>(Arrays.asList("Gene Symbol", "Name", "Entrez GeneId", "Genome Location", + "Tier", "Hallmark", "Chr Band", "Somatic", "Germline", "Tumour Types(Somatic)", + "Tumour Types(Germline)", "Cancer Syndrome", "Tissue Type", "Molecular Genetics", "Role in Cancer", + "Mutation Types", "Translocation Partner", "Other Germline Mut", "Other Syndrome", "Synonyms")); + + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(censusTsvFile)) { + Map keyPositionMap = new HashMap<>(); + String[] header = bufferedReader.readLine().split("\t"); + for (int i = 0; i < header.length; i++) { + String key = header[i]; + if (!myKeys.contains(key)) { + throw new IOException("Key '" + key + "' from census file not found in our whitelist"); + } + keyPositionMap.put(i, key); + } + + DiseasePanel panel = new DiseasePanel("gene-census", "gene-census", new LinkedList<>(), new LinkedList<>(), + new LinkedList<>(), new LinkedList<>(), new LinkedList<>(), new LinkedList<>(), new LinkedList<>(), + new HashMap<>(), new DiseasePanel.SourcePanel("", "", "", "", ""), "", "", "", new HashMap<>()); + String line; + while ((line = bufferedReader.readLine()) != null) { + String[] splittedLine = line.split("\t"); + + DiseasePanel.GenePanel genePanel = new 
DiseasePanel.GenePanel("", "", new LinkedList<>(), + ClinicalProperty.ModeOfInheritance.UNKNOWN, null, null, new LinkedList<>(), new LinkedList<>(), + new LinkedList<>(), new LinkedList<>(), new Cancer(false, false, null, new LinkedList<>(), + new LinkedList<>(), new LinkedList<>(), new LinkedList<>())); + for (int i = 0; i < splittedLine.length; i++) { + String value = StringUtils.strip(splittedLine[i], "\"'"); + + switch (keyPositionMap.get(i)) { + case "Gene Symbol": + genePanel.setId(value); + genePanel.setName(value); + break; + case "Name": + genePanel.getXrefs().add(new Xref(value, "Census", "Census", "Name")); + break; + case "Entrez GeneId": + genePanel.getXrefs().add(new Xref(value, "Census", "Census", "Entrez GeneId")); + break; + case "Genome Location": + genePanel.getCoordinates().add(new DiseasePanel.Coordinate("GRCh38", value, "Census")); + break; + case "Tier": + if ("1".equals(value)) { + genePanel.setConfidence(ClinicalProperty.Confidence.HIGH); + } else if ("2".equals(value)) { + genePanel.setConfidence(ClinicalProperty.Confidence.MEDIUM); + } else { + genePanel.setConfidence(ClinicalProperty.Confidence.LOW); + } + break; + case "Hallmark": + break; + case "Chr Band": + break; + case "Somatic": + if ("yes".equals(value)) { + genePanel.getCancer().setSomatic(true); + } + break; + case "Germline": + if ("yes".equals(value)) { + genePanel.getCancer().setGermline(true); + } + break; + case "Tumour Types(Somatic)": + if (StringUtils.isNotEmpty(value)) { + List tumourTypes = Arrays.asList(value.split(", ")); + genePanel.getCancer().setSomaticTumourTypes(tumourTypes); + } + break; + case "Tumour Types(Germline)": + if (StringUtils.isNotEmpty(value)) { + List tumourTypes = Arrays.asList(value.split(", ")); + genePanel.getCancer().setGermlineTumourTypes(tumourTypes); + } + break; + case "Cancer Syndrome": + if (StringUtils.isNotEmpty(value)) { + genePanel.getPhenotypes().add(new OntologyTerm(value, value, "Census", "", "", "", + Collections.emptyList(), 
Collections.emptyList(), Collections.emptyList(), + Collections.emptyList())); + } + break; + case "Tissue Type": + if (StringUtils.isNotEmpty(value)) { + List tissues = Arrays.asList(value.split(", ")); + genePanel.getCancer().setTissues(tissues); + } + break; + case "Molecular Genetics": + if (StringUtils.isNotEmpty(value)) { + if ("Dom".equals(value)) { + genePanel.setModeOfInheritance(ClinicalProperty.ModeOfInheritance.AUTOSOMAL_DOMINANT); + } else if ("Rec".equals(value)) { + genePanel.setModeOfInheritance(ClinicalProperty.ModeOfInheritance.AUTOSOMAL_RECESSIVE); + } else if ("Dom/Rec".equals(value)) { + genePanel.setModeOfInheritance(ClinicalProperty.ModeOfInheritance.AUTOSOMAL_DOMINANT); + } else if ("Rec/X".equals(value)) { + genePanel.setModeOfInheritance(ClinicalProperty.ModeOfInheritance.X_LINKED_RECESSIVE); + } else { + logger.warn("Unknown moi '{}'", value); /* mode of inheritance stays at the UNKNOWN default */ + } + } + break; + case "Role in Cancer": + if (StringUtils.isNotEmpty(value)) { + String[] roles = value.split(", "); + ClinicalProperty.RoleInCancer roleInCancer = null; + for (String role : roles) { + ClinicalProperty.RoleInCancer tmpRole = null; + if ("TSG".equals(role)) { + tmpRole = ClinicalProperty.RoleInCancer.TUMOR_SUPPRESSOR_GENE; + } else if ("oncogene".equals(role)) { + tmpRole = ClinicalProperty.RoleInCancer.ONCOGENE; + } + if (tmpRole != null && roleInCancer == null) { + roleInCancer = tmpRole; + } else if (tmpRole != null) { + if (tmpRole != roleInCancer) { + roleInCancer = ClinicalProperty.RoleInCancer.BOTH; + } else { + logger.warn("Found repeated roles?"); + } + } + } + if (roleInCancer != null) { + genePanel.getCancer().setRole(roleInCancer); + } + } + break; + case "Mutation Types": + break; + case "Translocation Partner": + if (StringUtils.isNotEmpty(value) && !"?".equals(value)) { + List partners = Arrays.asList(value.split(", ")); + genePanel.getCancer().setFusionPartners(partners); + } + break; + case "Other Germline Mut": + break; + case "Other Syndrome": + 
break; + case "Synonyms": + String[] synonyms = value.split(","); + for (String synonym : synonyms) { + genePanel.getXrefs().add(new Xref(synonym, "Census", "Census", "Synonyms")); + } + break; + default: + break; + } + + } + panel.getGenes().add(genePanel); + } + return panel; + } + } + + public static DiseasePanel parsePanelApp(Path panelAppJsonFile) throws IOException { + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); + objectMapper.configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false); + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + objectMapper.configure(DeserializationFeature.FAIL_ON_NULL_FOR_PRIMITIVES, false); + objectMapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true); + + Map panelInfo = objectMapper.readValue(panelAppJsonFile.toFile(), Map.class); + + List categories = new ArrayList<>(2); + categories.add(new DiseasePanel.PanelCategory(String.valueOf(panelInfo.get("disease_group")), 1)); + categories.add(new DiseasePanel.PanelCategory(String.valueOf(panelInfo.get("disease_sub_group")), 2)); + + List disorders = new ArrayList<>(); + for (String relevantDisorder : (List) panelInfo.get("relevant_disorders")) { + if (StringUtils.isNotEmpty(relevantDisorder)) { + disorders.add(new OntologyTerm(relevantDisorder, relevantDisorder, "", "", "", "", + Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), + Collections.emptyList())); + } + } + + List genes = new ArrayList<>(); + for (Map gene : (List) panelInfo.get("genes")) { + DiseasePanel.GenePanel genePanel = new DiseasePanel.GenePanel(); + + extractCommonInformationFromPanelApp(gene, genePanel); + + List coordinates = new ArrayList<>(); + + Map geneData = (Map) gene.get("gene_data"); + Map ensemblGenes = (Map) geneData.get("ensembl_genes"); + // Read coordinates + for (String assembly : ensemblGenes.keySet()) { + Map assemblyObject = (Map) 
ensemblGenes.get(assembly); + for (String version : assemblyObject.keySet()) { + Map coordinateObject = (Map) assemblyObject.get(version); + String correctAssembly = "GRch37".equals(assembly) ? "GRCh37" : "GRCh38"; + coordinates.add(new DiseasePanel.Coordinate(correctAssembly, String.valueOf(coordinateObject.get("location")), + "Ensembl v" + version)); + } + } + + genePanel.setName(String.valueOf(geneData.get("hgnc_symbol"))); + genePanel.setCoordinates(coordinates); + + genes.add(genePanel); + } + + List regions = new ArrayList<>(); + for (Map panelAppRegion : (List) panelInfo.get("regions")) { + DiseasePanel.RegionPanel region = new DiseasePanel.RegionPanel(); + + extractCommonInformationFromPanelApp(panelAppRegion, region); + + List coordinateList = null; + if (ListUtils.isNotEmpty((Collection) panelAppRegion.get("grch38_coordinates"))) { + coordinateList = (List) panelAppRegion.get("grch38_coordinates"); + } else if (ListUtils.isNotEmpty((Collection) panelAppRegion.get("grch37_coordinates"))) { + coordinateList = (List) panelAppRegion.get("grch37_coordinates"); + } + + String id; + if (panelAppRegion.get("entity_name") != null + && StringUtils.isNotEmpty(String.valueOf(panelAppRegion.get("entity_name")))) { + id = String.valueOf(panelAppRegion.get("entity_name")); + } else { + id = (String) panelAppRegion.get("chromosome"); + if (coordinateList != null && coordinateList.size() == 2) { + id = id + ":" + coordinateList.get(0) + "-" + coordinateList.get(1); + } else { + logger.warn("Could not read region coordinates"); + } + } + + DiseasePanel.VariantType variantType = null; + String typeOfVariant = String.valueOf(panelAppRegion.get("type_of_variants")); + if ("cnv_loss".equals(typeOfVariant)) { + variantType = DiseasePanel.VariantType.LOSS; + } else if ("cnv_gain".equals(typeOfVariant)) { + variantType = DiseasePanel.VariantType.GAIN; + } else { + logger.warn("Unknown type of variants '{}'", typeOfVariant); /* variantType stays null */ + } + + region.setId(id); + 
region.setDescription(String.valueOf(panelAppRegion.get("verbose_name"))); + region.setHaploinsufficiencyScore(String.valueOf(panelAppRegion.get("haploinsufficiency_score"))); + region.setTriplosensitivityScore(String.valueOf(panelAppRegion.get("triplosensitivity_score"))); + region.setRequiredOverlapPercentage((int) panelAppRegion.get("required_overlap_percentage")); + region.setTypeOfVariants(variantType); + + regions.add(region); + } + + List strs = new ArrayList<>(); + for (Map panelAppSTR : (List) panelInfo.get("strs")) { + DiseasePanel.STR str = new DiseasePanel.STR(); + + extractCommonInformationFromPanelApp(panelAppSTR, str); + + str.setRepeatedSequence(String.valueOf(panelAppSTR.get("repeated_sequence"))); + str.setNormalRepeats((int) panelAppSTR.get("normal_repeats")); + str.setPathogenicRepeats((int) panelAppSTR.get("pathogenic_repeats")); + + strs.add(str); + } + + Map attributes = new HashMap<>(); +// attributes.put("PanelAppInfo", panel); + + DiseasePanel diseasePanel = new DiseasePanel(); + diseasePanel.setId(String.valueOf(panelInfo.get("name")) + .replace(" - ", "-") + .replace("/", "-") + .replace(" (", "-") + .replace("(", "-") + .replace(") ", "-") + .replace(")", "") + .replace(" & ", "_and_") + .replace(", ", "-") + .replace(" ", "_") + "-PanelAppId-" + panelInfo.get("id")); + diseasePanel.setName(String.valueOf(panelInfo.get("name"))); + diseasePanel.setCategories(categories); + diseasePanel.setDisorders(disorders); + diseasePanel.setGenes(genes); + diseasePanel.setStrs(strs); + diseasePanel.setRegions(regions); + diseasePanel.setSource(new DiseasePanel.SourcePanel() + .setId(String.valueOf(panelInfo.get("id"))) + .setName(String.valueOf(panelInfo.get("name"))) + .setVersion(String.valueOf(panelInfo.get("version"))) + .setProject("PanelApp (GEL)") + ); + diseasePanel.setDescription(panelInfo.get("disease_sub_group") + + " (" + panelInfo.get("disease_group") + ")"); + diseasePanel.setAttributes(attributes); + + if ("Cancer 
Programme".equals(String.valueOf(panelInfo.get("disease_group")))) { + diseasePanel.setTags(Collections.singletonList("cancer")); + } + + return diseasePanel; + } + + private static void extractCommonInformationFromPanelApp(Map panelAppCommonMap, T common) { + String ensemblGeneId = ""; + List xrefs = new ArrayList<>(); + List publications = new ArrayList<>(); + List phenotypes = new ArrayList<>(); + List coordinates = new ArrayList<>(); + + Map geneData = (Map) panelAppCommonMap.get("gene_data"); + if (geneData != null) { + Map ensemblGenes = (Map) geneData.get("ensembl_genes"); + + if (ensemblGenes.containsKey("GRch37")) { + ensemblGeneId = String.valueOf(((Map) ((Map) ensemblGenes.get("GRch37")).get("82")).get("ensembl_id")); + } else if (ensemblGenes.containsKey("GRch38")) { + ensemblGeneId = String.valueOf(((Map) ((Map) ensemblGenes.get("GRch38")).get("90")).get("ensembl_id")); + } + + // read OMIM ID + if (geneData.containsKey("omim_gene") && geneData.get("omim_gene") != null) { + for (String omim : (List) geneData.get("omim_gene")) { + xrefs.add(new Xref(omim, "OMIM", "OMIM")); + } + } + xrefs.add(new Xref(String.valueOf(geneData.get("gene_name")), "GeneName", "GeneName")); + } + + // Add coordinates + String chromosome = String.valueOf(panelAppCommonMap.get("chromosome")); + if (ListUtils.isNotEmpty((Collection) panelAppCommonMap.get("grch38_coordinates"))) { + List auxCoordinates = (List) panelAppCommonMap.get("grch38_coordinates"); + coordinates.add(new DiseasePanel.Coordinate("GRCh38", chromosome + ":" + auxCoordinates.get(0) + "-" + auxCoordinates.get(1), + "Ensembl")); + } + if (ListUtils.isNotEmpty((Collection) panelAppCommonMap.get("grch37_coordinates"))) { + List auxCoordinates = (List) panelAppCommonMap.get("grch37_coordinates"); + coordinates.add(new DiseasePanel.Coordinate("GRCh37", chromosome + ":" + auxCoordinates.get(0) + "-" + auxCoordinates.get(1), + "Ensembl")); + } + + + // read publications + if 
(panelAppCommonMap.containsKey("publications")) { + publications = (List) panelAppCommonMap.get("publications"); + } + + // Read phenotypes + if (panelAppCommonMap.containsKey("phenotypes") && !((List) panelAppCommonMap.get("phenotypes")).isEmpty()) { + for (String phenotype : ((List) panelAppCommonMap.get("phenotypes"))) { + String id = phenotype; + String source = ""; + if (phenotype.length() >= 6) { + String substring = phenotype.substring(phenotype.length() - 6); + try { + Integer.parseInt(substring); + // If the previous call doesn't raise any exception, we are reading an OMIM id. + id = substring; + source = "OMIM"; + } catch (NumberFormatException e) { + id = phenotype; + } + } + + phenotypes.add(new OntologyTerm(id, phenotype, source, "", "", "", Collections.emptyList(), Collections.emptyList(), + Collections.emptyList(), Collections.emptyList())); + } + } + + // Read penetrance + String panelAppPenetrance = String.valueOf(panelAppCommonMap.get("penetrance")); + ClinicalProperty.Penetrance penetrance = null; + if (StringUtils.isNotEmpty(panelAppPenetrance)) { + try { + penetrance = ClinicalProperty.Penetrance.valueOf(panelAppPenetrance.toUpperCase()); + } catch (IllegalArgumentException e) { + logger.warn("Could not parse penetrance. 
Value found: " + panelAppPenetrance); + } + } + + common.setId(ensemblGeneId); + common.setXrefs(xrefs); + common.setModeOfInheritance(getMoiFromGenePanel(String.valueOf(panelAppCommonMap.get("mode_of_inheritance")))); + common.setPenetrance(penetrance); + ClinicalProperty.Confidence confidence = ClinicalProperty.Confidence.LOW; + int confidenceLevel = Integer.valueOf(String.valueOf(panelAppCommonMap.get("confidence_level"))); + if (confidenceLevel == 2) { + confidence = ClinicalProperty.Confidence.MEDIUM; + } else if (confidenceLevel == 3) { + confidence = ClinicalProperty.Confidence.HIGH; + } + common.setConfidence(confidence); + common.setEvidences((List) panelAppCommonMap.get("evidence")); + common.setPublications(publications); + common.setPhenotypes(phenotypes); + common.setCoordinates(coordinates); + } + + private static ClinicalProperty.ModeOfInheritance getMoiFromGenePanel(String inputMoi) { /* Maps a mode-of-inheritance label to the enum by prefix/keyword; returns UNKNOWN when nothing matches. */ + if (StringUtils.isEmpty(inputMoi)) { + return ClinicalProperty.ModeOfInheritance.UNKNOWN; + } + + String moi = inputMoi.toUpperCase(Locale.ROOT); /* locale-independent, so the literal matching below holds under any default locale */ + + if (moi.startsWith("BIALLELIC")) { + return ClinicalProperty.ModeOfInheritance.AUTOSOMAL_RECESSIVE; + } + if (moi.startsWith("MONOALLELIC")) { + if (moi.contains("NOT")) { + return ClinicalProperty.ModeOfInheritance.MONOALLELIC_NOT_IMPRINTED; + } else if (moi.contains("MATERNALLY")) { + return ClinicalProperty.ModeOfInheritance.MONOALLELIC_MATERNALLY_IMPRINTED; + } else if (moi.contains("PATERNALLY")) { + return ClinicalProperty.ModeOfInheritance.MONOALLELIC_PATERNALLY_IMPRINTED; + } else { + return ClinicalProperty.ModeOfInheritance.AUTOSOMAL_DOMINANT; + } + } + if (moi.startsWith("BOTH")) { + if (moi.contains("SEVERE")) { + return ClinicalProperty.ModeOfInheritance.MONOALLELIC_AND_MORE_SEVERE_BIALLELIC; + } else { + return ClinicalProperty.ModeOfInheritance.MONOALLELIC_AND_BIALLELIC; + } + } + if (moi.startsWith("MITOCHONDRIAL")) { + return 
ClinicalProperty.ModeOfInheritance.MITOCHONDRIAL; + } + if (moi.startsWith("X-LINKED")) { + if (moi.contains("BIALLELIC")) { + return ClinicalProperty.ModeOfInheritance.X_LINKED_RECESSIVE; + } else { /* not biallelic: the former XLINKED_MONOALLELIC case, now X_LINKED_DOMINANT */ + return ClinicalProperty.ModeOfInheritance.X_LINKED_DOMINANT; + } + } + return ClinicalProperty.ModeOfInheritance.UNKNOWN; + } + +}