From 73f82be1324b0ec52e1fd9dce70a1c51d2682541 Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Fri, 9 Jul 2021 09:25:58 +0100 Subject: [PATCH 01/27] when checkAminoAcidChange is TRUE, merge all clinvar accessions, not just the accession from the original variant --- .../VariantAnnotationCalculator.java | 99 ++----------------- 1 file changed, 8 insertions(+), 91 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index c49fb64a84..81403c060e 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -1695,103 +1695,20 @@ public void processResults(Future>> clinicalFut for (int i = 0; i < variantAnnotationList.size(); i++) { CellBaseDataResult clinicalCellBaseDataResult = clinicalCellBaseDataResults.get(i); if (clinicalCellBaseDataResult.getResults() != null && clinicalCellBaseDataResult.getResults().size() > 0) { - variantAnnotationList.get(i) - .setTraitAssociation(clinicalCellBaseDataResult.getResults().get(0).getAnnotation() - .getTraitAssociation()); -// // DEPRECATED -// // TODO: remove in 4.6 -// variantAnnotationList.get(i) -// .setVariantTraitAssociation(convertToVariantTraitAssociation(clinicalCellBaseDataResult -// .getResults() -// .get(0) -// .getAnnotation() -// .getTraitAssociation())); + variantAnnotationList.get(i).setTraitAssociation(getAllTraitAssociations(clinicalCellBaseDataResult)); } } } } -// private VariantTraitAssociation convertToVariantTraitAssociation(List traitAssociation) { -// List clinvarList = new ArrayList<>(); -// List cosmicList = new ArrayList<>(traitAssociation.size()); -// for (EvidenceEntry evidenceEntry : traitAssociation) { -// switch (evidenceEntry.getSource().getName()) { 
-// case CLINVAR: -// clinvarList.add(parseClinvar(evidenceEntry)); -// break; -// case COSMIC: -// cosmicList.add(parseCosmic(evidenceEntry)); -// break; -// default: -// break; -// } -// } -// return new VariantTraitAssociation(clinvarList, null, cosmicList); -// } - -// private Cosmic parseCosmic(EvidenceEntry evidenceEntry) { -// String primarySite = null; -// String siteSubtype = null; -// String primaryHistology = null; -// String histologySubtype = null; -// String sampleSource = null; -// String tumourOrigin = null; -// if (evidenceEntry.getSomaticInformation() != null) { -// primarySite = evidenceEntry.getSomaticInformation().getPrimarySite(); -// siteSubtype = evidenceEntry.getSomaticInformation().getSiteSubtype(); -// primaryHistology = evidenceEntry.getSomaticInformation().getPrimaryHistology(); -// histologySubtype = evidenceEntry.getSomaticInformation().getHistologySubtype(); -// sampleSource = evidenceEntry.getSomaticInformation().getSampleSource(); -// tumourOrigin = evidenceEntry.getSomaticInformation().getTumourOrigin(); -// } -// return new Cosmic(evidenceEntry.getId(), primarySite, siteSubtype, primaryHistology, histologySubtype, -// sampleSource, tumourOrigin, parseGeneName(evidenceEntry), -// getAdditionalProperty(evidenceEntry, MUTATION_SOMATIC_STATUS_IN_SOURCE_FILE)); -// } - -// private String parseGeneName(EvidenceEntry evidenceEntry) { -// if (evidenceEntry.getGenomicFeatures() != null && !evidenceEntry.getGenomicFeatures().isEmpty() -// && evidenceEntry.getGenomicFeatures().get(0).getXrefs() != null) { -// // There may be more than one genomic feature for cosmic evidence entries. However, the actual gene symbol -// // is expected to be found at index 0. 
-// return evidenceEntry.getGenomicFeatures().get(0).getXrefs().get(SYMBOL); -// } -// return null; -// } -// private ClinVar parseClinvar(EvidenceEntry evidenceEntry) { -// String clinicalSignificance = getAdditionalProperty(evidenceEntry, CLINICAL_SIGNIFICANCE_IN_SOURCE_FILE); -// List traitList = null; -// if (evidenceEntry.getHeritableTraits() != null) { -// traitList = evidenceEntry -// .getHeritableTraits() -// .stream() -// .map((heritableTrait) -> heritableTrait.getTrait()) -// .collect(Collectors.toList()); -// } -// List geneNameList = null; -// if (evidenceEntry.getGenomicFeatures() != null) { -// geneNameList = evidenceEntry -// .getGenomicFeatures() -// .stream() -// .map((genomicFeature) -> genomicFeature.getXrefs().get(SYMBOL)) -// .collect(Collectors.toList()); -// } -// String reviewStatus = getAdditionalProperty(evidenceEntry, REVIEW_STATUS_IN_SOURCE_FILE); -// return new ClinVar(evidenceEntry.getId(), clinicalSignificance, traitList, geneNameList, -// reviewStatus); -// } - -// private String getAdditionalProperty(EvidenceEntry evidenceEntry, String name) { -// if (evidenceEntry.getAdditionalProperties() != null) { -// for (Property property : evidenceEntry.getAdditionalProperties()) { -// if (name.equals(property.getName())) { -// return property.getValue(); -// } -// } -// } -// return null; -// } + private List getAllTraitAssociations(CellBaseDataResult clinicalQueryResult) { + List traitAssociations = new ArrayList<>(); + for (Variant variant: clinicalQueryResult.getResults()) { + traitAssociations.addAll(variant.getAnnotation().getTraitAssociation()); + } + return traitAssociations; + } } class FutureRepeatsAnnotator implements Callable>> { From 6e7909318c348899f702333ea2e81158aca7a9d0 Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Fri, 9 Jul 2021 09:29:57 +0100 Subject: [PATCH 02/27] hgvs-fixes: no HGVS is calculated for symbolic variants. Transcript HGVS no longer fails for long insertions. 
No HGVSp is calculated for insertions falling on an incomplete first codon --- .../lib/variant/hgvs/HgvsCalculator.java | 38 ++++++++----------- .../variant/hgvs/HgvsProteinCalculator.java | 17 ++++++++- .../hgvs/HgvsTranscriptCalculator.java | 24 +++++++----- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java index 38c1233cdd..0b4ff01814 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java @@ -51,6 +51,7 @@ public class HgvsCalculator { protected static final int NEIGHBOURING_SEQUENCE_SIZE = 100; protected GenomeManager genomeManager; protected BuildingComponents buildingComponents; + private static final String VARIANT_STRING_PATTERN = "[ACGT]*"; public HgvsCalculator(GenomeManager genomeManager) { this.genomeManager = genomeManager; @@ -121,29 +122,20 @@ protected List run(Variant variant, Transcript transcript, String geneId return hgvsStrings; } -// private HgvsCalculator getHgvsCalculator(Variant normalizedVariant) { -//// switch (VariantAnnotationUtils.getVariantType(normalizedVariant)) { -// switch (normalizedVariant.getType()) { -// case SNV: -// return new HgvsSNVCalculator(genomeManager); -// case INSERTION: -// case DELETION: -// case INDEL: -// if (StringUtils.isBlank(normalizedVariant.getReference())) { -// return new HgvsInsertionCalculator(genomeManager); -// } else if (StringUtils.isBlank(normalizedVariant.getAlternate())) { -// return new HgvsDeletionCalculator(genomeManager); -// } else { -// logger.debug("No HGVS implementation available for variant MNV. Returning empty list of HGVS " -// + "identifiers."); -// return null; -// } -// default: -// logger.debug("No HGVS implementation available for structural variants. Found {}. 
Returning empty list" -// + " of HGVS identifiers.", normalizedVariant.getType()); -// return null; -// } -// } + /** + * Checks whether a variant is valid. + * + * @param variant Variant object to be checked. + * @return true/false depending on whether 'variant' does contain valid values. Currently just a simple check of + * reference/alternate attributes being strings of [A,C,G,T] of length >= 0 is performed to detect cases such as + * 19:13318673:(CAG)4:(CAG)5 which are not currently supported by CellBase. Ref and alt alleles must be different + * as well for the variant to be valid. Functionality of the method may be improved in the future. + */ + protected static boolean isValid(Variant variant) { + return (variant.getReference().matches(VARIANT_STRING_PATTERN) + && variant.getAlternate().matches(VARIANT_STRING_PATTERN) + && !variant.getAlternate().equals(variant.getReference())); + } protected Variant normalize(Variant variant, boolean normalize) { Variant normalizedVariant; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java index 1f3e35d034..c5c0b27af7 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java @@ -55,6 +55,7 @@ public class HgvsProteinCalculator { protected BuildingComponents buildingComponents = null; public static final int MAX_NUMBER_AMINOACIDS_DISPLAYED = 20; + private static final Integer MAXIMUM_HGVS_DELETION_LENGTH = 1000; /** * Constructor. 
@@ -74,6 +75,10 @@ public HgvsProteinCalculator(Variant variant, Transcript transcript) { * @return HGVSp string for variant and transcript */ public HgvsProtein calculate() { + if (!HgvsCalculator.isValid(this.variant)) { + return null; + } + // FIXME restore !onlySpansCodingSequence(variant, transcript) check if (!transcriptUtils.isCoding() || StringUtils.isEmpty(transcript.getProteinSequence())) { return null; @@ -91,7 +96,12 @@ public HgvsProtein calculate() { } else { // deletion if (StringUtils.isBlank(variant.getAlternate())) { - return calculateDeletionHgvs(); + // Only for deletions shorter than a threshold + if (variant.getLength() < MAXIMUM_HGVS_DELETION_LENGTH) { + return calculateDeletionHgvs(); + } else { + return null; + } } else { logger.debug("No HGVS implementation available for variant MNV. Returning empty list of HGVS identifiers."); return null; @@ -253,6 +263,11 @@ private HgvsProtein calculateInsertionHgvs() { int codonPosition = transcriptUtils.getCodonPosition(cdsVariantStartPosition); int positionAtCodon = transcriptUtils.getPositionAtCodon(cdsVariantStartPosition); + // No prediction to be made if the variant falls on the first codon and this codon is incomplete + if (positionAtCodon == 0) { + return null; + } + // Check if this is an in an insertion, duplication or frameshift. // Alternate length for Insertions and Duplications must be multiple of 3, otherwise it is a frameshift. 
if (variant.getAlternate().length() % 3 == 0) { diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculator.java index 9da1d14c7a..a45e16ffc3 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsTranscriptCalculator.java @@ -48,7 +48,7 @@ public class HgvsTranscriptCalculator { private static final String INS = "ins"; private static final String DEL = "del"; private static final String DUP = "dup"; - private static final int NEIGHBOURING_SEQUENCE_SIZE = 100; + private static final int MINIMUM_NEIGHBOURING_SEQUENCE_SIZE = 100; private final GenomeManager genomeManager; /** @@ -72,6 +72,11 @@ public HgvsTranscriptCalculator(GenomeManager genomeManager, Variant variant, Tr * @return HGVS string for this transcript */ public String calculate() { + // Check reference and alternate alleles do not contain unexpected characters + if (!HgvsCalculator.isValid(this.variant)) { + return null; + } + switch (variant.getType()) { case SNV: return calculateSNVHgvsString(); @@ -425,8 +430,9 @@ private void setRangeCoordsAndAlleles(int genomicStart, int genomicEnd, String g private BuildingComponents.MutationType genomicInsertionHgvsNormalize(Variant variant, Transcript transcript, Variant normalizedVariant) { // Get genomic sequence around the lesion. 
- int start = Math.max(variant.getStart() - NEIGHBOURING_SEQUENCE_SIZE, 1); // TODO: might need to adjust +-1 nt - int end = variant.getStart() + NEIGHBOURING_SEQUENCE_SIZE + variant.getAlternate().length(); // TODO: might need to adjust +-1 nt + int neighbouringSequenceSize = Math.max(MINIMUM_NEIGHBOURING_SEQUENCE_SIZE, variant.getAlternate().length()); + int start = Math.max(variant.getStart() - neighbouringSequenceSize, 1); // TODO: might need to adjust +-1 nt + int end = variant.getStart() + neighbouringSequenceSize + variant.getAlternate().length(); // TODO: might need to adjust +-1 nt Query query = new Query("region", variant.getChromosome() + ":" + start + "-" + end); String genomicSequence = genomeManager.getGenomicSequence(query, new QueryOptions()).getResults().get(0).getSequence(); @@ -450,13 +456,13 @@ private BuildingComponents.MutationType genomicInsertionHgvsNormalize(Variant va // Check duplication String previousSequence = genomicSequence.substring(Math.max(0, - NEIGHBOURING_SEQUENCE_SIZE - variant.getAlternate().length() // TODO: might need to adjust +-1 nt + neighbouringSequenceSize - variant.getAlternate().length() // TODO: might need to adjust +-1 nt + (normalizedVariant.getStart() - variant.getStart())), // Needs to sum the difference with the // normalized one in order to take into // account potential // normalization/lef-right alignment // differences - NEIGHBOURING_SEQUENCE_SIZE + (normalizedVariant.getStart() - variant.getStart())); // Needs to sum the difference with the + neighbouringSequenceSize + (normalizedVariant.getStart() - variant.getStart())); // Needs to sum the difference with the // normalized one in order to take into // account potential // normalization/lef-right alignment @@ -464,13 +470,13 @@ private BuildingComponents.MutationType genomicInsertionHgvsNormalize(Variant va if (previousSequence.equals(normalizedVariant.getAlternate())) { return BuildingComponents.MutationType.DUPLICATION; } else { - String nextSequence = 
genomicSequence.substring(NEIGHBOURING_SEQUENCE_SIZE // TODO: might need to adjust +-1 nt + String nextSequence = genomicSequence.substring(neighbouringSequenceSize // TODO: might need to adjust +-1 nt + (normalizedVariant.getStart() - variant.getStart()), // Needs to sum the difference with the // normalized one in order to take into // account potential // normalization/lef-right alignment // differences - NEIGHBOURING_SEQUENCE_SIZE + variant.getAlternate().length() + neighbouringSequenceSize + variant.getAlternate().length() + (normalizedVariant.getStart() - variant.getStart())); // Needs to sum the difference with the // normalized one in order to take into // account potential @@ -485,8 +491,8 @@ private BuildingComponents.MutationType genomicInsertionHgvsNormalize(Variant va private String transcriptDeletionHgvsNormalize(Variant variant, Transcript transcript, Variant normalizedVariant) { // Get genomic sequence around the lesion. - int start = Math.max(variant.getStart() - NEIGHBOURING_SEQUENCE_SIZE, 1); // TODO: might need to adjust +-1 nt - int end = variant.getStart() + NEIGHBOURING_SEQUENCE_SIZE; // TODO: might need to adjust +-1 nt + int start = Math.max(variant.getStart() - MINIMUM_NEIGHBOURING_SEQUENCE_SIZE, 1); // TODO: might need to adjust +-1 nt + int end = variant.getStart() + MINIMUM_NEIGHBOURING_SEQUENCE_SIZE; // TODO: might need to adjust +-1 nt Query query = new Query("region", variant.getChromosome() + ":" + start + "-" + end); String genomicSequence From 9bbfaaf3597c9d43e8758ef47e21d1dc1f183fbe Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Fri, 9 Jul 2021 09:39:34 +0100 Subject: [PATCH 03/27] HGVS - Update to skip first codon if it's a frameshift, and the AA is M but the first codon is not ATG --- .../variant/hgvs/HgvsProteinCalculator.java | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java 
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java index c5c0b27af7..5f8bc67fe3 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java @@ -872,21 +872,42 @@ private HgvsProtein calculateFrameshiftHgvs() { } String hgvsString; - int phaseOffset = 0; int currentAaIndex = 0; + StringBuilder alternateProteinSeq = new StringBuilder(); + String alternateCdnaSeq = transcriptUtils.getAlternateCdnaSequence(variant); + if (alternateCdnaSeq == null) { + return null; + } + int codonIndex = transcript.getCdnaCodingStart() + phaseOffset - 1; if (transcriptUtils.hasUnconfirmedStart()) { phaseOffset = transcriptUtils.getFirstCodonPhase(); + codonIndex += phaseOffset; + // if reference protein sequence start with X, prepend X to our new alternate sequence also if (transcript.getProteinSequence().startsWith(HgvsCalculator.UNKNOWN_AMINOACID)) { alternateProteinSeq.append("X"); currentAaIndex++; } + } else if (transcript.getProteinSequence().startsWith("M") && !"ATG".equals(alternateCdnaSeq.substring(transcript.getCdnaCodingStart(), 3))) { + + /* + First codon is NOT ATG but protein sequence starts with M. This is due to Ensembl curation. From Ensembl: + "We have some information about non-ATG start codons in our blog post from release 102: + https://www.ensembl.info/2020/11/30/ensembl-102-has-been-released/ + Quite simply, there is not a rule. This is a situation of exceptional biology which we are only able to annotate correctly + because of our expert manual gene annotators analysing the data in detail." + Only relevant for frameshifts, and transcripts with confirmed starts. 
+ */ + // fast forward past first + alternateProteinSeq.append("M"); + currentAaIndex++; + codonIndex += 3; } - int codonIndex = transcript.getCdnaCodingStart() + phaseOffset - 1; + int firstDiffIndex = -1; String firstReferencedAa = ""; String firstAlternateAa = ""; From 502c91814e0a8a13ba4a6d60ba2ae81fd388d5cd Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Fri, 9 Jul 2021 09:47:45 +0100 Subject: [PATCH 04/27] fix clinvar normalisation issue, clinvar has the incorrect locations for normalised deletion variants --- .../cellbase/lib/builders/clinical/variant/ClinVarIndexer.java | 3 +++ .../cellbase/lib/variant/hgvs/HgvsProteinCalculator.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java index 5525b7d4ec..217d4fe866 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java @@ -837,6 +837,9 @@ private SequenceLocation parseSequenceLocation(String[] parts) { if (emptySequence(reference) && !emptySequence(alternate) && end == (start + 1)) { // NOTE! swapped start and end return new SequenceLocation(chromosome, end, start, reference, alternate); + } else if (alternate.length() == 1 && reference.length() > 1 && reference.startsWith(alternate)) { + // variant summary file has the wrong location for deletions. Adjust! 
+ return new SequenceLocation(chromosome, start - 1, end, reference, alternate); } else { return new SequenceLocation(chromosome, start, end, reference, alternate); } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java index 5f8bc67fe3..2e6cc213b6 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java @@ -914,7 +914,7 @@ private HgvsProtein calculateFrameshiftHgvs() { int stopIndex = -1; String stopAlternateAa = ""; int originalStopIndex = -1; - String alternateCdnaSeq = transcriptUtils.getAlternateCdnaSequence(variant); + // We ned to include the STOP codon in the loop to check if there is a variant braking the STOP codon while (codonIndex + 3 <= alternateCdnaSeq.length()) { // Build the new amino acid sequence From 2a85649ca644695c4f655e733cdb5964ca6ac799 Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Fri, 9 Jul 2021 09:58:56 +0100 Subject: [PATCH 05/27] use complement for negative strand, allows MNVs to calculate consequence types correctly when phased --- .../VariantAnnotationCalculator.java | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index 81403c060e..735813b1ef 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -816,8 +816,14 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray) { if (consequenceType3 != null) { String referenceCodon = 
consequenceType1.getCodon().split("/")[0].toUpperCase(); // WARNING: assumes variants are sorted according to their coordinates - String alternateCodon = variant0.getAlternate() + variant1.getAlternate() - + variant2.getAlternate(); + String alternateCodon = null; + if ("-".equals(variant0.getStrand())) { + alternateCodon = "" + VariantAnnotationUtils.COMPLEMENTARY_NT.get(variant2.getAlternate()) + + VariantAnnotationUtils.COMPLEMENTARY_NT.get(variant1.getAlternate()) + + VariantAnnotationUtils.COMPLEMENTARY_NT.get(variant0.getAlternate()); + } else { + alternateCodon = variant0.getAlternate() + variant1.getAlternate() + variant2.getAlternate(); + } codon = referenceCodon + "/" + alternateCodon; alternateAA = VariantAnnotationUtils.CODON_TO_A.get(alternateCodon); soTerms = updatePhasedSoTerms(consequenceType1.getSequenceOntologyTerms(), @@ -849,8 +855,16 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray) { referenceCodonArray[codonIdx1] = Character.toUpperCase(referenceCodonArray[codonIdx1]); referenceCodonArray[codonIdx2] = Character.toUpperCase(referenceCodonArray[codonIdx2]); char[] alternateCodonArray = referenceCodonArray.clone(); - alternateCodonArray[codonIdx1] = variant0.getAlternate().toUpperCase().toCharArray()[0]; - alternateCodonArray[codonIdx2] = variant1.getAlternate().toUpperCase().toCharArray()[0]; + // negative strand + if ("-".equals(variant0.getStrand())) { + alternateCodonArray[codonIdx1] = + VariantAnnotationUtils.COMPLEMENTARY_NT.get(variant0.getAlternate().toUpperCase().toCharArray()[0]); + alternateCodonArray[codonIdx2] = + VariantAnnotationUtils.COMPLEMENTARY_NT.get(variant1.getAlternate().toUpperCase().toCharArray()[0]); + } else { + alternateCodonArray[codonIdx1] = variant0.getAlternate().toUpperCase().toCharArray()[0]; + alternateCodonArray[codonIdx2] = variant1.getAlternate().toUpperCase().toCharArray()[0]; + } codon = String.valueOf(referenceCodonArray) + "/" + String.valueOf(alternateCodonArray); alternateAA = 
VariantAnnotationUtils.CODON_TO_A.get(String.valueOf(alternateCodonArray).toUpperCase()); From 67be70e0d489e58373742d7f2fd5fcb7d1aa9661 Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Tue, 25 May 2021 08:53:43 +0100 Subject: [PATCH 06/27] remove threshold for indel annotation --- ...onsequenceTypeGenericRegionCalculator.java | 38 +++++-------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/ConsequenceTypeGenericRegionCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/ConsequenceTypeGenericRegionCalculator.java index 7e035bdff6..d424c38b24 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/ConsequenceTypeGenericRegionCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/ConsequenceTypeGenericRegionCalculator.java @@ -474,46 +474,26 @@ protected void solveJunction(Integer spliceSite1, Integer spliceSite2, String le } if (regionsOverlap(spliceSite1, spliceSite1 + 1, variantStart, variantEnd)) { // Variant donor/acceptor - if ((variantEnd - variantStart) <= BIG_VARIANT_SIZE_THRESHOLD) { // Big cnvs should not be annotated with such a detail - SoNames.add(leftSpliceSiteTag); // donor/acceptor depending on transcript strand - // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! (22:36587846) - junctionSolution[0] = (variantStart <= spliceSite2 || variantEnd <= spliceSite2); - } else { - // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! (22:36587846) - junctionSolution[0] = (variantStart <= spliceSite2 || variantEnd <= spliceSite2); - } + SoNames.add(leftSpliceSiteTag); // donor/acceptor depending on transcript strand + // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! 
(22:36587846) + junctionSolution[0] = (variantStart <= spliceSite2 || variantEnd <= spliceSite2); } else if (regionsOverlap(spliceSite1 + 2, spliceSite1 + 7, variantStart, variantEnd)) { // Insertion coordinates are passed to this function as (variantStart-1,variantStart) - if ((variantEnd - variantStart) <= BIG_VARIANT_SIZE_THRESHOLD) { - SoNames.add(VariantAnnotationUtils.SPLICE_REGION_VARIANT); - } + SoNames.add(VariantAnnotationUtils.SPLICE_REGION_VARIANT); // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! (22:36587846) junctionSolution[0] = (variantStart <= spliceSite2 || variantEnd <= spliceSite2); - } else if (regionsOverlap(spliceSite1 - 3, spliceSite1 - 1, variantStart, variantEnd) - // Insertion coordinates are passed to this function as (variantStart-1,variantStart) - && ((variantEnd - variantStart) <= BIG_VARIANT_SIZE_THRESHOLD)) { + } else if (regionsOverlap(spliceSite1 - 3, spliceSite1 - 1, variantStart, variantEnd)) { SoNames.add(VariantAnnotationUtils.SPLICE_REGION_VARIANT); } if (regionsOverlap(spliceSite2 - 1, spliceSite2, variantStart, variantEnd)) { // Variant donor/acceptor - if ((variantEnd - variantStart) <= BIG_VARIANT_SIZE_THRESHOLD) { // Big cnvs should not be annotated with such a detail - SoNames.add(rightSpliceSiteTag); // donor/acceptor depending on transcript strand - // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! (22:36587846) - junctionSolution[0] = (spliceSite1 <= variantStart || spliceSite1 <= variantEnd); - } else { - // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! 
(22:36587846) - junctionSolution[0] = (spliceSite1 <= variantStart || spliceSite1 <= variantEnd); - } + SoNames.add(rightSpliceSiteTag); // donor/acceptor depending on transcript strand + junctionSolution[0] = (spliceSite1 <= variantStart || spliceSite1 <= variantEnd); } else if (regionsOverlap(spliceSite2 - 7, spliceSite2 - 2, variantStart, variantEnd)) { - // Insertion coordinates are passed to this function as (variantStart-1,variantStart) { - if ((variantEnd - variantStart) <= BIG_VARIANT_SIZE_THRESHOLD) { - SoNames.add(VariantAnnotationUtils.SPLICE_REGION_VARIANT); - } + SoNames.add(VariantAnnotationUtils.SPLICE_REGION_VARIANT); // BE CAREFUL: there are introns shorter than 7nts, and even just 1nt long!! (22:36587846) junctionSolution[0] = (spliceSite1 <= variantStart || spliceSite1 <= variantEnd); - } else if (regionsOverlap(spliceSite2 + 1, spliceSite2 + 3, variantStart, variantEnd) - // Insertion coordinates are passed to this function as (variantStart-1,variantStart) { - && ((variantEnd - variantStart) <= BIG_VARIANT_SIZE_THRESHOLD)) { + } else if (regionsOverlap(spliceSite2 + 1, spliceSite2 + 3, variantStart, variantEnd)) { SoNames.add(VariantAnnotationUtils.SPLICE_REGION_VARIANT); } } From 43d66e027ee389793a3f0cf098a7437630b30a8b Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Fri, 9 Jul 2021 10:04:20 +0100 Subject: [PATCH 07/27] do not allow codons of length 2, they should always be start + 3 --- .../cellbase/lib/variant/hgvs/TranscriptUtils.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java index b4252204ee..9d9fa10960 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java @@ -129,8 +129,13 @@ public String getCodon(int 
codonPosition) { // Adjust for manipulating strings, set to be zero base cdnaCodonStart = cdnaCodonStart - 1; - int cdnaCodonEnd = Math.min(cdnaCodonStart + 3, transcript.getCdnaCodingEnd()); - return transcript.getCdnaSequence().substring(cdnaCodonStart, cdnaCodonEnd); + // See ac289d2fcd10c8a0af3163b0fca6ffc149ba915a + // Can't use transcript.getCdnaCodingEnd() for the test as sometimes this is shorter than cdnaCodonStart + 3, e.g. when the + // variant is the penultimate variant in the sequence + int cdnaCodonEnd = cdnaCodonStart + 3; + if (cdnaCodonStart >= 0 && transcript.getCdnaSequence().length() >= cdnaCodonEnd) { + return transcript.getCdnaSequence().substring(cdnaCodonStart, cdnaCodonEnd); + } } return ""; } From a613fa736715a5a2077d75f21f7fd2a9e81b30ff Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Thu, 10 Jun 2021 09:17:05 +0100 Subject: [PATCH 08/27] Add chromosome to test in additon to start and end, otherwise if two variants with the same coordinates are annotated at the same time, the wrong gene is used for annotation. 
--- .../lib/variant/annotation/VariantAnnotationCalculator.java | 4 ++-- .../org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index 735813b1ef..e0ab215e81 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -1148,8 +1148,8 @@ private Set getAnnotatorSet(QueryOptions queryOptions) { private List getIncludedGeneFields(Set annotatorSet) { List includeGeneFields = new ArrayList<>(Arrays.asList("name", "id", "chromosome", "start", "end", "transcripts.id", - "transcripts.proteinId", "transcripts.start", "transcripts.end", "transcripts.cdnaSequence", "transcripts.proteinSequence", - "transcripts.strand", "transcripts.cdsLength", "transcripts.flags", "transcripts.biotype", + "transcripts.proteinId", "transcripts.chromosome", "transcripts.start", "transcripts.end", "transcripts.cdnaSequence", + "transcripts.proteinSequence", "transcripts.strand", "transcripts.cdsLength", "transcripts.flags", "transcripts.biotype", "transcripts.genomicCodingStart", "transcripts.genomicCodingEnd", "transcripts.cdnaCodingStart", "transcripts.cdnaCodingEnd", "transcripts.exons.start", "transcripts.exons.cdsStart", "transcripts.exons.end", "transcripts.exons.cdsEnd", "transcripts.exons.sequence", "transcripts.exons.phase", diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java index 0b4ff01814..70a2d5eab4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java +++ 
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsCalculator.java @@ -97,7 +97,8 @@ protected List run(Variant variant, Transcript transcript, String geneId List hgvsStrings = new ArrayList<>(); // Check variant falls within transcript coords - if (variant.getStart() <= transcript.getEnd() && variant.getEnd() >= transcript.getStart()) { + if (variant.getChromosome().equals(transcript.getChromosome()) + && variant.getStart() <= transcript.getEnd() && variant.getEnd() >= transcript.getStart()) { // We cannot know the type of variant before normalization has been carried out Variant normalizedVariant = normalize(variant, normalize); From 76e2ea5ead3994d19922d33e3f8523026be53863 Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Mon, 12 Jul 2021 13:25:53 +0100 Subject: [PATCH 09/27] more descriptive comment --- .../opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java index 9d9fa10960..a1687ee617 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java @@ -130,8 +130,9 @@ public String getCodon(int codonPosition) { cdnaCodonStart = cdnaCodonStart - 1; // See ac289d2fcd10c8a0af3163b0fca6ffc149ba915a - // Can't use transcript.getCdnaCodingEnd() for the test as sometimes this is shorter than cdnaCodonStart + 3, e.g. when the - // variant is the penultimate variant in the sequence + // Use cdnaCodonStart + 3, not transcript.getCdnaCodingEnd() + // if the variant is the penultimate NT in the sequence, then transcript.cdnaCodingEnd can be too short and product a codon + // of size = 2, generating an exception. 
int cdnaCodonEnd = cdnaCodonStart + 3; if (cdnaCodonStart >= 0 && transcript.getCdnaSequence().length() >= cdnaCodonEnd) { return transcript.getCdnaSequence().substring(cdnaCodonStart, cdnaCodonEnd); From 971e03b76013024f4ba3851c1d480918c92220be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 13 Jul 2021 10:14:47 +0100 Subject: [PATCH 10/27] k8s: Change worker deployment strategy to "recreate" --- .../kubernetes/charts/cellbase/templates/worker-deployment.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cellbase-app/app/cloud/kubernetes/charts/cellbase/templates/worker-deployment.yaml b/cellbase-app/app/cloud/kubernetes/charts/cellbase/templates/worker-deployment.yaml index 52a30b0112..db328dbd68 100644 --- a/cellbase-app/app/cloud/kubernetes/charts/cellbase/templates/worker-deployment.yaml +++ b/cellbase-app/app/cloud/kubernetes/charts/cellbase/templates/worker-deployment.yaml @@ -6,6 +6,8 @@ metadata: app: {{ .Values.worker.name }} spec: replicas: {{ .Values.worker.replicaCount }} + strategy: + type: Recreate selector: matchLabels: app: {{ .Values.worker.name }} From e340dacc99b09f8b4034aa99979ea9c12e4afd3a Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Tue, 13 Jul 2021 15:03:04 +0100 Subject: [PATCH 11/27] add maven profile to whitelist the HTTP repos --- .github/workflows/junit.yml | 3 ++- .mvn/README | 1 + .mvn/local-settings.xml | 20 ++++++++++++++++++++ .mvn/maven.config | 1 + 4 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 .mvn/README create mode 100644 .mvn/local-settings.xml create mode 100644 .mvn/maven.config diff --git a/.github/workflows/junit.yml b/.github/workflows/junit.yml index d48ceec4be..305e261f10 100644 --- a/.github/workflows/junit.yml +++ b/.github/workflows/junit.yml @@ -22,6 +22,7 @@ jobs: - name: Clone and build java-common-libs run: cd ~ && git clone -b develop https://github.com/opencb/java-common-libs.git && cd ~/java-common-libs && mvn -T 2 clean install -DskipTests 
&& cd $GITHUB_WORKSPACE - name: Clone and build biodata - run: cd ~ && git clone -b develop https://github.com/opencb/biodata.git && cd ~/biodata && mvn -T 2 clean install -DskipTests && cd $GITHUB_WORKSPACE + # remove copying over the .mvn directory once the HTTP repo is removed + run: cd ~ && git clone -b develop https://github.com/opencb/biodata.git && cd ~/biodata && cp -ar $GITHUB_WORKSPACE/.mvn . && mvn -T 2 clean install -DskipTests && cd $GITHUB_WORKSPACE - name: Build with Maven run: mvn -T 2 clean install diff --git a/.mvn/README b/.mvn/README new file mode 100644 index 0000000000..8483eeb213 --- /dev/null +++ b/.mvn/README @@ -0,0 +1 @@ + Hack to get around Maven blocking HTTP repos. Once CellBase does not have any HTTP dependencies, delete this directory. diff --git a/.mvn/local-settings.xml b/.mvn/local-settings.xml new file mode 100644 index 0000000000..54c518fb5e --- /dev/null +++ b/.mvn/local-settings.xml @@ -0,0 +1,20 @@ + + + + release-opencb-ext-libs + opencb-ext-libs + + http://bioinfo.hpc.cam.ac.uk/downloads/ext-libs/ + false + + + release-maven-restlet + maven-restlet + + http://maven.restlet.org + false + + + diff --git a/.mvn/maven.config b/.mvn/maven.config new file mode 100644 index 0000000000..d694bf811b --- /dev/null +++ b/.mvn/maven.config @@ -0,0 +1 @@ + --settings .mvn/local-settings.xml From d99d2d1928215281a89225bcff98272c6529e693 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 15 Jul 2021 01:14:41 +0100 Subject: [PATCH 12/27] docker: improve docker base image --- cellbase-app/app/cloud/docker/cellbase-base/Dockerfile | 5 ++++- cellbase-app/app/cloud/docker/docker-build.py | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile index 6ed232f5f6..9960039781 100644 --- a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile @@ -10,7 +10,10 @@ 
LABEL org.label-schema.vendor="OpenCB" \ ENV CELLBASE_USER cellbase ENV CELLBASE_HOME /opt/cellbase/ -RUN addgroup -S $CELLBASE_USER && adduser -S $CELLBASE_USER -G $CELLBASE_USER -u 1001 +RUN apk update && apk upgrade && apk add ca-certificates openssl wget bash \ + && update-ca-certificates \ + && addgroup -S $CELLBASE_USER && adduser -S $CELLBASE_USER -G $CELLBASE_USER -u 1001 + USER $CELLBASE_USER VOLUME /opt/cellbase/conf diff --git a/cellbase-app/app/cloud/docker/docker-build.py b/cellbase-app/app/cloud/docker/docker-build.py index 2da43d2da0..919a20045a 100755 --- a/cellbase-app/app/cloud/docker/docker-build.py +++ b/cellbase-app/app/cloud/docker/docker-build.py @@ -12,7 +12,7 @@ ## Configure command-line options parser = argparse.ArgumentParser() parser.add_argument('action', help="Action to execute", choices=["build", "push", "delete"], default="build") -parser.add_argument('--images', help="comma separated list of images to be made, e.g. base,rest,python,builder", default="base,rest,python") +parser.add_argument('--images', help="comma separated list of images to be made, e.g. base,rest,python,builder", default="base,rest") parser.add_argument('--tag', help="the tag for this code, e.g. v5.0.0") parser.add_argument('--build-folder', help="the location of the build folder, if not default location") parser.add_argument('--username', help="credentials for dockerhub (REQUIRED if deleting from DockerHub)") @@ -64,9 +64,9 @@ def build(): print() print(shell_colors['blue'] + "Building opencb/cellbase-" + image + ":" + tag + " ..." 
+ shell_colors['reset']) if image == "base": - run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile " + build_folder) + run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile.ubuntu " + build_folder) else: - run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile --build-arg TAG=" + tag + " " + build_folder) + run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile.ubuntu --build-arg TAG=" + tag + " " + build_folder) def tag_latest(image): @@ -139,7 +139,8 @@ def delete(): # 4. init images: get a list with all images if args.images is None: - images = ["base", "rest", "python"] + # TODO think about "python" + images = ["base", "rest"] else: images = args.images.split(",") From a670709ce2fbb35875a0e02163acb30fcac56fff Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 15 Jul 2021 01:15:08 +0100 Subject: [PATCH 13/27] docker: add new experimental docker based on Ubuntu --- .../docker/cellbase-base/Dockerfile.ubuntu | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.ubuntu diff --git a/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.ubuntu b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.ubuntu new file mode 100644 index 0000000000..be7e483346 --- /dev/null +++ b/cellbase-app/app/cloud/docker/cellbase-base/Dockerfile.ubuntu @@ -0,0 +1,21 @@ +FROM ubuntu:20.04 + +LABEL org.label-schema.vendor="OpenCB" \ + org.label-schema.name="cellbase-base" \ + org.label-schema.url="http://docs.opencb.org/display/cellbase" \ + org.label-schema.description="An Open Computational Genomics Analysis platform for big data processing and analysis in genomics" \ + maintainer="Julie 
Sullivan " + org.label-schema.schema-version="1.0" + +ENV CELLBASE_USER cellbase +ENV CELLBASE_HOME /opt/cellbase/ + +RUN apt-get update && apt-get install -y openjdk-8-jre wget htop vim \ + && adduser --uid 1001 $CELLBASE_USER +#&& addgroup --gid 1001 $CELLBASE_USER && +USER $CELLBASE_USER + +VOLUME /opt/cellbase/conf + +COPY . /opt/cellbase +WORKDIR /opt/cellbase From b236527710d13ef4867b5b9191903c0e93890ef4 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 16 Jul 2021 01:45:41 +0100 Subject: [PATCH 14/27] builder: skip xref objects when id does not exist --- .../org/opencb/cellbase/lib/builders/GeneBuilder.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index 1461eddc16..4ef36ab7d9 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -528,10 +528,15 @@ private void addGtfXrefs(Transcript transcript, Gene gene, Map g if (transcript.getXrefs() == null) { transcript.setXrefs(new ArrayList<>()); } + transcript.getXrefs().add(new Xref(gene.getId(), "ensembl_gene", "Ensembl Gene")); - transcript.getXrefs().add(new Xref(gene.getName(), "hgnc_symbol", "HGNC Symbol")); transcript.getXrefs().add(new Xref(transcript.getId(), "ensembl_transcript", "Ensembl Transcript")); - transcript.getXrefs().add(new Xref(transcript.getName(), "ensembl_transcript_name", "Ensembl Transcript Name")); + + // Some non-coding genes do not have Gene names + if (StringUtils.isNotEmpty(gene.getName())) { + transcript.getXrefs().add(new Xref(gene.getName(), "hgnc_symbol", "HGNC Symbol")); + transcript.getXrefs().add(new Xref(transcript.getName(), "ensembl_transcript_name", "Ensembl Transcript Name")); + } if (gtfAttributes.get("ccds_id") != null) { transcript.getXrefs().add(new
Xref(gtfAttributes.get("ccds_id"), "ccds_id", "CCDS")); From 6f88a5fd7d2b771b90fcfce1c713d191021144f0 Mon Sep 17 00:00:00 2001 From: imedina Date: Fri, 16 Jul 2021 01:49:16 +0100 Subject: [PATCH 15/27] Fix style issues --- .../variant/hgvs/HgvsProteinCalculator.java | 30 ++++++++++--------- .../lib/variant/hgvs/TranscriptUtils.java | 2 +- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java index 2e6cc213b6..ec3e1e4167 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java @@ -891,20 +891,22 @@ private HgvsProtein calculateFrameshiftHgvs() { alternateProteinSeq.append("X"); currentAaIndex++; } - } else if (transcript.getProteinSequence().startsWith("M") && !"ATG".equals(alternateCdnaSeq.substring(transcript.getCdnaCodingStart(), 3))) { - - /* - First codon is NOT ATG but protein sequence starts with M. This is due to Ensembl curation. From Ensembl: - "We have some information about non-ATG start codons in our blog post from release 102: - https://www.ensembl.info/2020/11/30/ensembl-102-has-been-released/ - Quite simply, there is not a rule. This is a situation of exceptional biology which we are only able to annotate correctly - because of our expert manual gene annotators analysing the data in detail." - Only relevant for frameshifts, and transcripts with confirmed starts. - */ - // fast forward past first - alternateProteinSeq.append("M"); - currentAaIndex++; - codonIndex += 3; + } else { + if (transcript.getProteinSequence().startsWith("M") + && !"ATG".equals(alternateCdnaSeq.substring(transcript.getCdnaCodingStart(), 3))) { + /* + First codon is NOT ATG but protein sequence starts with M. This is due to Ensembl curation. 
From Ensembl: + "We have some information about non-ATG start codons in our blog post from release 102: + https://www.ensembl.info/2020/11/30/ensembl-102-has-been-released/ + Quite simply, there is not a rule. This is a situation of exceptional biology which we are only able to annotate + correctly because of our expert manual gene annotators analysing the data in detail." + Only relevant for frameshifts, and transcripts with confirmed starts. + */ + // fast forward past first + alternateProteinSeq.append("M"); + currentAaIndex++; + codonIndex += 3; + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java index a1687ee617..7012b220ba 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/TranscriptUtils.java @@ -132,7 +132,7 @@ public String getCodon(int codonPosition) { // See ac289d2fcd10c8a0af3163b0fca6ffc149ba915a // Use cdnaCodonStart + 3, not transcript.getCdnaCodingEnd() // if the variant is the penultimate NT in the sequence, then transcript.cdnaCodingEnd can be too short and product a codon - // of size = 2, generating an exception. + // of size = 2, generating an exception. 
int cdnaCodonEnd = cdnaCodonStart + 3; if (cdnaCodonStart >= 0 && transcript.getCdnaSequence().length() >= cdnaCodonEnd) { return transcript.getCdnaSequence().substring(cdnaCodonStart, cdnaCodonEnd); From ee1db852ba6fc1b835bd280377fe0b3e172755b4 Mon Sep 17 00:00:00 2001 From: julie-sullivan Date: Mon, 19 Jul 2021 11:52:13 +0100 Subject: [PATCH 16/27] typo --- .../opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java index ec3e1e4167..9ea7694b33 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/hgvs/HgvsProteinCalculator.java @@ -893,7 +893,7 @@ private HgvsProtein calculateFrameshiftHgvs() { } } else { if (transcript.getProteinSequence().startsWith("M") - && !"ATG".equals(alternateCdnaSeq.substring(transcript.getCdnaCodingStart(), 3))) { + && !"ATG".equals(alternateCdnaSeq.substring(transcript.getCdnaCodingStart(), transcript.getCdnaCodingStart() + 3))) { /* First codon is NOT ATG but protein sequence starts with M. This is due to Ensembl curation. 
From Ensembl: "We have some information about non-ATG start codons in our blog post from release 102: From 12b3ca6ea6b9136d8fe9d746818ace003e3d37e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 21 Jul 2021 18:40:38 +0100 Subject: [PATCH 17/27] client: Use client rest timeout --- .../org/opencb/cellbase/client/config/RestConfig.java | 2 +- .../opencb/cellbase/client/rest/ParentRestClient.java | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cellbase-client/src/main/java/org/opencb/cellbase/client/config/RestConfig.java b/cellbase-client/src/main/java/org/opencb/cellbase/client/config/RestConfig.java index 73d2436e2f..0618b256ca 100644 --- a/cellbase-client/src/main/java/org/opencb/cellbase/client/config/RestConfig.java +++ b/cellbase-client/src/main/java/org/opencb/cellbase/client/config/RestConfig.java @@ -24,7 +24,7 @@ public class RestConfig { private List hosts; - private int timeout; + private int timeout = 60000; public RestConfig() { } diff --git a/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java b/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java index f7933ef1a9..7f3ea31ded 100644 --- a/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java +++ b/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java @@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.util.StdConverter; import org.apache.commons.lang3.StringUtils; +import org.glassfish.jersey.client.ClientProperties; import org.opencb.biodata.models.variant.avro.ConsequenceType; import org.opencb.biodata.models.variant.avro.DrugResponseClassification; import org.opencb.biodata.models.variant.avro.GeneCancerAssociation; @@ -94,6 +95,8 @@ public ParentRestClient(String species, String assembly, ClientConfiguration con this.configuration = 
configuration; this.client = ClientBuilder.newClient(); + client.property(ClientProperties.CONNECT_TIMEOUT, 1000); + client.property(ClientProperties.READ_TIMEOUT, configuration.getRest().getTimeout()); logger = LoggerFactory.getLogger(this.getClass().toString()); } @@ -204,6 +207,7 @@ protected CellBaseDataResponse execute(List idList, String resour ? options.getInt("numThreads", DEFAULT_NUM_THREADS) : DEFAULT_NUM_THREADS; + // TODO: Use cached thread pool ExecutorService executorService = Executors.newFixedThreadPool(numThreads); List>> futureList = new ArrayList<>((idList.size() / REST_CALL_BATCH_SIZE) + 1); for (int i = 0; i < idList.size(); i += REST_CALL_BATCH_SIZE) { @@ -223,8 +227,11 @@ protected CellBaseDataResponse execute(List idList, String resour Thread.sleep(5); } cellBaseDataResults.addAll(responseFuture.get().getResponses()); - } catch (InterruptedException | ExecutionException e) { - e.printStackTrace(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException(e); + } catch (ExecutionException e) { + throw new IOException(e); } } From d6c39d01ca2390de6ce9efafbe10b8edb046c0ba Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 22 Jul 2021 01:11:42 +0100 Subject: [PATCH 18/27] lib: fix MANE file name --- .../java/org/opencb/cellbase/lib/builders/GeneBuilder.java | 4 +--- .../org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index 4ef36ab7d9..b3bce0aa1a 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -89,7 +89,7 @@ public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, Species boolean flexibleGTFParsing, CellBaseSerializer serializer) 
throws CellBaseException { this(null, geneDirectoryPath.resolve("description.txt"), geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v0.91.summary.txt.gz"), + geneDirectoryPath.resolve("MANE.GRCh38.v0.93.summary.txt.gz"), geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), geneDirectoryPath.resolve("idmapping_selected.tab.gz"), geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), @@ -448,13 +448,11 @@ private Transcript getTranscript(Gene gene, EnsemblGeneBuilderIndexer indexer, T if (StringUtils.isNotEmpty(maneRefSeq)) { String tso500Flag = indexer.getTSO500(maneRefSeq.split("\\.")[0]); if (StringUtils.isNotEmpty(tso500Flag)) { - System.out.println("tso500Flag = " + tso500Flag); transcript.getFlags().add(tso500Flag); } String eglhHaemOncFlag = indexer.getEGLHHaemOnc(maneRefSeq.split("\\.")[0]); if (StringUtils.isNotEmpty(eglhHaemOncFlag)) { - System.out.println("eglhHaemOncFlag = " + eglhHaemOncFlag); transcript.getFlags().add(eglhHaemOncFlag); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java index 091a6e2d00..f51ffc481d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java @@ -120,9 +120,9 @@ private void getCdnaFastaFileFromDirectoryPath(Path refSeqDirectoryPath) { public void parse() throws Exception { // Preparing the fasta file for fast accessing FastaIndex fastaIndex = null; -// if (fastaFile != null) { -// fastaIndex = new FastaIndex(fastaFile); -// } + if (fastaFile != null) { + fastaIndex = new FastaIndex(fastaFile); + } // index protein sequences for later RefSeqGeneBuilderIndexer indexer = new RefSeqGeneBuilderIndexer(gtfFile.getParent()); From 9f3f5d64c6ea3213b1cbf3a6ee9f49a809209f54 Mon Sep 17 00:00:00 2001 From: imedina Date: 
Thu, 22 Jul 2021 01:20:03 +0100 Subject: [PATCH 19/27] lib: replace newFixedThreadPool with a newCachedThreadPool --- .../VariantAnnotationCalculator.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index e0ab215e81..50262b7570 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -85,6 +85,7 @@ public class VariantAnnotationCalculator { private static final String REGULATORY_REGION_FEATURE_TYPE_ATTRIBUTE = "featureType"; private static final String TF_BINDING_SITE = ParamConstants.FeatureType.TF_binding_site.name(); + private static final ExecutorService CACHED_THREAD_POOL = Executors.newCachedThreadPool(); private static Logger logger = LoggerFactory.getLogger(VariantAnnotationCalculator.class); public VariantAnnotationCalculator(String species, String assembly, CellBaseManagerFactory cellbaseManagerFactory) @@ -455,7 +456,6 @@ private List runAnnotationProcess(List normalizedVar * Next three async blocks calculate annotations using Futures, this will be calculated in a different thread. * Once the main loop has finished then they will be stored. This provides a ~30% of performance improvement. 
*/ - ExecutorService fixedThreadPool = Executors.newFixedThreadPool(6); FutureVariationAnnotator futureVariationAnnotator = null; Future>> variationFuture = null; List batchGeneList = getBatchGeneList(normalizedVariantList); @@ -464,21 +464,21 @@ private List runAnnotationProcess(List normalizedVar futureVariationAnnotator = new FutureVariationAnnotator(normalizedVariantList, new QueryOptions("include", "id,annotation.populationFrequencies,annotation.additionalAttributes.dgvSpecificAttributes") .append("imprecise", imprecise)); - variationFuture = fixedThreadPool.submit(futureVariationAnnotator); + variationFuture = CACHED_THREAD_POOL.submit(futureVariationAnnotator); } FutureConservationAnnotator futureConservationAnnotator = null; Future>> conservationFuture = null; if (annotatorSet.contains("conservation")) { futureConservationAnnotator = new FutureConservationAnnotator(normalizedVariantList, QueryOptions.empty()); - conservationFuture = fixedThreadPool.submit(futureConservationAnnotator); + conservationFuture = CACHED_THREAD_POOL.submit(futureConservationAnnotator); } FutureVariantFunctionalScoreAnnotator futureVariantFunctionalScoreAnnotator = null; Future>> variantFunctionalScoreFuture = null; if (annotatorSet.contains("functionalScore")) { futureVariantFunctionalScoreAnnotator = new FutureVariantFunctionalScoreAnnotator(normalizedVariantList, QueryOptions.empty()); - variantFunctionalScoreFuture = fixedThreadPool.submit(futureVariantFunctionalScoreAnnotator); + variantFunctionalScoreFuture = CACHED_THREAD_POOL.submit(futureVariantFunctionalScoreAnnotator); } FutureClinicalAnnotator futureClinicalAnnotator = null; @@ -489,21 +489,21 @@ private List runAnnotationProcess(List normalizedVar queryOptions.add(ParamConstants.QueryParams.PHASE.key(), phased); queryOptions.add(ParamConstants.QueryParams.CHECK_AMINO_ACID_CHANGE.key(), checkAminoAcidChange); futureClinicalAnnotator = new FutureClinicalAnnotator(normalizedVariantList, batchGeneList, queryOptions); - 
clinicalFuture = fixedThreadPool.submit(futureClinicalAnnotator); + clinicalFuture = CACHED_THREAD_POOL.submit(futureClinicalAnnotator); } FutureRepeatsAnnotator futureRepeatsAnnotator = null; Future>> repeatsFuture = null; if (annotatorSet.contains("repeats")) { futureRepeatsAnnotator = new FutureRepeatsAnnotator(normalizedVariantList); - repeatsFuture = fixedThreadPool.submit(futureRepeatsAnnotator); + repeatsFuture = CACHED_THREAD_POOL.submit(futureRepeatsAnnotator); } FutureCytobandAnnotator futureCytobandAnnotator = null; Future>> cytobandFuture = null; if (annotatorSet.contains("cytoband")) { futureCytobandAnnotator = new FutureCytobandAnnotator(normalizedVariantList, QueryOptions.empty()); - cytobandFuture = fixedThreadPool.submit(futureCytobandAnnotator); + cytobandFuture = CACHED_THREAD_POOL.submit(futureCytobandAnnotator); } // We iterate over all variants to get the rest of the annotations and to create the VariantAnnotation objects @@ -622,7 +622,9 @@ private List runAnnotationProcess(List normalizedVar if (futureCytobandAnnotator != null) { futureCytobandAnnotator.processResults(cytobandFuture, variantAnnotationList); } - fixedThreadPool.shutdown(); + + // Not needed with newCachedThreadPool + // fixedThreadPool.shutdown(); logger.debug("Total batch annotation performance is {}ms for {} variants", System.currentTimeMillis() - globalStartTime, normalizedVariantList.size()); From 02b961b28962d50ef4ba2206795a2e79089f03d0 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 22 Jul 2021 10:03:28 +0100 Subject: [PATCH 20/27] app: fix docker build script --- cellbase-app/app/cloud/docker/docker-build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cellbase-app/app/cloud/docker/docker-build.py b/cellbase-app/app/cloud/docker/docker-build.py index 919a20045a..dd3d12f254 100755 --- a/cellbase-app/app/cloud/docker/docker-build.py +++ b/cellbase-app/app/cloud/docker/docker-build.py @@ -64,9 +64,9 @@ def build(): print() 
print(shell_colors['blue'] + "Building opencb/cellbase-" + image + ":" + tag + " ..." + shell_colors['reset']) if image == "base": - run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile.ubuntu " + build_folder) + run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile " + build_folder) else: - run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile.ubuntu --build-arg TAG=" + tag + " " + build_folder) + run("docker build -t opencb/cellbase-" + image + ":" + tag + " -f " + build_folder + "/cloud/docker/cellbase-" + image + "/Dockerfile --build-arg TAG=" + tag + " " + build_folder) def tag_latest(image): From a7852f186c406f4fb80d9fbd519dee6079d2a651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 22 Jul 2021 17:03:30 +0100 Subject: [PATCH 21/27] client: Remove excessive debug logs on client --- .../cellbase/client/rest/ParentRestClient.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java b/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java index 7f3ea31ded..0e11cb32f7 100644 --- a/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java +++ b/cellbase-client/src/main/java/org/opencb/cellbase/client/rest/ParentRestClient.java @@ -93,11 +93,13 @@ public ParentRestClient(String species, String assembly, ClientConfiguration con this.species = species; this.assembly = assembly; this.configuration = configuration; + logger = LoggerFactory.getLogger(this.getClass().toString()); this.client = ClientBuilder.newClient(); client.property(ClientProperties.CONNECT_TIMEOUT, 1000); client.property(ClientProperties.READ_TIMEOUT, 
configuration.getRest().getTimeout()); - logger = LoggerFactory.getLogger(this.getClass().toString()); + + logger.debug("Configure read timeout : " + configuration.getRest().getTimeout() + "ms"); } static { @@ -255,10 +257,9 @@ private CellBaseDataResponse fetchData(List idList, String resour List newIdsList = null; boolean call = true; int skip = 0; - CellBaseDataResponse queryResponse = null; CellBaseDataResponse finalDataResponse = null; while (call) { - queryResponse = robustRestCall(idList, resource, options, clazz, post); + CellBaseDataResponse queryResponse = robustRestCall(idList, resource, options, clazz, post); // First iteration we set the response object, no merge needed // Create id -> finalDataResponse-position map, so that we can know in forthcoming iterations where to @@ -303,8 +304,12 @@ private CellBaseDataResponse fetchData(List idList, String resour options.put("skip", skip); } } + logger.debug("queryResponse: {" + + "time: " + finalDataResponse.getTime() + ", " + + "apiVersion: " + finalDataResponse.getApiVersion() + ", " + + "responses: " + finalDataResponse.getResponses().size() + ", " + + "events: " + finalDataResponse.getEvents() + "}"); - logger.debug("queryResponse = " + queryResponse); return finalDataResponse; } From 7b2a9908ffef329ff47932982096fb54b18afc26 Mon Sep 17 00:00:00 2001 From: imedina Date: Sat, 24 Jul 2021 04:52:35 +0100 Subject: [PATCH 22/27] Prepare release 5.0.0 --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- cellbase-test/pom.xml | 2 +- pom.xml | 8 ++++---- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 1003af1121..4252c52c7b 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0-SNAPSHOT + 5.0.0 ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 
0999a207c6..30e72cd9e3 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0-SNAPSHOT + 5.0.0 ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index 0e2739d580..f082733a7e 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0-SNAPSHOT + 5.0.0 ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 6a422457c1..124aa415b3 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0-SNAPSHOT + 5.0.0 ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index c0d4c36ed5..ff811334ba 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0-SNAPSHOT + 5.0.0 ../pom.xml diff --git a/cellbase-test/pom.xml b/cellbase-test/pom.xml index e6c6037191..0161ca5e75 100644 --- a/cellbase-test/pom.xml +++ b/cellbase-test/pom.xml @@ -22,7 +22,7 @@ org.opencb.cellbase cellbase-test - 5.0.0-SNAPSHOT + 5.0.0 pom diff --git a/pom.xml b/pom.xml index 85637b20ac..d8950ea722 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0-SNAPSHOT + 5.0.0 pom CellBase project @@ -22,9 +22,9 @@ - 5.0.0-SNAPSHOT - 4.1.1-SNAPSHOT - 2.1.1-SNAPSHOT + 5.0.0 + 4.1.1 + 2.1.1 0.1.0 2.10.1 2.25.1 From 9444947449c548e106e9817227acadba98e6e66a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Tue, 3 Aug 2021 17:47:35 +0100 Subject: [PATCH 23/27] devops: Remove "test" job from release workflow. 
--- .github/workflows/release.yml | 49 ++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5b800e6edc..9e8493672c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,6 +1,7 @@ name: CellBase Release workflow on: + workflow_dispatch: {} push: tags: - '*' @@ -22,33 +23,33 @@ jobs: run: mvn clean install -DskipTests - name: Maven Validate run: mvn validate - test: - name: CellBase Test - runs-on: ubuntu-18.04 - needs: build - strategy: - matrix: - java: ["1.8"] - mongodb: ["4.0", "4.2"] - services: - mongodb: - image: mongo:${{ matrix.mongodb }} - ports: - - 27017:27017 - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: '10' - - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.java }} - - name: Build with Maven - run: mvn clean install -Dcheckstyle.skip +# test: +# name: CellBase Test +# runs-on: ubuntu-18.04 +# needs: build +# strategy: +# matrix: +# java: ["1.8"] +# mongodb: ["4.0", "4.2"] +# services: +# mongodb: +# image: mongo:${{ matrix.mongodb }} +# ports: +# - 27017:27017 +# steps: +# - uses: actions/checkout@v2 +# with: +# fetch-depth: '10' +# - name: Set up JDK ${{ matrix.java }} +# uses: actions/setup-java@v1 +# with: +# java-version: ${{ matrix.java }} +# - name: Build with Maven +# run: mvn clean install -Dcheckstyle.skip deploy: name: Build and deploy CellBase libs, Java docs and Docker images in Maven Central and DockerHub repositories runs-on: ubuntu-18.04 - needs: test +# needs: test steps: - uses: actions/checkout@v2 with: From 7f87a09f284c0f4b1b95dff95b454ac0b5f4f5c7 Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 8 Dec 2021 02:09:04 +0000 Subject: [PATCH 24/27] pom: fix build configuration --- pom.xml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index d8950ea722..4aae1bb4d2 
100644 --- a/pom.xml +++ b/pom.xml @@ -425,16 +425,17 @@ - - - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - + + + + ossrh + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + From e07ff38102e718dc2285549d07b7a058650223ca Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 8 Dec 2021 02:44:37 +0000 Subject: [PATCH 25/27] Increment version to 5.1.0-SNAPSHOT --- cellbase-app/pom.xml | 2 +- .../main/annotation/PopulationFrequenciesAnnotator.java | 2 +- .../app/cli/main/annotation/VcfVariantAnnotator.java | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- .../opencb/cellbase/core/variant/PhasedQueryManager.java | 2 +- cellbase-lib/pom.xml | 2 +- .../variant/annotation/VariantAnnotationCalculator.java | 6 +++--- cellbase-server/pom.xml | 2 +- cellbase-test/pom.xml | 2 +- pom.xml | 8 ++++---- 11 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 4252c52c7b..7783d0f6cf 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/PopulationFrequenciesAnnotator.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/PopulationFrequenciesAnnotator.java index 81757ac976..b017aebe5b 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/PopulationFrequenciesAnnotator.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/PopulationFrequenciesAnnotator.java @@ -127,8 +127,8 @@ private CellBaseDataResult getPopulationFrequencies(Variant variant) { flagVisitedVariant(variantKey, variant1); populationFrequencyCellBaseDataResult.setResults(Collections.singletonList(variant1)); - 
populationFrequencyCellBaseDataResult.setNumTotalResults(1); populationFrequencyCellBaseDataResult.setNumResults(1); + populationFrequencyCellBaseDataResult.setNumMatches(1); } } catch (RocksDBException | IOException e) { e.printStackTrace(); diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VcfVariantAnnotator.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VcfVariantAnnotator.java index f6c54ff1cd..0582d2b521 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VcfVariantAnnotator.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/annotation/VcfVariantAnnotator.java @@ -145,8 +145,8 @@ private CellBaseDataResult getCustomAnnotation(Variant variant) { Variant variant1 = mapper.readValue(dbContent, Variant.class); customAnnotationCellBaseDataResult.setResults(Collections.singletonList(variant1)); - customAnnotationCellBaseDataResult.setNumTotalResults(1); customAnnotationCellBaseDataResult.setNumResults(1); + customAnnotationCellBaseDataResult.setNumMatches(1); } } catch (RocksDBException | IOException e) { e.printStackTrace(); diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 30e72cd9e3..1e398b47b4 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index f082733a7e..02ec7a8b40 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java index 823a02695b..1b371a941b 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java +++ 
b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/PhasedQueryManager.java @@ -224,7 +224,7 @@ private boolean alternateAlleleMatch(String allele, String allele1) { protected void reset(CellBaseDataResult variantCellBaseDataResult) { variantCellBaseDataResult.setResults(Collections.emptyList()); variantCellBaseDataResult.setNumResults(0); - variantCellBaseDataResult.setNumTotalResults(0); + variantCellBaseDataResult.setNumMatches(0); } diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 124aa415b3..b82f1b07c8 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index 50262b7570..cf3520f8e7 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -131,7 +131,7 @@ public CellBaseDataResult getAllConsequenceTypesByVariant(Variant variant, Query cellBaseDataResult.setId(variant.toString()); cellBaseDataResult.setTime(Long.valueOf(System.currentTimeMillis() - dbTimeStart).intValue()); cellBaseDataResult.setNumResults(consequenceTypeList.size()); - cellBaseDataResult.setNumTotalResults(consequenceTypeList.size()); + cellBaseDataResult.setNumMatches(consequenceTypeList.size()); cellBaseDataResult.setResults(consequenceTypeList); return cellBaseDataResult; } @@ -1761,7 +1761,7 @@ public List> call() throws Exception { // to the two breakpoints repeatSet.addAll(tmpCellBaseDataResultList.get(1).getResults()); newCellBaseDataResult.setNumResults(repeatSet.size()); - newCellBaseDataResult.setNumTotalResults(repeatSet.size()); + newCellBaseDataResult.setNumMatches(repeatSet.size()); 
newCellBaseDataResult.setResults(new ArrayList(repeatSet)); } cellBaseDataResultList.add(newCellBaseDataResult); @@ -1824,7 +1824,7 @@ public List> call() throws Exception { // to the two breakpoints cytobandSet.addAll(tmpCellBaseDataResultList.get(1).getResults()); newCellBaseDataResult.setNumResults(cytobandSet.size()); - newCellBaseDataResult.setNumTotalResults(cytobandSet.size()); + newCellBaseDataResult.setNumMatches(cytobandSet.size()); newCellBaseDataResult.setResults(new ArrayList(cytobandSet)); } cellBaseDataResultList.add(newCellBaseDataResult); diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index ff811334ba..032fd4bc01 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.1.0-SNAPSHOT ../pom.xml diff --git a/cellbase-test/pom.xml b/cellbase-test/pom.xml index 0161ca5e75..7ee27a1ca0 100644 --- a/cellbase-test/pom.xml +++ b/cellbase-test/pom.xml @@ -22,7 +22,7 @@ org.opencb.cellbase cellbase-test - 5.0.0 + 5.1.0-SNAPSHOT pom diff --git a/pom.xml b/pom.xml index 4aae1bb4d2..fc94d8a160 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.1.0-SNAPSHOT pom CellBase project @@ -22,9 +22,9 @@ - 5.0.0 - 4.1.1 - 2.1.1 + 5.1.0-SNAPSHOT + 4.2.0-SNAPSHOT + 2.2.0-SNAPSHOT 0.1.0 2.10.1 2.25.1 From 62522d491d3d176084e16d072d458bcf5a55945c Mon Sep 17 00:00:00 2001 From: imedina Date: Wed, 8 Dec 2021 03:02:25 +0000 Subject: [PATCH 26/27] pom: remove local settings file --- .mvn/README | 1 - .mvn/local-settings.xml | 20 -------------------- .mvn/maven.config | 1 - 3 files changed, 22 deletions(-) delete mode 100644 .mvn/README delete mode 100644 .mvn/local-settings.xml delete mode 100644 .mvn/maven.config diff --git a/.mvn/README b/.mvn/README deleted file mode 100644 index 8483eeb213..0000000000 --- a/.mvn/README +++ /dev/null @@ -1 +0,0 @@ - Hack to get around Maven blocking HTTP repos. 
Once CellBase does not have any HTTP dependencies, delete this directory. diff --git a/.mvn/local-settings.xml b/.mvn/local-settings.xml deleted file mode 100644 index 54c518fb5e..0000000000 --- a/.mvn/local-settings.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - release-opencb-ext-libs - opencb-ext-libs - - http://bioinfo.hpc.cam.ac.uk/downloads/ext-libs/ - false - - - release-maven-restlet - maven-restlet - - http://maven.restlet.org - false - - - diff --git a/.mvn/maven.config b/.mvn/maven.config deleted file mode 100644 index d694bf811b..0000000000 --- a/.mvn/maven.config +++ /dev/null @@ -1 +0,0 @@ - --settings .mvn/local-settings.xml From 3bebc9e71292f4c64a78b60aad67fabb50b33e16 Mon Sep 17 00:00:00 2001 From: imedina Date: Thu, 9 Dec 2021 11:21:00 +0000 Subject: [PATCH 27/27] pom: bump version to 5.0.1-SNAPSHOT --- cellbase-app/pom.xml | 2 +- cellbase-client/pom.xml | 2 +- cellbase-core/pom.xml | 2 +- cellbase-lib/pom.xml | 2 +- cellbase-server/pom.xml | 2 +- cellbase-test/pom.xml | 2 +- pom.xml | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 4252c52c7b..68b2c368e2 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.0.1-SNAPSHOT ../pom.xml diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 30e72cd9e3..670a027aa3 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.0.1-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index f082733a7e..dcafd970ca 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.0.1-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 124aa415b3..5c7371d597 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.0.1-SNAPSHOT ../pom.xml 
diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index ff811334ba..e1e39e80c8 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.0.1-SNAPSHOT ../pom.xml diff --git a/cellbase-test/pom.xml b/cellbase-test/pom.xml index 0161ca5e75..0f14d8849e 100644 --- a/cellbase-test/pom.xml +++ b/cellbase-test/pom.xml @@ -22,7 +22,7 @@ org.opencb.cellbase cellbase-test - 5.0.0 + 5.0.1-SNAPSHOT pom diff --git a/pom.xml b/pom.xml index 4aae1bb4d2..e8b677ef75 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 5.0.0 + 5.0.1-SNAPSHOT pom CellBase project @@ -22,7 +22,7 @@ - 5.0.0 + 5.0.1-SNAPSHOT 4.1.1 2.1.1 0.1.0