From 465810cccc8223f244c2e938dbdb643725bf4534 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 24 Nov 2023 21:21:31 +0100 Subject: [PATCH 1/9] updated the SeqcolComparisonController --- .../evaseqcol/controller/seqcol/SeqColComparisonController.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/seqcol/SeqColComparisonController.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/seqcol/SeqColComparisonController.java index f71c47f..81856fd 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/seqcol/SeqColComparisonController.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/controller/seqcol/SeqColComparisonController.java @@ -85,7 +85,7 @@ public ResponseEntity compareSequenceCollections( required = true) @PathVariable String digest1, @Parameter(name = "seqColLevelTwo", description = "SeqCol object level 2", - required = true) @RequestBody TreeMap> seqColLevelTwo + required = true) @RequestBody TreeMap> seqColLevelTwo ) { try { SeqColComparisonResultEntity comparisonResult = seqColService.compareSeqCols(digest1, seqColLevelTwo); From a5d7df8a40c162bd03e50354ece4c4bb2a74afea Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 24 Nov 2023 21:22:05 +0100 Subject: [PATCH 2/9] changed the attributeType names into the normalized ones/names --- .../eva/evaseqcol/entities/SeqColExtendedDataEntity.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java index add22c2..1133875 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java @@ -21,7 +21,6 @@ import javax.persistence.Transient; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Comparator; import java.util.LinkedList; import java.util.List; @@ -52,7 +51,7 @@ public class SeqColExtendedDataEntity { private SeqColEntity.NamingConvention namingConvention; public enum AttributeType { - names, sequences, md5DigestsOfSequences, lengths, sortedNameLengthPairs + names, sequences, md5_sequences, lengths, sorted_name_length_pairs } public SeqColExtendedDataEntity setAttributeType(AttributeType attributeType) { @@ -138,7 +137,7 @@ public static SeqColExtendedDataEntity> constructSeqColSequencesObj public static SeqColExtendedDataEntity> constructSeqColSequencesMd5Object( AssemblySequenceEntity assemblySequenceEntity) throws IOException { SeqColExtendedDataEntity> seqColSequencesObject = new SeqColExtendedDataEntity>().setAttributeType( - AttributeType.md5DigestsOfSequences); + AttributeType.md5_sequences); JSONExtData> seqColSequencesArray = new JSONStringListExtData(); List sequencesList = new LinkedList<>(); @@ -160,7 +159,7 @@ public static SeqColExtendedDataEntity> constructSeqColSortedNameLe return null; // Names and Lengths entities are not compatible } SeqColExtendedDataEntity> SeqColSortedNameLengthPairsObject = new SeqColExtendedDataEntity>().setAttributeType( - AttributeType.sortedNameLengthPairs); + AttributeType.sorted_name_length_pairs); JSONExtData> seqColSortedNameLengthPairsArray = new JSONStringListExtData(); // Get the plain name-length pairs From fa7ac1a9fb814bbb4593b53b04cde3295d442168 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 24 Nov 2023 21:22:43 +0100 Subject: [PATCH 3/9] some refactoring --- .../eva/evaseqcol/service/SeqColLevelOneService.java | 10 ++++------ .../eva/evaseqcol/service/SeqColLevelTwoService.java | 12 ++++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java index fecbff6..044a69f 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java @@ -1,6 +1,5 @@ package uk.ac.ebi.eva.evaseqcol.service; -import com.vladmihalcea.hibernate.type.basic.Inet; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -9,7 +8,6 @@ import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity; import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator; import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity; -import uk.ac.ebi.eva.evaseqcol.refget.SHA512ChecksumCalculator; import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository; import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData; import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData; @@ -80,10 +78,10 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List> md5SequencesExtEntity = new SeqColExtendedDataEntity<>(); - md5SequencesExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.md5DigestsOfSequences); + md5SequencesExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.md5_sequences); md5SequencesExtEntity.setExtendedSeqColData(md5SequencesExtData); md5SequencesExtEntity.setDigest(digestCalculator.getSha512Digest(md5SequencesExtData.toString())); // Lengths @@ -138,7 +136,7 @@ public SeqColLevelOneEntity constructSeqColLevelOne( namesExtEntity.setDigest(digestCalculator.getSha512Digest(namesExtData.toString())); //sorted-name-length-pairs SeqColExtendedDataEntity> sortedNameLengthPairsExtEntity = new SeqColExtendedDataEntity<>(); - sortedNameLengthPairsExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.sortedNameLengthPairs); + sortedNameLengthPairsExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.sorted_name_length_pairs); sortedNameLengthPairsExtEntity.setExtendedSeqColData(sortedNameLengthPairsData); sortedNameLengthPairsExtEntity.setDigest(digestCalculator.getSha512Digest(sortedNameLengthPairsData.toString())); diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java index 6af73fa..999e3a5 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java @@ -46,10 +46,10 @@ public Optional getSeqColLevelTwoByDigest(String digest) { case sequences: levelTwoEntity.setSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case md5DigestsOfSequences: + case md5_sequences: levelTwoEntity.setMd5DigestsOfSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case sortedNameLengthPairs: + case sorted_name_length_pairs: levelTwoEntity.setSortedNameLengthPairs(extendedStringTypeData.getExtendedSeqColData().getObject()); break; } @@ -79,7 +79,7 @@ private List>> getStringTypeExtendedAttrib if (!extendedMD5Sequences.isPresent()) { throw new RuntimeException("Extended md5 sequences data with digest:" + levelOneEntity.getSeqColLevel1Object().getMd5DigestsOfSequences() + " not found"); } - extendedMD5Sequences.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.md5DigestsOfSequences); + extendedMD5Sequences.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.md5_sequences); Optional>> extendedNames = extendedDataService.getExtendedAttributeByDigest(levelOneEntity.getSeqColLevel1Object().getNames()); if (!extendedNames.isPresent()) { @@ -91,7 +91,7 @@ private List>> getStringTypeExtendedAttrib if (!extendedSortedNameLengthPairs.isPresent()) { throw new RuntimeException("Extended names data with digest: " + levelOneEntity.getSeqColLevel1Object().getNames() + " not found"); } - extendedSortedNameLengthPairs.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.sortedNameLengthPairs); + extendedSortedNameLengthPairs.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.sorted_name_length_pairs); return Arrays.asList( extendedSequences.get(), @@ -125,10 +125,10 @@ public SeqColLevelTwoEntity constructSeqColL2(String level0Digest, case sequences: levelTwoEntity.setSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case md5DigestsOfSequences: + case md5_sequences: levelTwoEntity.setMd5DigestsOfSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case sortedNameLengthPairs: + case sorted_name_length_pairs: levelTwoEntity.setSortedNameLengthPairs(extendedStringTypeData.getExtendedSeqColData().getObject()); break; } From 34e7e50aadfec940b422c1f9776495caa72dd2e0 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 24 Nov 2023 21:23:09 +0100 Subject: [PATCH 4/9] updated the comparison function and fixed the bug --- .../eva/evaseqcol/service/SeqColService.java | 88 +++++++------------ .../evaseqcol/utils/SeqColMapConverter.java | 4 +- 2 files changed, 32 insertions(+), 60 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java index 3eab905..5c682ba 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java @@ -18,6 +18,8 @@ import uk.ac.ebi.eva.evaseqcol.exception.SeqColNotFoundException; import uk.ac.ebi.eva.evaseqcol.exception.UnableToLoadServiceInfoException; import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData; +import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData; +import uk.ac.ebi.eva.evaseqcol.utils.JSONStringListExtData; import uk.ac.ebi.eva.evaseqcol.utils.SeqColMapConverter; import java.io.IOException; @@ -263,8 +265,8 @@ public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, String public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, SeqColLevelTwoEntity seqColAEntity, String seqColBDigest, SeqColLevelTwoEntity seqColBEntity) { - Map> seqColAMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity); - Map> seqColBMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColBEntity); + Map> seqColAMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity); + Map> seqColBMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColBEntity); return compareSeqCols(seqColADigest, seqColAMap, seqColBDigest, seqColBMap); } @@ -272,14 +274,14 @@ public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, SeqColL * Compare two seqCol objects; an already saved one: seqColA, with pre-defined attributes, * and undefined one: seqColB (unknown attributes). BE CAREFUL: the order of the arguments matters!!. * Note: of course the seqCol minimal required attributes should be present*/ - public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, Map> seqColBEntityMap) throws IOException { + public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, Map> seqColBEntityMap) throws IOException { Optional seqColAEntity = levelTwoService.getSeqColLevelTwoByDigest(seqColADigest); // Calculating the seqColB level 0 digest String seqColBDigest = calculateSeqColLevelTwoMapDigest(seqColBEntityMap); // Converting seqColA object into a Map in order to handle attributes generically ( - Map> seqColAEntityMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity.get()); + Map> seqColAEntityMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity.get()); return compareSeqCols(seqColADigest, seqColAEntityMap, seqColBDigest, seqColBEntityMap); } @@ -287,7 +289,7 @@ public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, Map> seqColAEntityMap, String seqColBDigest, Map> seqColBEntityMap) { + String seqColADigest, Map> seqColAEntityMap, String seqColBDigest, Map> seqColBEntityMap) { logger.info("Comparing seqCol " + seqColADigest + " and seqCol " + seqColBDigest); SeqColComparisonResultEntity comparisonResult = new SeqColComparisonResultEntity(); @@ -354,10 +356,10 @@ public SeqColComparisonResultEntity compareSeqCols( * ==> Same order elements * NOTE: Assuming that the method List.retainAll() preserves the order in the original list (no counterexample at the moment) * @see "https://github.com/ga4gh/seqcol-spec/blob/master/docs/decision_record.md#same-order-specification" */ - public boolean check_A_And_B_Same_Order(List elementsA, List elementsB) { - LinkedList elementsALocal = new LinkedList<>(elementsA); - LinkedList elementsBLocal = new LinkedList<>(elementsB); - List commonElements = getCommonElementsDistinct(elementsALocal, elementsBLocal); + public boolean check_A_And_B_Same_Order(List elementsA, List elementsB) { + LinkedList elementsALocal = new LinkedList<>(elementsA); + LinkedList elementsBLocal = new LinkedList<>(elementsB); + List commonElements = getCommonElementsDistinct(elementsALocal, elementsBLocal); elementsALocal.retainAll(commonElements); // Leaving only the common elements (keeping the original order to check) elementsBLocal.retainAll(commonElements); // Leaving only the common elements (keeping the original order to check) @@ -366,12 +368,14 @@ public boolean check_A_And_B_Same_Order(List elementsA, List ele /** * Construct a seqCol level 2 (Map representation) out of the given seqColL2Map*/ - public Map constructSeqColLevelOneMap(Map> seqColL2Map) throws IOException { + public Map constructSeqColLevelOneMap(Map> seqColL2Map) throws IOException { Map seqColL1Map = new TreeMap<>(); Set seqColAttributes = seqColL2Map.keySet(); // The set of the seqCol attributes ("lengths", "sequences", etc.) for (String attribute: seqColAttributes) { String attributeDigest = digestCalculator.getSha512Digest( - convertSeqColLevelTwoAttributeValuesToString(seqColL2Map.get(attribute))); + convertSeqColLevelTwoAttributeValuesToString(seqColL2Map.get(attribute), + SeqColExtendedDataEntity.AttributeType.valueOf( + attribute))); seqColL1Map.put(attribute, attributeDigest); } return seqColL1Map; @@ -379,7 +383,7 @@ public Map constructSeqColLevelOneMap(Map> /** * Return the level 0 digest of the given seqColLevelTwoMap, which is in the form of a Map (undefined attributes)*/ - public String calculateSeqColLevelTwoMapDigest(Map> seqColLevelTwoMap) throws IOException { + public String calculateSeqColLevelTwoMapDigest(Map> seqColLevelTwoMap) throws IOException { Map seqColLevelOne = constructSeqColLevelOneMap(seqColLevelTwoMap); String levelZeroDigest = calculateSeqColLevelOneMapDigest(seqColLevelOne); return levelZeroDigest; @@ -393,54 +397,22 @@ public String calculateSeqColLevelOneMapDigest(Map seqColLevelOn return levelZeroDigest; } - private boolean onlyDigits(String str) { - String regex = "[0-9]+"; - Pattern p = Pattern.compile(regex); - if (str == null) { - return false; - } - Matcher m = p.matcher(str); - return m.matches(); - } - - /** - * Check whether the given list contains only digits (in a form of strings)*/ - private boolean onlyDigitsStringList(List list) { - return list.isEmpty() || list.stream() - .allMatch(this::onlyDigits); - } - /** * Return a normalized string representation of the given seqColL2Attribute - * Note: This is the same method as the toString of the JSONExtData class*/ - private String convertSeqColLevelTwoAttributeValuesToString(List seqColL2Attribute) { - StringBuilder objectStr = new StringBuilder(); - objectStr.append("["); - if (onlyDigitsStringList(seqColL2Attribute)) { // Lengths array, No quotes "...". Eg: [1111, 222, 333] - for (int i=0; i seqColL2Attribute, SeqColExtendedDataEntity.AttributeType type) { + switch (type) { + case lengths: // List type + return JSONIntegerListExtData.toString((List) seqColL2Attribute); } - return objectStr.toString(); + // List types + return JSONStringListExtData.toString((List) seqColL2Attribute); } /** * Return a normalized seqCol representation of the given seqColLevelOneMap - * Note: This method is the same as the toString method of the SeqColLevelOneEntity class*/ + * Note: This method is the same as the toString method of the SeqColLevelOneEntity class + * // TODO: remove code duplicates*/ private String convertSeqColLevelOneAttributeToString(Map seqColLevelOneMap) { StringBuilder seqColStringRepresentation = new StringBuilder(); seqColStringRepresentation.append("{"); @@ -472,10 +444,10 @@ public List getUniqueElements(List list1, List list2) { /** * Return the list of the common elements between seqColAFields and seqColBFields (with no duplicates)*/ - public List getCommonElementsDistinct(List seqColAFields, List seqColBFields) { - List commonFields = new ArrayList<>(seqColAFields); + public List getCommonElementsDistinct(List seqColAFields, List seqColBFields) { + List commonFields = new ArrayList<>(seqColAFields); commonFields.retainAll(seqColBFields); - List commonFieldsDistinct = commonFields.stream().distinct().collect(Collectors.toList()); + List commonFieldsDistinct = (List) commonFields.stream().distinct().collect(Collectors.toList()); return commonFieldsDistinct; } @@ -508,7 +480,7 @@ public Integer getCommonElementsCount(List listA, List listB) { /** * Return true if there are less than two overlapping elements * @see 'https://github.com/ga4gh/seqcol-spec/blob/master/docs/decision_record.md#same-order-specification'*/ - public boolean lessThanTwoOverlappingElements(List list1, List list2) { + public boolean lessThanTwoOverlappingElements(List list1, List list2) { return getCommonElementsDistinct(list1, list2).size() < 2; } @@ -530,7 +502,7 @@ public boolean lessThanTwoOverlappingElements(List list1, List l * } * Unbalanced duplicates * @see 'https://github.com/ga4gh/seqcol-spec/blob/master/docs/decision_record.md#same-order-specification'*/ - public boolean unbalancedDuplicatesPresent(List listA, List listB) { + public boolean unbalancedDuplicatesPresent(List listA, List listB) { List commonElements = getCommonElementsDistinct(listA, listB); Map> duplicatesCountMap = new HashMap<>(); for (Object element: commonElements) { diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverter.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverter.java index 493d312..559959f 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverter.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverter.java @@ -31,9 +31,9 @@ public static Map getSeqColLevelOneMap(SeqColLevelOneEntity seqC * NOTE!: Not all the attributes will be returned, only the ones concerned by the comparison * NOTE!: The level 0 digest as well as the naming convention values will be lost */ - public static Map> getSeqColLevelTwoMap(SeqColLevelTwoEntity levelTwoEntity) { + public static Map> getSeqColLevelTwoMap(SeqColLevelTwoEntity levelTwoEntity) { ObjectMapper objectMapper = new ObjectMapper(); - Map> seqColMap = objectMapper.convertValue(levelTwoEntity, Map.class); + Map> seqColMap = objectMapper.convertValue(levelTwoEntity, Map.class); return seqColMap; } From 9a65f8b29e9aa0a93537a16a19546982d87e4e12 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 24 Nov 2023 21:25:38 +0100 Subject: [PATCH 5/9] tiny refactor in the SeqColMapConverterTest --- .../uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverterTest.java b/src/test/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverterTest.java index 37fc245..317b6e1 100644 --- a/src/test/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverterTest.java +++ b/src/test/java/uk/ac/ebi/eva/evaseqcol/utils/SeqColMapConverterTest.java @@ -42,7 +42,7 @@ void setSeqColLevelOneMapConverterTest() { @Test void seqColLevelTwoMapConverterTest() { SeqColLevelTwoEntity levelTwoEntity = seqColGenerator.generateLevelTwoEntity(); - Map> levelTwoMap = seqColMapConverter.getSeqColLevelTwoMap(levelTwoEntity); + Map> levelTwoMap = seqColMapConverter.getSeqColLevelTwoMap(levelTwoEntity); assertFalse(levelTwoMap.keySet().isEmpty()); // At least we should have the "sequences", "lengths" and "names" assertTrue(levelTwoMap.containsKey("sequences")); assertTrue(levelTwoMap.containsKey("lengths")); From b20a61a3bfae006578f3dde9042fc5e76416c06d Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 24 Nov 2023 21:26:19 +0100 Subject: [PATCH 6/9] added static to string method to remove code duplication in different classes --- .../utils/JSONIntegerListExtData.java | 7 ++++++ .../utils/JSONStringListExtData.java | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java index 09abf2a..7c019b8 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java @@ -13,6 +13,13 @@ public JSONIntegerListExtData(List object) { super(object); } + /** + * The same as the Overridden toString method + * // TODO: we can get rid of this method for List types*/ + public static String toString(List object) { + return object.toString(); + } + @Override public String toString() { return this.object.toString(); diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java index 4ec1066..0b1e28d 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java @@ -13,6 +13,29 @@ public JSONStringListExtData(List object) { super(object); } + /** + * The same as the Overridden toString method + * Used to avoid code duplication in different classes + * // TODO: We can find a better way to avoid code duplication*/ + public static String toString(List object) { + StringBuilder objectStr = new StringBuilder(); + int arraySize = ((List) object).size(); + // Include quotes. Eg: ["aaa", "bbb", "ccc"]. + objectStr.append("["); + for (int i=0; i) object).get(i)); + objectStr.append("\""); + objectStr.append(","); + } + objectStr.append("\""); + objectStr.append(((List) object).get(arraySize-1)); + objectStr.append("\""); + objectStr.append("]"); + + return objectStr.toString(); + } + @Override public String toString() { StringBuilder objectStr = new StringBuilder(); From 140f852f148beb59662cd8e6d12b791c65119baa Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 1 Dec 2023 13:38:53 +0100 Subject: [PATCH 7/9] updated the enum AttributeType --- .../entities/SeqColExtendedDataEntity.java | 27 ++++++++++++++++--- .../service/SeqColLevelOneService.java | 8 +++--- .../service/SeqColLevelTwoService.java | 12 ++++----- .../eva/evaseqcol/service/SeqColService.java | 7 +++-- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java index 1133875..d5ec5c6 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/entities/SeqColExtendedDataEntity.java @@ -51,7 +51,28 @@ public class SeqColExtendedDataEntity { private SeqColEntity.NamingConvention namingConvention; public enum AttributeType { - names, sequences, md5_sequences, lengths, sorted_name_length_pairs + names("names"), + sequences("sequences"), + md5DigestsOfSequences("md5_sequences"), + lengths("lengths"), + sortedNameLengthPairs("sorted_name_length_pairs"); + + private String attrVal; + + AttributeType(String attrVal) { + this.attrVal = attrVal; + } + + /** + * Return the enum type name given the attribute val*/ + public static AttributeType fromAttributeVal(String attrVal) { + for (AttributeType b : AttributeType.values()) { + if (b.attrVal.equalsIgnoreCase(attrVal)) { + return b; + } + } + throw new IllegalArgumentException("No seqcol attribute with value " + attrVal + " found"); + } } public SeqColExtendedDataEntity setAttributeType(AttributeType attributeType) { @@ -137,7 +158,7 @@ public static SeqColExtendedDataEntity> constructSeqColSequencesObj public static SeqColExtendedDataEntity> constructSeqColSequencesMd5Object( AssemblySequenceEntity assemblySequenceEntity) throws IOException { SeqColExtendedDataEntity> seqColSequencesObject = new SeqColExtendedDataEntity>().setAttributeType( - AttributeType.md5_sequences); + AttributeType.md5DigestsOfSequences); JSONExtData> seqColSequencesArray = new JSONStringListExtData(); List sequencesList = new LinkedList<>(); @@ -159,7 +180,7 @@ public static SeqColExtendedDataEntity> constructSeqColSortedNameLe return null; // Names and Lengths entities are not compatible } SeqColExtendedDataEntity> SeqColSortedNameLengthPairsObject = new SeqColExtendedDataEntity>().setAttributeType( - AttributeType.sorted_name_length_pairs); + AttributeType.sortedNameLengthPairs); JSONExtData> seqColSortedNameLengthPairsArray = new JSONStringListExtData(); // Get the plain name-length pairs diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java index 044a69f..2810409 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelOneService.java @@ -78,10 +78,10 @@ public SeqColLevelOneEntity constructSeqColLevelOne(List> md5SequencesExtEntity = new SeqColExtendedDataEntity<>(); - md5SequencesExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.md5_sequences); + md5SequencesExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.md5DigestsOfSequences); md5SequencesExtEntity.setExtendedSeqColData(md5SequencesExtData); md5SequencesExtEntity.setDigest(digestCalculator.getSha512Digest(md5SequencesExtData.toString())); // Lengths @@ -136,7 +136,7 @@ public SeqColLevelOneEntity constructSeqColLevelOne( namesExtEntity.setDigest(digestCalculator.getSha512Digest(namesExtData.toString())); //sorted-name-length-pairs SeqColExtendedDataEntity> sortedNameLengthPairsExtEntity = new SeqColExtendedDataEntity<>(); - sortedNameLengthPairsExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.sorted_name_length_pairs); + sortedNameLengthPairsExtEntity.setAttributeType(SeqColExtendedDataEntity.AttributeType.sortedNameLengthPairs); sortedNameLengthPairsExtEntity.setExtendedSeqColData(sortedNameLengthPairsData); sortedNameLengthPairsExtEntity.setDigest(digestCalculator.getSha512Digest(sortedNameLengthPairsData.toString())); diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java index 999e3a5..6af73fa 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColLevelTwoService.java @@ -46,10 +46,10 @@ public Optional getSeqColLevelTwoByDigest(String digest) { case sequences: levelTwoEntity.setSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case md5_sequences: + case md5DigestsOfSequences: levelTwoEntity.setMd5DigestsOfSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case sorted_name_length_pairs: + case sortedNameLengthPairs: levelTwoEntity.setSortedNameLengthPairs(extendedStringTypeData.getExtendedSeqColData().getObject()); break; } @@ -79,7 +79,7 @@ private List>> getStringTypeExtendedAttrib if (!extendedMD5Sequences.isPresent()) { throw new RuntimeException("Extended md5 sequences data with digest:" + levelOneEntity.getSeqColLevel1Object().getMd5DigestsOfSequences() + " not found"); } - extendedMD5Sequences.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.md5_sequences); + extendedMD5Sequences.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.md5DigestsOfSequences); Optional>> extendedNames = extendedDataService.getExtendedAttributeByDigest(levelOneEntity.getSeqColLevel1Object().getNames()); if (!extendedNames.isPresent()) { @@ -91,7 +91,7 @@ private List>> getStringTypeExtendedAttrib if (!extendedSortedNameLengthPairs.isPresent()) { throw new RuntimeException("Extended names data with digest: " + levelOneEntity.getSeqColLevel1Object().getNames() + " not found"); } - extendedSortedNameLengthPairs.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.sorted_name_length_pairs); + extendedSortedNameLengthPairs.get().setAttributeType(SeqColExtendedDataEntity.AttributeType.sortedNameLengthPairs); return Arrays.asList( extendedSequences.get(), @@ -125,10 +125,10 @@ public SeqColLevelTwoEntity constructSeqColL2(String level0Digest, case sequences: levelTwoEntity.setSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case md5_sequences: + case md5DigestsOfSequences: levelTwoEntity.setMd5DigestsOfSequences(extendedStringTypeData.getExtendedSeqColData().getObject()); break; - case sorted_name_length_pairs: + case sortedNameLengthPairs: levelTwoEntity.setSortedNameLengthPairs(extendedStringTypeData.getExtendedSeqColData().getObject()); break; } diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java index 5c682ba..38eecc7 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java @@ -33,8 +33,6 @@ import java.util.Optional; import java.util.Set; import java.util.TreeMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; @Service @@ -372,9 +370,10 @@ public Map constructSeqColLevelOneMap(Map> seqCo Map seqColL1Map = new TreeMap<>(); Set seqColAttributes = seqColL2Map.keySet(); // The set of the seqCol attributes ("lengths", "sequences", etc.) for (String attribute: seqColAttributes) { - String attributeDigest = digestCalculator.getSha512Digest( + String attributeDigest; + attributeDigest= digestCalculator.getSha512Digest( convertSeqColLevelTwoAttributeValuesToString(seqColL2Map.get(attribute), - SeqColExtendedDataEntity.AttributeType.valueOf( + SeqColExtendedDataEntity.AttributeType.fromAttributeVal( attribute))); seqColL1Map.put(attribute, attributeDigest); } From d66bc0ada4d38d117b331989d61f084dd1c1d125 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 1 Dec 2023 13:46:09 +0100 Subject: [PATCH 8/9] added a default block for the defult return value --- .../java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java index 38eecc7..3c81f27 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java @@ -403,9 +403,9 @@ private String convertSeqColLevelTwoAttributeValuesToString(List seqColL2Attr switch (type) { case lengths: // List type return JSONIntegerListExtData.toString((List) seqColL2Attribute); + default: // List types + return JSONStringListExtData.toString((List) seqColL2Attribute); } - // List types - return JSONStringListExtData.toString((List) seqColL2Attribute); } /** From 7deaef341eef87890678ad6935c9f66eeca8bb91 Mon Sep 17 00:00:00 2001 From: waterflow80 Date: Fri, 1 Dec 2023 14:11:49 +0100 Subject: [PATCH 9/9] resolved toString code duplication problem --- .../eva/evaseqcol/service/SeqColService.java | 4 ++-- .../utils/JSONIntegerListExtData.java | 7 ------ .../utils/JSONStringListExtData.java | 23 ------------------- 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java index 3c81f27..3066beb 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java @@ -402,9 +402,9 @@ public String calculateSeqColLevelOneMapDigest(Map seqColLevelOn private String convertSeqColLevelTwoAttributeValuesToString(List seqColL2Attribute, SeqColExtendedDataEntity.AttributeType type) { switch (type) { case lengths: // List type - return JSONIntegerListExtData.toString((List) seqColL2Attribute); + return new JSONIntegerListExtData((List) seqColL2Attribute).toString(); default: // List types - return JSONStringListExtData.toString((List) seqColL2Attribute); + return new JSONStringListExtData((List) seqColL2Attribute).toString(); } } diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java index 7c019b8..09abf2a 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONIntegerListExtData.java @@ -13,13 +13,6 @@ public JSONIntegerListExtData(List object) { super(object); } - /** - * The same as the Overridden toString method - * // TODO: we can get rid of this method for List types*/ - public static String toString(List object) { - return object.toString(); - } - @Override public String toString() { return this.object.toString(); diff --git a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java index 0b1e28d..4ec1066 100644 --- a/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java +++ b/src/main/java/uk/ac/ebi/eva/evaseqcol/utils/JSONStringListExtData.java @@ -13,29 +13,6 @@ public JSONStringListExtData(List object) { super(object); } - /** - * The same as the Overridden toString method - * Used to avoid code duplication in different classes - * // TODO: We can find a better way to avoid code duplication*/ - public static String toString(List object) { - StringBuilder objectStr = new StringBuilder(); - int arraySize = ((List) object).size(); - // Include quotes. Eg: ["aaa", "bbb", "ccc"]. - objectStr.append("["); - for (int i=0; i) object).get(i)); - objectStr.append("\""); - objectStr.append(","); - } - objectStr.append("\""); - objectStr.append(((List) object).get(arraySize-1)); - objectStr.append("\""); - objectStr.append("]"); - - return objectStr.toString(); - } - @Override public String toString() { StringBuilder objectStr = new StringBuilder();