Skip to content

Commit

Permalink
Merge pull request #67 from waterflow80/fixing-comparison-function
Browse files Browse the repository at this point in the history
Fixing comparison function
  • Loading branch information
waterflow80 authored Dec 2, 2023
2 parents 35556b1 + 7deaef3 commit ff31f18
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public ResponseEntity<?> compareSequenceCollections(
required = true) @PathVariable String digest1,
@Parameter(name = "seqColLevelTwo",
description = "SeqCol object level 2",
required = true) @RequestBody TreeMap<String, List<String>> seqColLevelTwo
required = true) @RequestBody TreeMap<String, List<?>> seqColLevelTwo
) {
try {
SeqColComparisonResultEntity comparisonResult = seqColService.compareSeqCols(digest1, seqColLevelTwo);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import javax.persistence.Transient;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
Expand Down Expand Up @@ -52,7 +51,28 @@ public class SeqColExtendedDataEntity<T> {
private SeqColEntity.NamingConvention namingConvention;

/**
 * The seqCol attribute types, each paired with the attribute name that is used as
 * the key of that attribute in a seqCol map (e.g. {@code md5DigestsOfSequences} is
 * keyed as "md5_sequences").
 */
public enum AttributeType {
    names("names"),
    sequences("sequences"),
    md5DigestsOfSequences("md5_sequences"),
    lengths("lengths"),
    sortedNameLengthPairs("sorted_name_length_pairs");

    /** Attribute name associated with this constant; set once at construction. */
    private final String attrVal;

    AttributeType(String attrVal) {
        this.attrVal = attrVal;
    }

    /**
     * Return the enum constant whose attribute value equals the given string.
     * The comparison ignores case.
     *
     * @param attrVal the attribute name to look up, e.g. "sorted_name_length_pairs"
     * @return the constant associated with {@code attrVal}
     * @throws IllegalArgumentException if no constant carries that attribute value
     *         (a {@code null} argument never matches and therefore also ends up here)
     */
    public static AttributeType fromAttributeVal(String attrVal) {
        for (AttributeType type : values()) {
            if (type.attrVal.equalsIgnoreCase(attrVal)) {
                return type;
            }
        }
        throw new IllegalArgumentException("No seqcol attribute with value " + attrVal + " found");
    }
}

public SeqColExtendedDataEntity<T> setAttributeType(AttributeType attributeType) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package uk.ac.ebi.eva.evaseqcol.service;

import com.vladmihalcea.hibernate.type.basic.Inet;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

Expand All @@ -9,7 +8,6 @@
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelOneEntity;
import uk.ac.ebi.eva.evaseqcol.digests.DigestCalculator;
import uk.ac.ebi.eva.evaseqcol.entities.SeqColLevelTwoEntity;
import uk.ac.ebi.eva.evaseqcol.refget.SHA512ChecksumCalculator;
import uk.ac.ebi.eva.evaseqcol.repo.SeqColLevelOneRepository;
import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData;
Expand Down
93 changes: 32 additions & 61 deletions src/main/java/uk/ac/ebi/eva/evaseqcol/service/SeqColService.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import uk.ac.ebi.eva.evaseqcol.exception.SeqColNotFoundException;
import uk.ac.ebi.eva.evaseqcol.exception.UnableToLoadServiceInfoException;
import uk.ac.ebi.eva.evaseqcol.utils.JSONExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONIntegerListExtData;
import uk.ac.ebi.eva.evaseqcol.utils.JSONStringListExtData;
import uk.ac.ebi.eva.evaseqcol.utils.SeqColMapConverter;

import java.io.IOException;
Expand All @@ -32,8 +34,6 @@
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Service
Expand Down Expand Up @@ -264,31 +264,31 @@ public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, String

public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, SeqColLevelTwoEntity seqColAEntity,
String seqColBDigest, SeqColLevelTwoEntity seqColBEntity) {
// Both level 2 entities are converted to their Map representation so that the
// comparison can be delegated to the map-based compareSeqCols overload.
// NOTE(review): this listing is a rendered diff - the two List<String> declarations are
// presumably the removed revision and the two List<?> declarations their replacement;
// confirm against the repository.
Map<String, List<String>> seqColAMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity);
Map<String, List<String>> seqColBMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColBEntity);
Map<String, List<?>> seqColAMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity);
Map<String, List<?>> seqColBMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColBEntity);
// The digests are passed through unchanged, in the same A/B order as the maps.
return compareSeqCols(seqColADigest, seqColAMap, seqColBDigest, seqColBMap);
}

/**
 * Compare two seqCol objects: an already saved one, seqColA (looked up by its digest, with
 * pre-defined attributes), and an undefined one, seqColB, given as a level 2 Map with unknown
 * attributes. BE CAREFUL: the order of the arguments matters!
 * The level 0 digest of seqColB is calculated from the given map before the comparison.
 * Note: the minimal required seqCol attributes should be present.
 * NOTE(review): the Optional returned by the level 2 lookup is unwrapped with get() without a
 * presence check, so an unknown seqColADigest presumably surfaces as a NoSuchElementException -
 * confirm how callers handle that.
 * @param seqColADigest digest used to look up the saved seqColA level 2 entity
 * @param seqColBEntityMap level 2 Map representation of seqColB
 * @throws IOException declared by the seqColB digest calculation*/
public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, Map<String, List<String>> seqColBEntityMap) throws IOException {
public SeqColComparisonResultEntity compareSeqCols(String seqColADigest, Map<String, List<?>> seqColBEntityMap) throws IOException {
Optional<SeqColLevelTwoEntity> seqColAEntity = levelTwoService.getSeqColLevelTwoByDigest(seqColADigest);

// Calculating the seqColB level 0 digest
String seqColBDigest = calculateSeqColLevelTwoMapDigest(seqColBEntityMap);

// Converting the seqColA object into a Map in order to handle attributes generically
Map<String, List<String>> seqColAEntityMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity.get());
Map<String, List<?>> seqColAEntityMap = SeqColMapConverter.getSeqColLevelTwoMap(seqColAEntity.get());

return compareSeqCols(seqColADigest, seqColAEntityMap, seqColBDigest, seqColBEntityMap);
}

/**
* Compare two seqCol L2 objects*/
public SeqColComparisonResultEntity compareSeqCols(
String seqColADigest, Map<String,List<String>> seqColAEntityMap, String seqColBDigest, Map<String, List<String>> seqColBEntityMap) {
String seqColADigest, Map<String,List<?>> seqColAEntityMap, String seqColBDigest, Map<String, List<?>> seqColBEntityMap) {

logger.info("Comparing seqCol " + seqColADigest + " and seqCol " + seqColBDigest);
SeqColComparisonResultEntity comparisonResult = new SeqColComparisonResultEntity();
Expand Down Expand Up @@ -355,10 +355,10 @@ public SeqColComparisonResultEntity compareSeqCols(
* ==> Same order elements
* NOTE: Assuming that the method List.retainAll() preserves the order in the original list (no counterexample at the moment)
* @see "https://github.com/ga4gh/seqcol-spec/blob/master/docs/decision_record.md#same-order-specification" */
public boolean check_A_And_B_Same_Order(List<String> elementsA, List<String> elementsB) {
LinkedList<String> elementsALocal = new LinkedList<>(elementsA);
LinkedList<String> elementsBLocal = new LinkedList<>(elementsB);
List<String> commonElements = getCommonElementsDistinct(elementsALocal, elementsBLocal);
public boolean check_A_And_B_Same_Order(List<?> elementsA, List<?> elementsB) {
LinkedList<?> elementsALocal = new LinkedList<>(elementsA);
LinkedList<?> elementsBLocal = new LinkedList<>(elementsB);
List<?> commonElements = getCommonElementsDistinct(elementsALocal, elementsBLocal);
elementsALocal.retainAll(commonElements); // Leaving only the common elements (keeping the original order to check)
elementsBLocal.retainAll(commonElements); // Leaving only the common elements (keeping the original order to check)

Expand All @@ -367,20 +367,23 @@ public boolean check_A_And_B_Same_Order(List<String> elementsA, List<String> ele

/**
 * Construct a seqCol level 1 (Map representation) out of the given seqColL2Map: every attribute
 * key of the level 2 map is kept and mapped to the digest that digestCalculator.getSha512Digest
 * returns for the normalized string form of that attribute's values.
 * In the revision that resolves the attribute type via AttributeType.fromAttributeVal, a key
 * that is not a known attribute value results in an IllegalArgumentException.
 * @param seqColL2Map level 2 Map representation (attribute name to list of values)
 * @return a sorted (TreeMap) map of attribute name to attribute digest
 * @throws IOException declared on the signature; which call raises it is not visible in this block*/
public Map<String, String> constructSeqColLevelOneMap(Map<String, List<String>> seqColL2Map) throws IOException {
public Map<String, String> constructSeqColLevelOneMap(Map<String, List<?>> seqColL2Map) throws IOException {
Map<String, String> seqColL1Map = new TreeMap<>();
Set<String> seqColAttributes = seqColL2Map.keySet(); // The set of the seqCol attributes ("lengths", "sequences", etc.)
for (String attribute: seqColAttributes) {
// NOTE(review): this listing is a rendered diff - the single-argument conversion call is
// presumably the removed revision and the typed, two-argument call its replacement.
String attributeDigest = digestCalculator.getSha512Digest(
convertSeqColLevelTwoAttributeValuesToString(seqColL2Map.get(attribute)));
String attributeDigest;
attributeDigest= digestCalculator.getSha512Digest(
convertSeqColLevelTwoAttributeValuesToString(seqColL2Map.get(attribute),
SeqColExtendedDataEntity.AttributeType.fromAttributeVal(
attribute)));
seqColL1Map.put(attribute, attributeDigest);
}
return seqColL1Map;
}

/**
* Return the level 0 digest of the given seqColLevelTwoMap, which is in the form of a Map (undefined attributes)*/
public String calculateSeqColLevelTwoMapDigest(Map<String, List<String>> seqColLevelTwoMap) throws IOException {
public String calculateSeqColLevelTwoMapDigest(Map<String, List<?>> seqColLevelTwoMap) throws IOException {
Map<String, String> seqColLevelOne = constructSeqColLevelOneMap(seqColLevelTwoMap);
String levelZeroDigest = calculateSeqColLevelOneMapDigest(seqColLevelOne);
return levelZeroDigest;
Expand All @@ -394,54 +397,22 @@ public String calculateSeqColLevelOneMapDigest(Map<String, String> seqColLevelOn
return levelZeroDigest;
}

/**
 * Tell whether the given string consists solely of one or more digits 0-9.
 * A null or empty string is not considered a digit string.*/
private boolean onlyDigits(String str) {
    // A missing value can never be a digit string.
    if (str == null) {
        return false;
    }
    // Whole-input match against the digit pattern.
    return Pattern.matches("[0-9]+", str);
}

/**
 * Tell whether every entry of the given list is a digits-only string.
 * An empty list is accepted (returns true).*/
private boolean onlyDigitsStringList(List<String> list) {
    for (String candidate : list) {
        // The first non-numeric entry settles the answer.
        if (!onlyDigits(candidate)) {
            return false;
        }
    }
    return true;
}

/**
* Return a normalized string representation of the given seqColL2Attribute
* Note: This is the same method as the toString of the JSONExtData class*/
private String convertSeqColLevelTwoAttributeValuesToString(List<String> seqColL2Attribute) {
StringBuilder objectStr = new StringBuilder();
objectStr.append("[");
if (onlyDigitsStringList(seqColL2Attribute)) { // Lengths array, No quotes "...". Eg: [1111, 222, 333]
for (int i=0; i<seqColL2Attribute.size()-1; i++) {
objectStr.append(seqColL2Attribute.get(i));
objectStr.append(",");
}
objectStr.append(seqColL2Attribute.get(seqColL2Attribute.size()-1));
objectStr.append("]");
} else { // Not a lengths array. Include quotes. Eg: ["aaa", "bbb", "ccc"].
for (int i=0; i<seqColL2Attribute.size()-1; i++) {
objectStr.append("\"");
objectStr.append(seqColL2Attribute.get(i));
objectStr.append("\"");
objectStr.append(",");
}
objectStr.append("\"");
objectStr.append(seqColL2Attribute.get(seqColL2Attribute.size()-1));
objectStr.append("\"");
objectStr.append("]");
* //TODO: we can find a better way to identify the given type in a more generic way*/
private String convertSeqColLevelTwoAttributeValuesToString(List<?> seqColL2Attribute, SeqColExtendedDataEntity.AttributeType type) {
switch (type) {
case lengths: // List<Integer> type
return new JSONIntegerListExtData((List<Integer>) seqColL2Attribute).toString();
default: // List<String> types
return new JSONStringListExtData((List<String>) seqColL2Attribute).toString();
}
return objectStr.toString();
}

/**
* Return a normalized seqCol representation of the given seqColLevelOneMap
* Note: This method is the same as the toString method of the SeqColLevelOneEntity class*/
* Note: This method is the same as the toString method of the SeqColLevelOneEntity class
* // TODO: remove code duplicates*/
private String convertSeqColLevelOneAttributeToString(Map<String, String> seqColLevelOneMap) {
StringBuilder seqColStringRepresentation = new StringBuilder();
seqColStringRepresentation.append("{");
Expand Down Expand Up @@ -473,10 +444,10 @@ public List<String> getUniqueElements(List<String> list1, List<String> list2) {

/**
 * Return the list of the common elements between seqColAFields and seqColBFields (with no duplicates).
 * A copy of seqColAFields is reduced with retainAll(seqColBFields) and then de-duplicated with
 * Stream.distinct(); the two argument lists themselves are not modified.
 * NOTE(review): in the List<?> revision the result is cast to List<String> without any check of
 * the element type, so for non-String inputs the returned list presumably still holds the
 * original objects - confirm that callers only treat it as a List<?>.*/
public List<String> getCommonElementsDistinct(List<String> seqColAFields, List<String> seqColBFields) {
List<String> commonFields = new ArrayList<>(seqColAFields);
public List<String> getCommonElementsDistinct(List<?> seqColAFields, List<?> seqColBFields) {
List<?> commonFields = new ArrayList<>(seqColAFields);
// Works on the copy, so only elements also present in seqColBFields remain.
commonFields.retainAll(seqColBFields);
List<String> commonFieldsDistinct = commonFields.stream().distinct().collect(Collectors.toList());
List<String> commonFieldsDistinct = (List<String>) commonFields.stream().distinct().collect(Collectors.toList());
return commonFieldsDistinct;
}

Expand All @@ -493,7 +464,7 @@ public Integer getCommonElementsCount(List<?> listA, List<?> listB) {
/**
 * Return true if list1 and list2 share fewer than two distinct elements; a common element that
 * is repeated in the lists is counted once, because the check is based on getCommonElementsDistinct.
 * @see 'https://github.com/ga4gh/seqcol-spec/blob/master/docs/decision_record.md#same-order-specification'*/
public boolean lessThanTwoOverlappingElements(List<String> list1, List<String> list2) {
public boolean lessThanTwoOverlappingElements(List<?> list1, List<?> list2) {
return getCommonElementsDistinct(list1, list2).size() < 2;
}

Expand All @@ -515,7 +486,7 @@ public boolean lessThanTwoOverlappingElements(List<String> list1, List<String> l
* }
* Unbalanced duplicates
* @see 'https://github.com/ga4gh/seqcol-spec/blob/master/docs/decision_record.md#same-order-specification'*/
public boolean unbalancedDuplicatesPresent(List<String> listA, List<String> listB) {
public boolean unbalancedDuplicatesPresent(List<?> listA, List<?> listB) {
List<?> commonElements = getCommonElementsDistinct(listA, listB);
Map<Object, Map<String, Integer>> duplicatesCountMap = new HashMap<>();
for (Object element: commonElements) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ public static Map<String, String> getSeqColLevelOneMap(SeqColLevelOneEntity seqC
* NOTE!: Not all the attributes will be returned, only the ones concerned by the comparison
* NOTE!: The level 0 digest as well as the naming convention values will be lost
*/
public static Map<String, List<String>> getSeqColLevelTwoMap(SeqColLevelTwoEntity levelTwoEntity) {
public static Map<String, List<?>> getSeqColLevelTwoMap(SeqColLevelTwoEntity levelTwoEntity) {
// A new ObjectMapper is created on every call.
ObjectMapper objectMapper = new ObjectMapper();
// convertValue targets the raw Map.class, so the assignment to the parameterized Map is unchecked.
// NOTE(review): nothing in this method verifies that every value really is a List - confirm
// against the fields that SeqColLevelTwoEntity exposes.
Map<String, List<String>> seqColMap = objectMapper.convertValue(levelTwoEntity, Map.class);
Map<String, List<?>> seqColMap = objectMapper.convertValue(levelTwoEntity, Map.class);
return seqColMap;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void setSeqColLevelOneMapConverterTest() {
@Test
void seqColLevelTwoMapConverterTest() {
SeqColLevelTwoEntity levelTwoEntity = seqColGenerator.generateLevelTwoEntity();
Map<String, List<String>> levelTwoMap = seqColMapConverter.getSeqColLevelTwoMap(levelTwoEntity);
Map<String, List<?>> levelTwoMap = seqColMapConverter.getSeqColLevelTwoMap(levelTwoEntity);
assertFalse(levelTwoMap.keySet().isEmpty()); // At least we should have the "sequences", "lengths" and "names"
assertTrue(levelTwoMap.containsKey("sequences"));
assertTrue(levelTwoMap.containsKey("lengths"));
Expand Down

0 comments on commit ff31f18

Please sign in to comment.