-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
EVA-3501 - adding new endpoint for parsing fasta file data #80
Changes from 1 commit
8206af0
3b1926a
ddac513
91cc475
82dadd1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
import uk.ac.ebi.eva.evaseqcol.entities.AssemblySequenceEntity; | ||
import uk.ac.ebi.eva.evaseqcol.utils.GzipCompress; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
@@ -43,6 +44,17 @@ public NCBIAssemblySequenceDataSource(NCBIBrowserFactory factory, | |
this.readerFactory = readerFactory; | ||
} | ||
|
||
/**
 * Parses FASTA content that is supplied directly as a string.
 * The content's bytes are wrapped in a ByteArrayInputStream, a reader is obtained from
 * readerFactory.build(stream, insdcAccession), and the entity returned by
 * reader.getAssemblySequencesEntity() is handed back to the caller.
 *
 * @param insdcAccession  accession forwarded to the reader factory and echoed in the log message
 * @param fastFileContent FASTA text to parse
 * @return the parsed entity wrapped with Optional.of, so the returned Optional is never empty
 * @throws IOException declared on the signature; presumably raised while reading or closing the stream
 */
public Optional<AssemblySequenceEntity> getAssemblySequencesByAccession(String insdcAccession, String fastFileContent) throws IOException { | ||
AssemblySequenceEntity assemblySequenceEntity; | ||
// NOTE(review): getBytes() is called without a charset, so the JVM default charset is used - confirm this is intended
try (InputStream stream = new ByteArrayInputStream(fastFileContent.getBytes())) { | ||
NCBIAssemblySequenceReader reader = readerFactory.build(stream, insdcAccession); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We'll probably have to refactor this since |
||
assemblySequenceEntity = reader.getAssemblySequencesEntity(); | ||
logger.info("FASTA file content with accession " + insdcAccession + " has been parsed successfully"); | ||
} | ||
|
||
return Optional.of(assemblySequenceEntity); | ||
} | ||
|
||
@Override | ||
public Optional<AssemblySequenceEntity> getAssemblySequencesByAccession(String accession) throws IOException, IllegalArgumentException { | ||
NCBIBrowser ncbiBrowser = factory.build(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
import uk.ac.ebi.eva.evaseqcol.utils.JSONLevelOne; | ||
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -86,4 +87,27 @@ public Optional<Map<String, Object>> getAllPossibleSeqColExtendedData(String acc | |
|
||
return Optional.of(seqColResultData); | ||
} | ||
|
||
/**
 * Builds the seqCol extended-data map from FASTA content that is supplied as a string.
 * The content is parsed through assemblySequenceDataSource, then the lengths, sequences
 * and MD5-sequences objects are constructed from the resulting entity.
 *
 * @param insdcAccession  accession forwarded to the data source together with the FASTA content
 * @param fastFileContent FASTA text to parse
 * @return a map with the key "sameValueAttributes" (holding "extendedLengths", "extendedSequences"
 *         and "extendedMd5Sequences") and the key "namesAttributes" (a single-element list built
 *         with NamingConvention.TEST), or Optional.empty() when the data source yields no entity
 * @throws IOException propagated from the data source call
 */
public Optional<Map<String, Object>> getAllPossibleSeqColExtendedData(String insdcAccession, String fastFileContent) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This being a generic function for any fasta file, I'm not sure it makes sense to put it in |
||
Map<String, Object> seqColResultData = new HashMap<>(); | ||
|
||
// Fetching Sequence Entity (FASTA File) | ||
Optional<AssemblySequenceEntity> sequenceEntity = assemblySequenceDataSource.getAssemblySequencesByAccession(insdcAccession, fastFileContent); | ||
// Bail out with an empty result when no entity was produced
if (!sequenceEntity.isPresent()) { | ||
logger.error("Could not parse FASTA file content: "); | ||
return Optional.empty(); | ||
} | ||
logger.info("FASTA file have been parsed successfully"); | ||
|
||
// The three "extended" objects are grouped under one map, all derived from the same entity
Map<String, Object> sameValueAttributesMap = new HashMap<>(); | ||
sameValueAttributesMap.put("extendedLengths", SeqColExtendedDataEntity.constructSeqColLengthsObject(sequenceEntity.get())); | ||
sameValueAttributesMap.put("extendedSequences", SeqColExtendedDataEntity.constructSeqColSequencesObject(sequenceEntity.get())); | ||
sameValueAttributesMap.put("extendedMd5Sequences", SeqColExtendedDataEntity.constructSeqColSequencesMd5Object(sequenceEntity.get())); | ||
|
||
// Seqcol Result Data Map | ||
seqColResultData.put("sameValueAttributes", sameValueAttributesMap); | ||
seqColResultData.put("namesAttributes", Collections.singletonList(SeqColExtendedDataEntity | ||
.constructSeqColNamesObjectByNamingConvention(sequenceEntity.get(), SeqColEntity.NamingConvention.TEST))); | ||
return Optional.of(seqColResultData); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think the string parameter is being used anywhere. It is here to comply with the current object structure.
I guess we should at least change the parameter name (and the doc) to make clear it does not have to be an INSDC assembly accession.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Positive
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree: it's not being used in the calculations, but since we are reusing a lot of existing code that requires this value, I left it as it is. It was named insdcAccession because it ultimately sets the insdcAccession value of AssemblySequenceEntity.
I have updated the parameter name to just accession.