Skip to content

Commit

Permalink
Merge pull request #706 from opencb/TASK-6647-dev
Browse files Browse the repository at this point in the history
TASK-6647 - Fix Port Patch 1.10.4 -> 2.2.1 develop
  • Loading branch information
juanfeSanahuja authored Aug 8, 2024
2 parents 25b9dd3 + dee7972 commit 60860ed
Show file tree
Hide file tree
Showing 31 changed files with 1,252 additions and 244 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.beust.jcommander.*;
import org.opencb.cellbase.app.cli.CliOptionsParser;
import org.opencb.cellbase.core.api.key.ApiKeyQuota;
import org.opencb.cellbase.lib.EtlCommons;

import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -74,6 +75,7 @@ public AdminCliOptionsParser() {
jCommander.addCommand("validate", validationCommandOptions);
}

/**
 * Parses the given command-line arguments by handing them to the {@code jCommander} instance
 * on which this parser's commands were registered.
 *
 * @param args command-line arguments to parse
 * @throws ParameterException declared for callers; presumably raised by JCommander on invalid arguments - confirm
 */
@Override
public void parse(String[] args) throws ParameterException {
jCommander.parse(args);
}
Expand All @@ -87,9 +89,13 @@ public class DownloadCommandOptions {
@ParametersDelegate
public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions;

// Data names come from the EtlCommons constants so that this help text lists the same identifiers the
// commands switch on. Note the trailing space after "download:" so the first name is not glued to the colon.
@Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download: "
        + EtlCommons.GENOME_DATA + ", " + EtlCommons.GENE_DATA + ", " + EtlCommons.VARIATION_DATA + ", "
        + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA + ", " + EtlCommons.MISSENSE_VARIATION_SCORE_DATA + ", "
        + EtlCommons.REGULATION_DATA + ", " + EtlCommons.PROTEIN_DATA + ", " + EtlCommons.CONSERVATION_DATA + ", "
        + EtlCommons.CLINICAL_VARIANTS_DATA + ", " + EtlCommons.REPEATS_DATA + ", " + EtlCommons.OBO_DATA + ", "
        + EtlCommons.PUBMED_DATA + ", " + EtlCommons.PHARMACOGENOMICS_DATA + "; and 'all' to download everything",
        required = true, arity = 1)
public String data;

@Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import java.util.Collections;
import java.util.List;

import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA;
import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 03/02/15.
Expand Down Expand Up @@ -132,6 +132,9 @@ public void execute() {
case EtlCommons.REFSEQ_DATA:
parser = buildRefSeq();
break;
case EtlCommons.VARIATION_DATA:
parser = buildVariation();
break;
case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
parser = buildCadd();
break;
Expand Down Expand Up @@ -275,6 +278,21 @@ private CellBaseBuilder buildRefSeq() {
return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer);
}

/**
 * Prepares the variation build folder and creates the builder for the variation data.
 * <p>
 * The build folder is created when missing, the dbSNP version file is copied from the download folder
 * into the build folder (replacing any previous copy), and a {@link VariationBuilder} reading from the
 * download folder and writing JSON into the build folder is returned.
 *
 * @return the builder for the variation data (currently, only dbSNP data)
 * @throws IOException if the build folder cannot be created or the dbSNP version file cannot be copied
 */
private CellBaseBuilder buildVariation() throws IOException {
    Path downloadVariationPath = downloadFolder.resolve(VARIATION_DATA);
    Path buildVariationPath = buildFolder.resolve(VARIATION_DATA);
    if (!Files.exists(buildVariationPath)) {
        // Unlike File.mkdirs(), whose boolean result was being ignored, this reports a failed
        // creation right here instead of letting it surface later as an unrelated error
        Files.createDirectories(buildVariationPath);
    }

    CellBaseFileSerializer variationSerializer = new CellBaseJsonFileSerializer(buildVariationPath);

    // Currently, only dbSNP data
    Files.copy(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME), buildVariationPath.resolve(DBSNP_VERSION_FILENAME),
            StandardCopyOption.REPLACE_EXISTING);
    return new VariationBuilder(downloadVariationPath, variationSerializer, configuration);
}

private CellBaseBuilder buildCadd() {
Path variationFunctionalScorePath = downloadFolder.resolve("variation_functional_score");
copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("caddVersion.json")));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ public void execute() {
case EtlCommons.GENE_DATA:
downloadFiles.addAll(downloader.downloadGene());
break;
// case EtlCommons.VARIATION_DATA:
// downloadManager.downloadVariation();
// break;
case EtlCommons.VARIATION_DATA:
downloadFiles.addAll(downloader.downloadVariation());
break;
case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA:
downloadFiles.addAll(downloader.downloadCaddScores());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import java.util.List;
import java.util.concurrent.ExecutionException;

import static org.opencb.cellbase.lib.EtlCommons.*;

/**
* Created by imedina on 03/02/15.
*/
Expand Down Expand Up @@ -372,30 +374,57 @@ private void checkParameters() throws CellBaseException {
private void loadVariationData() throws NoSuchMethodException, InterruptedException, ExecutionException,
InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException,
IOException, LoaderException, CellBaseException {
Path variationPath = input.resolve(VARIATION_DATA);
// First load data
// Common loading process from CellBase variation data models
if (field == null) {
DirectoryStream<Path> stream = Files.newDirectoryStream(input,
// Common loading process from CellBase variation data models
DirectoryStream<Path> stream = Files.newDirectoryStream(variationPath,
entry -> entry.getFileName().toString().startsWith("variation_chr"));

int numLoadings = 0;
for (Path entry : stream) {
logger.info("Loading file '{}'", entry);
loadRunner.load(input.resolve(entry.getFileName()), "variation", dataRelease);
loadRunner.load(variationPath.resolve(entry.getFileName()), "variation", dataRelease);
numLoadings++;
}

// Create index
createIndex("variation");

// Update release (collection and sources)
List<Path> sources = new ArrayList<>(Arrays.asList(
input.resolve("ensemblVariationVersion.json")
));
dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources);
if (numLoadings > 0) {
// Create index
createIndex("variation");

// Update release (collection and sources)
List<Path> sources = new ArrayList<>(Arrays.asList(
variationPath.resolve("ensemblVariationVersion.json")
));
dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources);
} else {
logger.info("Any variation file 'variation_chr...' found within folder '{}'", variationPath);
}
} else {
// Custom update required e.g. population freqs loading
logger.info("Loading file '{}'", variationPath);
loadRunner.load(variationPath, "variation", dataRelease, field, innerFields);
}

// Load dbSNP
Path dbSnpFilePath = variationPath.resolve(DBSNP_NAME + ".json.gz");
if (dbSnpFilePath.toFile().exists()) {
if (variationPath.resolve(DBSNP_VERSION_FILENAME).toFile().exists()) {
logger.info("Loading dbSNP file '{}'", dbSnpFilePath);
loadRunner.load(dbSnpFilePath, SNP_COLLECTION_NAME, dataRelease);

// Create index
createIndex(SNP_COLLECTION_NAME);

// Update release (collection and sources)
List<Path> sources = Collections.singletonList(variationPath.resolve(DBSNP_VERSION_FILENAME));
dataReleaseManager.update(dataRelease, SNP_COLLECTION_NAME, EtlCommons.VARIATION_DATA, sources);
} else {
logger.warn("In order to load the dbSNP file you need the version file {} within the folder '{}'", DBSNP_VERSION_FILENAME,
variationPath);
}
} else {
logger.info("Loading file '{}'", input);
loadRunner.load(input, "variation", dataRelease, field, innerFields);
logger.warn("Any dbSNP file found within the folder '{}'", variationPath);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.opencb.cellbase.client.rest;

import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.Snp;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.cellbase.client.config.ClientConfiguration;
Expand Down Expand Up @@ -236,6 +237,14 @@ public CellBaseDataResponse<String> getAllConsequenceTypes(Query query) throws I
return execute("consequenceTypes", query, new QueryOptions(), String.class);
}

/**
 * Runs a SNP search by delegating to {@code execute} with the {@code snp/search} resource.
 *
 * @param query   query parameters forwarded as-is
 * @param options query options forwarded as-is
 * @return the response returned by {@code execute}, typed to {@link Snp} results
 * @throws IOException declared for the underlying {@code execute} call
 */
public CellBaseDataResponse<Snp> searchSnp(Query query, QueryOptions options) throws IOException {
    final String resource = "snp/search";
    return execute(resource, query, options, Snp.class);
}

/**
 * Runs a SNP "starts with" lookup by delegating to {@code execute} with the {@code snp/startsWith} resource.
 *
 * @param query   query parameters forwarded as-is
 * @param options query options forwarded as-is
 * @return the response returned by {@code execute}, typed to {@link Snp} results
 * @throws IOException declared for the underlying {@code execute} call
 */
public CellBaseDataResponse<Snp> startsWithSnp(Query query, QueryOptions options) throws IOException {
    final String resource = "snp/startsWith";
    return execute(resource, query, options, Snp.class);
}

// public CellBaseDataResponse<String> getConsequenceTypeById(String id, QueryOptions options) throws IOException {
// return execute(id, "consequence_type", options, String.class);
// }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,26 @@

import org.apache.avro.specific.SpecificRecordBase;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ArgumentsSource;
import org.opencb.biodata.models.core.Snp;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.ConsequenceType;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.cellbase.client.config.ClientConfiguration;
import org.opencb.cellbase.client.config.RestConfig;
import org.opencb.cellbase.core.common.GitRepositoryState;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.core.result.CellBaseDataResponse;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.utils.VersionUtils;

import java.util.*;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -139,6 +149,92 @@ public void getAllConsequenceTypes(CellBaseClient cellBaseClient) throws Excepti
assertNotNull(response.firstResult(), "List of all the consequence types present should be returned");
}

/** REST host the SNP tests are run against. */
private static final String SNP_TEST_REST_HOST = "https://ws.zettagenomics.com/cellbase";
/** Minimum server version required by the SNP tests; also used (prefixed with 'v') as the client API version. */
private static final String SNP_TEST_MIN_VERSION = "5.8.3-SNAPSHOT";
/** Data release the SNP tests query; the tests are skipped when the server does not list it. */
private static final int SNP_TEST_DATA_RELEASE = 7;

/**
 * Creates the client shared by the SNP tests and checks the assumptions they rely on: the server version
 * must be at least {@link #SNP_TEST_MIN_VERSION} and the server must list {@link #SNP_TEST_DATA_RELEASE}.
 * A failed assumption aborts (skips) the calling test instead of failing it.
 *
 * @return a client configured for hsapiens against {@link #SNP_TEST_REST_HOST}
 * @throws Exception if the meta endpoints cannot be queried
 */
private CellBaseClient createSnpTestClient() throws Exception {
    ClientConfiguration clientConfiguration = new ClientConfiguration()
            .setDefaultSpecies("hsapiens")
            .setVersion("v" + SNP_TEST_MIN_VERSION)
            .setRest(new RestConfig(Collections.singletonList(SNP_TEST_REST_HOST), 2000));

    CellBaseClient client = new CellBaseClient(clientConfiguration);

    // Assumptions before running the test
    ObjectMap result = client.getMetaClient().about().firstResult();
    Assumptions.assumeTrue(VersionUtils.isMinVersion(SNP_TEST_MIN_VERSION, result.getString("Version")));
    CellBaseDataResponse<DataRelease> dataReleaseResponse = client.getMetaClient().dataReleases();
    Assumptions.assumeTrue(dataReleaseResponse.getResponses().get(0).getResults().stream()
            .map(DataRelease::getRelease)
            .collect(Collectors.toList())
            .contains(SNP_TEST_DATA_RELEASE));
    return client;
}

/** Searching by two dbSNP IDs must return exactly those two SNPs, in the requested order. */
@Test
public void testSearchSnpBydbSnpId() throws Exception {
    CellBaseClient client = createSnpTestClient();

    Query query = new Query();
    query.put("id", "rs1570391602,rs41278952");
    query.put("dataRelease", SNP_TEST_DATA_RELEASE);

    CellBaseDataResponse<Snp> response = client.getVariantClient().searchSnp(query, new QueryOptions());
    assertEquals(2, response.getResponses().get(0).getNumResults());
    List<Snp> snps = response.getResponses().get(0).getResults();
    assertEquals("rs1570391602", snps.get(0).getId());
    assertEquals("rs41278952", snps.get(1).getId());
}

/** Searching by chromosome, position and reference allele must return the single matching SNP. */
@Test
public void testSearchSnpByPosition() throws Exception {
    CellBaseClient client = createSnpTestClient();

    Query query = new Query();
    query.put("chromosome", "1");
    query.put("position", "56948509");
    query.put("reference", "T");
    query.put("dataRelease", SNP_TEST_DATA_RELEASE);

    CellBaseDataResponse<Snp> response = client.getVariantClient().searchSnp(query, new QueryOptions());
    assertEquals(1, response.getResponses().get(0).getNumResults());
    Snp snp = response.getResponses().get(0).getResults().get(0);
    assertEquals("rs1570391602", snp.getId());
    assertEquals(query.getInt("position"), snp.getPosition());
    assertEquals(query.get("reference"), snp.getReference());
    assertEquals(1, snp.getAlternates().size());
    assertEquals("G", snp.getAlternates().get(0));
}

/** A "starts with" lookup must only return SNPs whose ID begins with the requested prefix. */
@Test
public void testStarsWithSnp() throws Exception {
    CellBaseClient client = createSnpTestClient();

    Query query = new Query();
    query.put("id", "rs157039161");
    query.put("dataRelease", SNP_TEST_DATA_RELEASE);

    CellBaseDataResponse<Snp> response = client.getVariantClient().startsWithSnp(query, new QueryOptions());
    assertEquals(9, response.getResponses().get(0).getNumResults());
    String prefix = query.getString("id");
    for (Snp snp : response.getResponses().get(0).getResults()) {
        if (!snp.getId().startsWith(prefix)) {
            // Report the offending ID so a failure can be diagnosed without re-running the query
            fail("SNP ID '" + snp.getId() + "' does not start with '" + prefix + "'");
        }
    }
}

// @Test
// public void getConsequenceTypeById() throws Exception {
// CellBaseDataResponse<String> stringCellBaseDataResponse = cellBaseClient.getVariantClient().getConsequenceTypeById("22:35490160:G:A", null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ public Type type() {
public static final String VERSION_DESCRIPTION = "API version, e.g.: " + DEFAULT_VERSION;

public static final String DATA_RELEASE_PARAM = "dataRelease";
public static final String DATA_RELEASE_DESCRIPTION = "Data release. To use the default data release, set this to 0. To get the list"
+ " of available data release, please call the endpoint 'meta/dataReleases'";
/**
 * Description text for the data release parameter; it points users to the 'meta/dataReleases'
 * endpoint for the list of available data releases.
 */
public static final String DATA_RELEASE_DESCRIPTION = "Data release. To get the list of available data releases, please call the"
+ " endpoint 'meta/dataReleases'";

public static final String API_KEY_PARAM = "apiKey";
public static final String API_KEY_DESCRIPTION = "API key to allow access to licensed/restricted data sources such as"
Expand Down
Loading

0 comments on commit 60860ed

Please sign in to comment.