diff --git a/.github/workflows/check-junit-test.yml b/.github/workflows/check-junit-test.yml index f953642c9eb..4331f156cf5 100644 --- a/.github/workflows/check-junit-test.yml +++ b/.github/workflows/check-junit-test.yml @@ -51,6 +51,5 @@ jobs: uses: ./.github/workflows/test-analysis.yml with: test_profile: ${{ needs.get_profiles.outputs.profiles }} - report_context: development mvn_opts: ${{ inputs.mvn_opts }} secrets: inherit \ No newline at end of file diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 6577a417189..50c396dd7cc 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -9,34 +9,26 @@ on: jobs: build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: maven_opts: -P storage-hadoop,hdp3.1,RClient,opencga-storage-hadoop-deps -Dopencga.war.name=opencga -Dcheckstyle.skip -pl '!:opencga-storage-hadoop-deps-emr6.1,!:opencga-storage-hadoop-deps-hdp2.6' - deploy-docker-ext-tools: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x - needs: build - with: - cli: python3 ./build/cloud/docker/docker-build.py push --images ext-tools --tag ${{ needs.build.outputs.version }} - secrets: inherit - test: uses: ./.github/workflows/test-analysis.yml - needs: deploy-docker-ext-tools - secrets: inherit + needs: build with: test_profile: runShortTests,runMediumTests - report_context: development + secrets: inherit deploy-maven: - uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop needs: test with: maven_opts: -P storage-hadoop,hdp3.1 -Dopencga.war.name=opencga secrets: inherit deploy-docker: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x + uses: 
opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop needs: test with: cli: python3 ./build/cloud/docker/docker-build.py push --images base,init diff --git a/.github/workflows/docker-aws-emr.yml b/.github/workflows/docker-aws-emr.yml index b5bae3ed5ed..b44a777134d 100644 --- a/.github/workflows/docker-aws-emr.yml +++ b/.github/workflows/docker-aws-emr.yml @@ -8,15 +8,13 @@ on: jobs: build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: maven_opts: -P storage-hadoop,emr6.1 -Dopencga.war.name=opencga deploy-docker: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop needs: build with: cli: python3 ./build/cloud/docker/docker-build.py push --images base,init secrets: inherit -# DOCKER_HUB_USER: ${{ secrets.DOCKER_HUB_USER }} -# DOCKER_HUB_PASSWORD: ${{ secrets.DOCKER_HUB_PASSWORD }} diff --git a/.github/workflows/docker-tools.yml b/.github/workflows/docker-ext-tools.yml similarity index 81% rename from .github/workflows/docker-tools.yml rename to .github/workflows/docker-ext-tools.yml index d4b80e913e4..07062f03278 100644 --- a/.github/workflows/docker-tools.yml +++ b/.github/workflows/docker-ext-tools.yml @@ -1,4 +1,4 @@ -name: Deploy Docker Tools +name: Deploy Docker Ext-Tools on: push: @@ -12,13 +12,12 @@ on: jobs: build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@release-4.9.x - with: - maven_opts: + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop deploy-docker-ext-tools: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop needs: build with: cli: python3 
./build/cloud/docker/docker-build.py push --images ext-tools --tag ${{ needs.build.outputs.version }} secrets: inherit + diff --git a/.github/workflows/long-test-analysis.yml b/.github/workflows/long-test-analysis.yml index f516eb25029..c898c4dfa0b 100644 --- a/.github/workflows/long-test-analysis.yml +++ b/.github/workflows/long-test-analysis.yml @@ -1,15 +1,17 @@ name: Long test for run only on weekend -#Every Saturday at 01:10 + +## Every Saturday at 01:10 on: schedule: - cron: '10 1 * * 6' + workflow_dispatch: jobs: test: uses: ./.github/workflows/test-analysis.yml secrets: inherit with: - test_profile: runLongTests,runMediumTests,runShortTests - report_context: development + test_profile: runShortTests,runMediumTests,runLongTests, + diff --git a/.github/workflows/pull-request-merge.yml b/.github/workflows/pull-request-merge.yml index 54a63c56149..3e3e5bb46b9 100644 --- a/.github/workflows/pull-request-merge.yml +++ b/.github/workflows/pull-request-merge.yml @@ -3,14 +3,17 @@ name: "Pull Request Merge Workflow" on: pull_request: branches: - - "develop" - - "release-*" + - develop + - release-* types: - closed jobs: + build: + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + delete-docker: - uses: opencb/java-common-libs/.github/workflows/delete-docker-hub-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/delete-docker-hub-workflow.yml@develop with: cli: python3 ./build/cloud/docker/docker-build.py delete --images base --tag ${{ github.head_ref }} secrets: inherit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 388e4aacf92..b7fccc7fa3f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,52 +8,35 @@ on: jobs: build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: maven_opts: -P 
storage-hadoop,hdp3.1,RClient,opencga-storage-hadoop-deps -Dopencga.war.name=opencga -Dcheckstyle.skip -pl '!:opencga-storage-hadoop-deps-emr6.1,!:opencga-storage-hadoop-deps-hdp2.6' - deploy-docker-ext-tools: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x - needs: build - with: - cli: python3 ./build/cloud/docker/docker-build.py push --images ext-tools --tag ${{ needs.build.outputs.version }} - secrets: inherit - - test: - uses: ./.github/workflows/test-analysis.yml - needs: deploy-docker-ext-tools - secrets: inherit - with: - test_profile: runShortTests,runMediumTests - report_context: xetabase - deploy-maven: - uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@release-4.9.x - needs: test + uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop + needs: build with: maven_opts: -P storage-hadoop,hdp3.1 -Dopencga.war.name=opencga secrets: inherit deploy-docker: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x - needs: test + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop + needs: build with: cli: python3 ./build/cloud/docker/docker-build.py push --images base,init secrets: inherit - - deploy-python: - uses: opencb/java-common-libs/.github/workflows/deploy-python-workflow.yml@release-4.9.x - needs: test + uses: opencb/java-common-libs/.github/workflows/deploy-python-workflow.yml@develop + needs: build with: cli: ./clients/python/python-build.sh push artifact: build-folder secrets: inherit release: - uses: opencb/java-common-libs/.github/workflows/release-github-workflow.yml@release-4.9.x - needs: [ build,test ] + uses: opencb/java-common-libs/.github/workflows/release-github-workflow.yml@develop + needs: [ build, deploy-maven, deploy-docker, deploy-python ] with: artifact: build-folder file: | diff --git a/.github/workflows/task.yml b/.github/workflows/task.yml 
index d48509a97df..51888a9179d 100644 --- a/.github/workflows/task.yml +++ b/.github/workflows/task.yml @@ -5,10 +5,12 @@ on: branches: - TASK-* workflow_dispatch: -#WARNING Develop branch needed for prod + +# WARNING Develop branch needed for prod + jobs: build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: maven_opts: -P storage-hadoop,hdp3.1,RClient,opencga-storage-hadoop-deps -Dopencga.war.name=opencga -Dcheckstyle.skip -pl '!:opencga-storage-hadoop-deps-emr6.1,!:opencga-storage-hadoop-deps-hdp2.6' @@ -18,10 +20,9 @@ jobs: secrets: inherit with: test_profile: runShortTests - report_context: development deploy-docker: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@release-4.9.x + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop needs: test with: cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag ${{ github.ref_name }} diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index b778e704cae..732d92a4e56 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -1,13 +1,11 @@ name: Build and test the project + on: workflow_call: inputs: test_profile: type: string required: true - report_context: - type: string - required: true mvn_opts: type: string required: false @@ -15,16 +13,7 @@ on: secrets: SONAR_TOKEN: required: true - SSH_TESTING_SERVER_HOST: - required: true - SSH_TESTING_SERVER_PORT: - required: true - SSH_TESTING_SERVER_USER: - required: true - SSH_TESTING_SERVER_PASSWORD: - required: true -env: - xb_version: "1.7.2" + jobs: analysis: name: Execute Sonar Analysis @@ -52,6 +41,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} run: mvn -T 2 clean install -P 
storage-hadoop,hdp3.1,${{ inputs.test_profile }} -DskipTests -Dcheckstyle.skip org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=opencb_opencga + test: name: Execute JUnit and Jacoco tests runs-on: ubuntu-22.04 @@ -81,31 +71,14 @@ jobs: mongodb-version: 6.0 mongodb-replica-set: rs-test - name: Run Junit tests - run: mvn -B verify surefire-report:report -P storage-hadoop,hdp3.1,${{ inputs.test_profile }} -Dcheckstyle.skip -Popencga-storage-hadoop-deps -pl '!:opencga-storage-hadoop-deps-emr6.1,!:opencga-storage-hadoop-deps-hdp2.6' ${{ inputs.mvn_opts }} - - name: Upload result dir - uses: actions/upload-artifact@v3 - with: - name: workdir - path: "**/target/site" - publish-test: - name: Publish test results - runs-on: ubuntu-22.04 - needs: test - strategy: - matrix: - module: ["opencga-app", "opencga-catalog", "opencga-client", "opencga-clinical", "opencga-core", "opencga-master", "opencga-server", "opencga-storage", "opencga-storage/opencga-storage-app", "opencga-storage/opencga-storage-benchmark", "opencga-storage/opencga-storage-core", "opencga-storage/opencga-storage-hadoop", "opencga-storage/opencga-storage-server", "opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core", "opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps"] - steps: - - name: Download result dir - uses: actions/download-artifact@v3 - with: - name: workdir - - name: Deploy unit tests web recursively to remote - uses: garygrossgarten/github-action-scp@release + run: mvn -B verify surefire-report:report --fail-never -P storage-hadoop,hdp3.1,${{ inputs.test_profile }} -Dcheckstyle.skip -Popencga-storage-hadoop-deps -pl '!:opencga-storage-hadoop-deps-emr6.1,!:opencga-storage-hadoop-deps-hdp2.6' ${{ inputs.mvn_opts }} + - name: Publish Test Report + uses: scacap/action-surefire-report@v1 + ## Skip cancelled() + ## https://docs.github.com/en/actions/learn-github-actions/expressions#cancelled + if: success() || failure() with: - local: ${{ 
matrix.module }}/target/site - remote: /var/www/html/reports/${{ inputs.report_context }}/${{ env.xb_version }}/${{ github.ref_name }}/opencga/${{ github.sha }}/unit/${{ matrix.module }} - host: ${{ secrets.SSH_TESTING_SERVER_HOST}} - port: ${{ secrets.SSH_TESTING_SERVER_PORT}} - username: ${{ secrets.SSH_TESTING_SERVER_USER }} - password: ${{ secrets.SSH_TESTING_SERVER_PASSWORD }} - concurrency: 2 + check_name: "Surefire tests report" + report_paths: './**/surefire-reports/TEST-*.xml' + commit: '${{ github.sha }}' + fail_on_test_failures: true diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java index f6dee08bd8e..e808c33cd36 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/hrdetect/HRDetectAnalysis.java @@ -59,7 +59,7 @@ public class HRDetectAnalysis extends OpenCgaToolScopeStudy { public static final String ID = "hr-detect"; public static final String DESCRIPTION = "Run HRDetect analysis for a given somatic sample."; - public final static String HRDETECT_SCORES_FILENAME_DEFAULT = "data_matrix.tsv"; + public final static String HRDETECT_SCORES_FILENAME_DEFAULT = "hrdetect_output.tsv"; @ToolParams private HRDetectAnalysisParams hrdetectParams = new HRDetectAnalysisParams(); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java index d0b17eac64a..c542521329a 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java +++ 
b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/mutationalSignature/MutationalSignatureLocalAnalysisExecutor.java @@ -64,6 +64,9 @@ public class MutationalSignatureLocalAnalysisExecutor extends MutationalSignatureAnalysisExecutor implements StorageToolExecutor { + private static final String SVCLASS = "SVCLASS"; + private static final String EXT_SVTYPE = "EXT_SVTYPE"; + public final static String R_DOCKER_IMAGE = "opencb/opencga-ext-tools:" + GitRepositoryState.getInstance().getBuildVersion(); @@ -80,7 +83,6 @@ public void run() throws ToolException, CatalogException, IOException, StorageEn && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_CATALOGUE_SKIP_VALUE) && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_VALUE)) { // Only compute genome context file - // TODO: overwrite support ! File indexFile = checkGenomeContextFile(); logger.info("Checking genome context file {} for sample {}", indexFile.getAbsolutePath(), getSample()); } @@ -97,7 +99,6 @@ && getSkip().contains(MutationalSignatureAnalysisParams.SIGNATURE_FITTING_SKIP_V // SNV logger.info("Computing catalogue (mutational signature) for SNV variants"); - // TODO: overwrite support ! 
File indexFile = checkGenomeContextFile(); logger.info("Mutational signature analysis is using the genome context file {} for sample {}", indexFile.getAbsolutePath(), getSample()); @@ -191,19 +192,18 @@ refGenomePath, new FastaSequenceIndex(new File(base + ".fai")), try { // Accessing to the context sequence and write it into the context index file - ReferenceSequence refSeq = indexed.getSubsequenceAt(variant.getChromosome(), variant.getStart() - 1, - variant.getEnd() + 1); + ReferenceSequence refSeq = indexed.getSubsequenceAt(variant.getChromosome(), (long) variant.getStart() - 1, + (long) variant.getEnd() + 1); String sequence = new String(refSeq.getBases()); // Write context index pw.println(variant.toString() + "\t" + sequence); } catch (Exception e) { - logger.warn("When creating genome context file for mutational signature analysis, ignoring variant " - + variant.toStringSimple() + ". " + e.getMessage()); + logger.warn("When creating genome context file for mutational signature analysis, ignoring variant {}: {}", + variant.toStringSimple(), Arrays.toString(e.getStackTrace())); } } } - } catch (IOException | CatalogException | ToolException | StorageEngineException e) { throw new ToolExecutorException(e); } @@ -211,7 +211,8 @@ refGenomePath, new FastaSequenceIndex(new File(base + ".fai")), private void updateCountMap(Variant variant, String sequence, Map> countMap) { try { - String k, seq; + String k; + String seq; String key = variant.getReference() + ">" + variant.getAlternate(); @@ -226,16 +227,15 @@ private void updateCountMap(Variant variant, String sequence, Map indexMap = new HashMap<>(); - BufferedReader br = new BufferedReader(new FileReader(indexFile)); String line; while ((line = br.readLine()) != null) { String[] parts = line.split("\t"); @@ -288,9 +288,11 @@ public void computeSignatureCatalogueSNV(File indexFile) throws ToolExecutorExce } public void computeSignatureCatalogueSV() throws ToolExecutorException { + Query query; + File clusteredFile; 
try { // Get variant iterator - Query query = new Query(); + query = new Query(); if (getQuery() != null) { query.putAll(getQuery()); } @@ -301,16 +303,19 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id,sv,studies"); - logger.info("Query: {}", query.toJson()); - logger.info("Query options: {}", queryOptions.toJson()); + logger.info("Query: {}", query != null ? query.toJson() : null); + logger.info("Query options: {}", queryOptions != null ? queryOptions.toJson() : null); - File clusteredFile = computeClusteredFile(query, queryOptions); + clusteredFile = computeClusteredFile(query, queryOptions); + } catch (CatalogException | StorageEngineException | ToolException e) { + throw new ToolExecutorException(e); + } - BufferedReader br = FileUtils.newBufferedReader(clusteredFile.toPath()); + Map countMap = new HashMap<>(); + try (BufferedReader br = FileUtils.newBufferedReader(clusteredFile.toPath())) { // Skip header line // chrom1 start1 end1 chrom2 start2 end2 length type sample id is.clustered // 0 1 2 3 4 5 6 7 8 9 10 - Map countMap = new HashMap<>(); // Skip first line String line = br.readLine(); while ((line = br.readLine()) != null) { @@ -333,35 +338,36 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { countMap.put(key, 1); } } + } catch (IOException e) { + throw new ToolExecutorException(e); + } -// logger.info("Count map size = {}", countMap.size()); -// for (Map.Entry entry : countMap.entrySet()) { -// logger.info("context = {}, count = {}", entry.getKey(), entry.getValue()); -// } - - // Build teh genome context counts object for SV - List genomeContextCounts = new LinkedList<>(); - for (String clustered: new LinkedList<>(Arrays.asList(CLUSTERED, NON_CLUSTERED))) { - for (String type: new LinkedList<>(Arrays.asList(TYPE_DEL, TYPE_TDS, TYPE_INV))) { - for (String length : new LinkedList<>(Arrays.asList(LENGTH_1_10Kb, LENGTH_10Kb_100Kb, 
LENGTH_100Kb_1Mb, LENGTH_1Mb_10Mb, - LENGTH_10Mb))) { - String key = clustered + "_" + type + "_" + length; - genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); - } + // Build the genome context counts object for SV + List genomeContextCounts = new LinkedList<>(); + for (String clustered: new LinkedList<>(Arrays.asList(CLUSTERED, NON_CLUSTERED))) { + for (String type: new LinkedList<>(Arrays.asList(TYPE_DEL, TYPE_TDS, TYPE_INV))) { + for (String length : new LinkedList<>(Arrays.asList(LENGTH_1_10Kb, LENGTH_10Kb_100Kb, LENGTH_100Kb_1Mb, LENGTH_1Mb_10Mb, + LENGTH_10Mb))) { + String key = clustered + "_" + type + "_" + length; + genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); } - String key = clustered + "_" + TYPE_TRANS; - genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? countMap.get(key) : 0)); } + String key = clustered + "_" + TYPE_TRANS; + genomeContextCounts.add(new Signature.GenomeContextCount(key, countMap.containsKey(key) ? 
countMap.get(key) : 0)); + } - // Write catalogue file from the genome context counts - PrintWriter pw = new PrintWriter(getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile()); + // Write catalogue file from the genome context counts + try (PrintWriter pw = new PrintWriter(getOutDir().resolve(CATALOGUES_FILENAME_DEFAULT).toFile())) { pw.write(query.getString(VariantQueryParam.SAMPLE.key())); pw.write("\n"); for (Signature.GenomeContextCount counts : genomeContextCounts) { pw.write(counts.getContext() + "\t" + counts.getTotal() + "\n"); } - pw.close(); + } catch (IOException e) { + throw new ToolExecutorException(e); + } + try { Signature signature = new Signature() .setId(getQueryId()) .setDescription(getQueryDescription()) @@ -371,7 +377,7 @@ public void computeSignatureCatalogueSV() throws ToolExecutorException { JacksonUtils.getDefaultObjectMapper().writerFor(Signature.class).writeValue(getOutDir() .resolve(MutationalSignatureAnalysis.MUTATIONAL_SIGNATURE_DATA_MODEL_FILENAME).toFile(), signature); - } catch (IOException | CatalogException | StorageEngineException | ToolException e) { + } catch (IOException e) { throw new ToolExecutorException(e); } } @@ -383,25 +389,59 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws // $ Rscript sv_clustering.R ./test.bedpe ./out.bedpe File inputFile = getOutDir().resolve("in.clustered.bedpe").toFile(); File outputFile = getOutDir().resolve("out.clustered.bedpe").toFile(); - try { - PrintWriter pw = new PrintWriter(inputFile); + try (PrintWriter pw = new PrintWriter(inputFile);) { + String mateChrom; + int matePosition; + String lengthKey; + boolean processVariant; + + Map> breakendMap = new HashMap<>(); pw.println("chrom1\tstart1\tend1\tchrom2\tstart2\tend2\tlength\ttype\tsample"); while (iterator.hasNext()) { + processVariant = true; Variant variant = iterator.next(); - if (variant.getSv() == null || variant.getSv().getBreakend() == null || variant.getSv().getBreakend().getMate() == 
null) { - continue; + if (breakendMap.containsKey(variant.getChromosome())) { + for (Integer position : breakendMap.get(variant.getChromosome())) { + if (Math.abs(variant.getStart() - position) <= 20) { + // Skipping since it is a mate + processVariant = false; + break; + } + } } - String typeKey = getTypeKey(variant); - String lengthKey = getLengthKey(variant); - if (typeKey != null && lengthKey != null) { - BreakendMate mate = variant.getSv().getBreakend().getMate(); - pw.println(variant.getChromosome() + "\t" + variant.getStart() + "\t" + variant.getEnd() + "\t" - + mate.getChromosome() + "\t" + mate.getPosition() + "\t" + mate.getPosition() + "\t" - + lengthKey + "\t" + typeKey + "\t" + getSample()); + if (processVariant) { + BreakendMate mate = null; + if (variant.getSv() != null && variant.getSv().getBreakend() != null + && variant.getSv().getBreakend().getMate() != null) { + mate = variant.getSv().getBreakend().getMate(); + if (!breakendMap.containsKey(mate.getChromosome())) { + breakendMap.put(mate.getChromosome(), new ArrayList<>()); + } + breakendMap.get(mate.getChromosome()).add(mate.getPosition()); + } + String typeKey = getTypeKey(variant); + if (mate == null) { + mateChrom = "0"; + matePosition = 0; + lengthKey = LENGTH_NA; + } else { + mateChrom = mate.getChromosome(); + matePosition = mate.getPosition() == null ? 
0 : mate.getPosition(); + lengthKey = getLengthKey(variant, typeKey); + } + + if (typeKey != null && lengthKey != null) { + pw.println(variant.getChromosome() + "\t" + variant.getStart() + "\t" + variant.getEnd() + "\t" + + mateChrom + "\t" + matePosition + "\t" + matePosition + "\t" + + lengthKey + "\t" + typeKey + "\t" + getSample()); + } } } - pw.close(); + } catch (Exception e) { + throw new ToolException(e); + } + try { // Build command line to run R script via docker image // Input binding List> inputBindings = new ArrayList<>(); @@ -430,22 +470,17 @@ private File computeClusteredFile(Query query, QueryOptions queryOptions) throws return outputFile; } - private String getClusteredKey(Variant variant) { - return NON_CLUSTERED; - } - private String getTypeKey(Variant variant) { String variantType = variant.getType() != null ? variant.getType().name() : ""; if (CollectionUtils.isNotEmpty(variant.getStudies()) && CollectionUtils.isNotEmpty(variant.getStudies().get(0).getFiles())) { for (FileEntry file : variant.getStudies().get(0).getFiles()) { - if (file.getData() != null) { - if (file.getData().containsKey("EXT_SVTYPE")) { - variantType = file.getData().get("EXT_SVTYPE").toUpperCase(Locale.ROOT); - break; - } else if (file.getData().containsKey("SVCLASS")) { - variantType = file.getData().get("SVCLASS").toUpperCase(Locale.ROOT); - break; + if (file.getData() != null && (file.getData().containsKey(EXT_SVTYPE) || file.getData().containsKey(SVCLASS))) { + if (file.getData().containsKey(EXT_SVTYPE)) { + variantType = file.getData().get(EXT_SVTYPE).toUpperCase(Locale.ROOT); + } else if (file.getData().containsKey(SVCLASS)) { + variantType = file.getData().get(SVCLASS).toUpperCase(Locale.ROOT); } + break; } } } @@ -458,6 +493,7 @@ private String getTypeKey(Variant variant) { case "TDS": case "DUPLICATION": case "TANDEM_DUPLICATION": + case "TANDEM-DUPLICATION": return TYPE_TDS; case "INV": case "INVERSION": @@ -466,30 +502,33 @@ private String getTypeKey(Variant 
variant) { case "TRANS": case "TRANSLOCATION": return TYPE_TRANS; + default: { + logger.warn("Unknown variant type {}, so this variant will be ignored in mutational signature analysis", variantType); + return null; + } } - return null; } - private String getLengthKey(Variant variant) { - if (variant.getSv() == null || variant.getSv().getBreakend() == null || variant.getSv().getBreakend().getMate() == null) { + private String getLengthKey(Variant variant, String type) { + if (type == null) { return null; } - BreakendMate mate = variant.getSv().getBreakend().getMate(); - if (variant.getChromosome().equals(mate.getChromosome())) { - int length = Math.abs(mate.getPosition() - variant.getStart()); - if (length <= 10000) { - return LENGTH_1_10Kb; - } else if (length <= 100000) { - return LENGTH_10Kb_100Kb; - } else if (length <= 1000000) { - return LENGTH_100Kb_1Mb; - } else if (length <= 10000000) { - return LENGTH_1Mb_10Mb; - } - return LENGTH_10Mb; + if (type.equals(TYPE_TRANS)) { + return LENGTH_NA; } else { - if (variant.getType() == VariantType.TRANSLOCATION) { - return LENGTH_NA; + BreakendMate mate = variant.getSv().getBreakend().getMate(); + if (variant.getChromosome().equals(mate.getChromosome())) { + int length = Math.abs(mate.getPosition() - variant.getStart()); + if (length <= 10000) { + return LENGTH_1_10Kb; + } else if (length <= 100000) { + return LENGTH_10Kb_100Kb; + } else if (length <= 1000000) { + return LENGTH_100Kb_1Mb; + } else if (length <= 10000000) { + return LENGTH_1Mb_10Mb; + } + return LENGTH_10Mb; } } return null; @@ -508,12 +547,12 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo throw new ToolException("Unable to compute mutational signature analysis. 
Sample '" + getSample() + "' not found"); } Sample sample = sampleResult.first(); - logger.info("Searching catalogue counts from quality control for sample " + getSample()); + logger.info("Searching catalogue counts from quality control for sample {}", getSample()); if (sample.getQualityControl() != null && sample.getQualityControl().getVariant() != null && CollectionUtils.isNotEmpty(sample.getQualityControl().getVariant().getSignatures())) { - logger.info("Searching in " + sample.getQualityControl().getVariant().getSignatures().size() + " signatures"); + logger.info("Searching in {} signatures", sample.getQualityControl().getVariant().getSignatures().size()); for (Signature signature : sample.getQualityControl().getVariant().getSignatures()) { - logger.info("Matching ? " + getQueryId() + " vs " + signature.getId()); + logger.info("Matching ? {} vs {}", getQueryId(), signature.getId()); if (getQueryId().equals(signature.getId())) { // Write catalogue file try (PrintWriter pw = new PrintWriter(cataloguesFile)) { @@ -521,7 +560,6 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo for (Signature.GenomeContextCount count : signature.getCounts()) { pw.println(count.getContext() + "\t" + count.getTotal()); } - pw.close(); } catch (Exception e) { throw new ToolException("Error writing catalogue output file: " + cataloguesFile.getName(), e); } @@ -587,7 +625,8 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo scriptParams.append(" --genomev=hg19"); break; } - case "GRCh38": { + case "GRCh38": + default: { scriptParams.append(" --genomev=hg38"); break; } @@ -595,7 +634,7 @@ private void computeSignatureFitting() throws IOException, ToolException, Catalo String cmdline = DockerUtils.run(R_DOCKER_IMAGE, inputBindings, outputBinding, scriptParams.toString(), null); - logger.info("Docker command line: " + cmdline); + logger.info("Docker command line: {}", cmdline); // Check fitting file before parsing and 
creating the mutational signature fitting data model File signatureCoeffsFile = getOutDir().resolve(SIGNATURE_COEFFS_FILENAME).toFile(); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/exomiser/ExomiserWrapperAnalysisExecutor.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/exomiser/ExomiserWrapperAnalysisExecutor.java index e586a47d434..1b19e410b5b 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/exomiser/ExomiserWrapperAnalysisExecutor.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/wrappers/exomiser/ExomiserWrapperAnalysisExecutor.java @@ -130,7 +130,7 @@ public void run() throws ToolException { VariantQuery query = new VariantQuery() .study(studyId) - .sample(sampleId + ":0/1,1/1") + .sample(sampleId) .includeSample(samples) .includeSampleData("GT") .unknownGenotype("./."); diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java index 37ae808c6bf..31a59eebc2e 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantAnalysisTest.java @@ -194,7 +194,9 @@ public void setUp() throws Throwable { null, null, null, token); Phenotype phenotype = new Phenotype("phenotype", "phenotype", ""); - Disorder disorder = new Disorder("disorder", "disorder", "", "", Collections.singletonList(phenotype), Collections.emptyMap()); + Disorder disorder1 = new Disorder("disorder id 1", "disorder name 1", "", "", Collections.singletonList(phenotype), Collections.emptyMap()); + Disorder disorder2 = new Disorder("disorder id 2", "disorder name 2", "", "", Collections.singletonList(phenotype), Collections.emptyMap()); + List disorderList = new ArrayList<>(Arrays.asList(disorder1, disorder2)); List individuals = 
new ArrayList<>(4); // Father @@ -208,14 +210,14 @@ public void setUp() throws Throwable { // Son individuals.add(catalogManager.getIndividualManager() .create(STUDY, new Individual(son, son, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "", - Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()).setFather(individuals.get(0)).setMother(individuals.get(1)).setDisorders(Collections.singletonList(disorder)), Collections.singletonList(son), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); + Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()).setFather(individuals.get(0)).setMother(individuals.get(1)).setDisorders(disorderList), Collections.singletonList(son), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); // Daughter individuals.add(catalogManager.getIndividualManager() .create(STUDY, new Individual(daughter, daughter, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initFemale(), null, null, null, null, "", Collections.emptyList(), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()).setFather(individuals.get(0)).setMother(individuals.get(1)), Collections.singletonList(daughter), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first()); catalogManager.getFamilyManager().create( STUDY, - new Family("f1", "f1", Collections.singletonList(phenotype), Collections.singletonList(disorder), null, null, 3, null, null), + new Family("f1", "f1", Collections.singletonList(phenotype), disorderList, null, null, 3, null, null), individuals.stream().map(Individual::getId).collect(Collectors.toList()), new 
QueryOptions(), token); @@ -814,9 +816,22 @@ public void testMutationalSignatureCatalogueSV() throws Exception { params.setSample(cancer_sample); params.setId("catalogue-1"); params.setDescription("Catalogue #1"); - VariantQuery query = new VariantQuery(); - query.sample(cancer_sample); - query.type(VariantType.SV.name()); + VariantQuery query = new VariantQuery() + .sample(cancer_sample) + .type(VariantType.SV.name()) + //.file("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz"); + .fileData("AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1") + .region("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y"); + + //https://ws.opencb.org/opencga-test/webservices/rest/v2/analysis/variant/mutationalSignature/query + // ?study=serena@cancer38:test38 + // &fitting=false + // &sample=AR2.10039966-01T + // &fileData=AR2.10039966-01T_vs_AR2.10039966-01G.annot.brass.vcf.gz:BAS>=0;BKDIST>=-1;EXT_PS_SOM>=4;EXT_RC_SOM>=0 + // &region=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y + // &type=SV + + params.setQuery(query.toJson()); params.setSkip("fitting"); @@ -992,8 +1007,12 @@ public void testHRDetect() throws Exception { List hrDetects = sample.getQualityControl().getVariant().getHrDetects(); for (HRDetect hrDetect : hrDetects) { if (hrDetect.getId().equals(hrDetect.getId())) { + System.out.println("HRDetect scores for " + hrDetect.getId()); + for (Map.Entry entry : hrDetect.getScores().entrySet()) { + System.out.println("\t" + entry.getKey() + ": " + entry.getValue()); + } if (hrDetect.getScores().containsKey("del.mh.prop")) { - Assert.assertEquals(hrDetect.getScores().getFloat("del.mh.prop"), 0.172413793103448f, 0.00001f); + Assert.assertEquals(-1.5702984, hrDetect.getScores().getFloat("del.mh.prop"), 0.00001f); return; } } @@ -1018,12 +1037,23 @@ public void testHRDetectParseResults() throws Exception { HRDetect hrDetect = HRDetectAnalysis.parseResult(hrdParams, hrdetectOutDir); for (Map.Entry entry : 
hrDetect.getScores().entrySet()) { - System.out.println(entry.getKey() + " -> " + entry.getValue()); + System.out.println(entry.getKey() + ": " + entry.getValue()); } assertTrue(hrDetect.getScores().containsKey("hrd")); - assertEquals(-0.102769986f, hrDetect.getScores().getFloat("hrd"), 0.00001f); + assertEquals(-1.95208666666667, hrDetect.getScores().getFloat("hrd"), 0.00001f); assertTrue(hrDetect.getScores().containsKey("Probability")); - assertEquals(0.998444f, hrDetect.getScores().getFloat("Probability"), 0.00001f); + assertEquals(4.21293910790655e-05, hrDetect.getScores().getFloat("Probability"), 0.00001f); + } + + @Test + public void testPedigreeGraph() throws CatalogException { + String base64 = "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAMAAABKCk6nAAAC6FBMVEUAAAABAQECAgIEBAQGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcYGBgZGRkaGhobGxsdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKiorKyssLCwtLS0uLi4vLy8wMDAxMTEyMjIzMzM0NDQ1NTU2NjY3Nzc4ODg5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFCQkJDQ0NERERFRUVGRkZHR0dISEhJSUlKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFSUlJTU1NUVFRVVVVWVlZXV1dYWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZGRlZWVmZmZnZ2doaGhpaWlqampra2tsbGxtbW1ubm5vb29wcHBxcXFycnJzc3N0dHR1dXV2dnZ3d3d4eHh5eXl6enp7e3t9fX1+fn5/f3+AgICBgYGCgoKDg4OEhISFhYWGhoaHh4eIiIiJiYmKioqMjIyNjY2Ojo6Pj4+QkJCRkZGSkpKTk5OUlJSVlZWWlpaXl5eYmJiZmZmampqbm5ucnJydnZ2fn5+goKChoaGioqKjo6OkpKSlpaWmpqanp6eoqKipqamqqqqrq6usrKytra2urq6vr6+wsLCxsbGysrKzs7O0tLS1tbW2tra3t7e4uLi5ubm6urq7u7u8vLy+vr6/v7/AwMDBwcHCwsLDw8PExMTFxcXGxsbHx8fIyMjJycnKysrLy8vMzMzNzc3Ozs7Pz8/Q0NDR0dHS0tLT09PU1NTV1dXW1tbX19fY2NjZ2dna2trb29vc3Nzd3d3e3t7f39/g4ODh4eHi4uLj4+Pk5OTl5eXm5ubn5+fo6Ojp6enq6urr6+vs7Ozu7u7v7+/w8PDx8fHy8vLz8/P09PT19fX29vb39/f4+Pj5+fn6+vr7+/v8/Pz9/f3+/v7////lDE73AAARGklEQVR4nO3dfVwUdQLH8T2tDpXQ8ta0uB4OKq87PTpleRAhwMTKhxDsOLWEyDTS4MzrLDMf8PLSUznTyisVNbvOh9My6y6VUCyP81mUTE4BxUpFediF3783M7uzu7LKMTuLs/vt+3m9YoYZ2Pn99s0CvwXMJBh0JqMHwNo3AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPA
IDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8HcA176u9s9S5e9p3QwuQmsu3zfv4iM3oYVwrHcBrf7PUUeKv1b3sAt8NLSA6PDFyzKwfz8mKGrvH6KFcPT3ATszJ6Z7HfhBdem5okbQJkf7bN/o354weztUisI7ODlinbEOUl/+MPG7kYK4Rgb2vbsBu+44dWByLrDFuMNeKwN6Xt8qx4wAWn6df602Ni8BeV5mo7qnAImOvMUNpJQJ73fxCdc8JXDTZmKG0EoG9bvgZdc8JbIszZiit5GPgxUPnGtD4GUZcNcx5TziBBTzwX3O2GdCwZUZctW+zOusfELAxn6Jz9hlx1dHl6p4T+HLi1d/UwAjsde8uVPecwOtnGjGQViOw19VaGhx7TuDECiMG0moE9r6l0xw7IS0P+FEE1tHot+xbB/DmwY3GjKO1CKwj67jJtfJWAW6c81itMcNoNQLrqrD/vFMKcM2blkVNRo2itXQA/+1XIx2F3aHuRS713dDannHAomHl49EpNz0a/cjSi4aNodV0ANvK1crKnLtW3w2t7RkILHex8nw73Or3aybEm0x39BqYsPo7HTcD8Ut3BgO3R6UZJqW+64pSX7/1iU+9viEC+2NnRpscdbcNK1v45oafZnj72yIE9sM+NTmLO5xms1x+snj6T7d6d1sE9r/ecfmanhq7a0X+qYdFXMX9+V7dmAbg3l3OCbE6Qt6NC6mXXi7o0/EZ+bUjCZ3uWivvvBt2Q1iJa6ueb/euC7Dm6V9ID+q1xIsLvefma0obLAZ8l7t1y+++j8mY482wtQB3m+KY4YkOXd+XNh9szJBnaA2feulfnaX7eJN545k9J1xbx/n27/oAa51+VvzZ7Z23a75Okbuv6f5PNr/4bUxzUsWswuLbtd+YNuCZXarsM5wR9cIQ5dBEeYYHOlwWIjVXiL7L7W+obh3n27/rA6xx+o2dPhciM1PrZS5YrgA2NyednrmmJLPOYk0teqBa+7C1AK8bk2Of4d0FX3ZUrqXMcF+HOmmGiaL+R/k9e+TUObcCDFjj9MtM0up4UaTWy/zuCl/ToN1P10VZRxwsKDg6whb+nPZhawIu71whz3BHxxoR/oZ8SJlh493TGnYGRYlyU0x1xS9ecW4FGrC26e81NUtfT3trvMrpK31NTw0/tHjJkVRb1OWsojWvJR7TPGxNwCI7S55hVor0aeqX8iE74IGEblGZaaLStF6Itx90bgUasLbpe/cIntYCePhIyXbcF6tnVQ0SsefSRmketjbgik6zIsTl4E5mc4ipVLgDJswT4tYNdlh1CwesafqNQTukDwaNX4ObLS2A7yleNfuUbDtly9bc87GW5v9/G1emDVjkdIsQhV1PVlVVxeYKa934rDqrEMWVla/dVivElNia031edW3V8+3e9QLWNv3MpG+LgzV+41vawtfUS8R9l/fxx3nnY5sHncxfMe4zrcPWCFwVFCEG5cqvrTVblU8nU4V4OeSmxEPSofqsYPOketdWPd/uXTdgTdO/kBZ0m9Z1cEFL4MekFXBsc/LJOatKnqzv1xj9ktZh85ksv2pCS+Cn5BVwybh6i3XkgTcXlic+rfUWCeyT8qRHcui1/gQ8z/OzmONtPc481hL4EWUFfHDJ4uMjbJEXn9mWovWCBPZJ8l367tlWTrbI8bbqGfVZz+bwlsCh0gr4iGRb+3TR+zOqk8QdTRovSGCfdBVDOetVTrp/16mecTzr2fx0mAewtErKlFbA1cliQM3vN30aMq5J2wUJrLuD/YKHZNo/Y07vcXPYbnEsMeTetUKY5/a913myemT30Dccx4TytuoZe/KKq3nCnFEtgVML7SvgFzdvm3ze0pSYMv+pJk0XJLDebGH5tq03Kvf3l6Fnxdf/td33csP
2znuFOaW+WT3ZbMmr+ybsH8ox+Z1C9zjfzZ4MfPZt8QePb7KUFfAL0irp4W/++F7p4Cli+SlNFySw3nbfYhMiRbm/S7tvaxCipKv0iTIzT5i3uk6WBkvbhWOVY3Khe5zvZs/xnMm6lsDJjhXwyn+Pre/XMOql1VovSGC9/b2v9OI5+2fMdx4Mzji7/ufSgZmjhHmf6+SGG8PDw+8crhyTC93jfDd7DuCalsA9T85etUdeAaftX/bn8qFDq7VekMB6291LejFSXbXUPPK88wG133Vyr9n+HKN8TE56BKvvZk991vPhFsA/U1fAw5siL47fnKz5ggTWm+2ev4myIOX+PlBkbRiVZ7v31cadXb5S7m/1ZFPklIu2g7uVY299pHwNVt9NftX1pO6aFsBP2FfAtdk7100/M3DuSs0XJLDu9kdGp45V7u9dfbrcmvqtOJpwc3ih49GqnqzOMIf036ocGzRNefCpZ+RXXU/q2qKuBB4rr4DXyivghJqX1lgaNV+QwH7WxiuB4+wr4KmbPn3+giXnQ+23R2B/a/QVwGZpBfyJvAJO+eb1vCe8uDkC+1sXY92Beysr4LnvlY6pfyDam7+QIbDfdcr9h/5jpBWwvEoatf+VUK/++QAC+1+n49yA5RXw0j+XD1t3/1GvbozAftjliU7gqIXlI6QV8Oik8Ze8uy0C+2VfJDqAu8sr4GcnhO7w9pYI7Kd9MU4BvrfwvvE9n9jp/e0Q2G9r2r9qdua4aa/v0/VPQxAYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcGDAB6i488r0YMA5iP42hEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBg8CeHCR0SPw3yCA+Qi+dgQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDB6BwSMweAQGj8DgERg8AoNHYPAIDJ5PgOvLyg1tzBZjr3/KF3diO+UT4AWx2YaWPMbY69/pizuxnfIJ8NoCX9xK4DbQ6AG0EoF9EIHBIzB4BAaPwOARGDwCg0dg8AgMHoHBIzB4BAaPwOARGDwCg0dg8AgMHoHBIzB4BAaPwOARGDwCg0dg8LCBazfnJ/SfteF7HwwmsPqT8y8buj6UHZM1dMST8dljjR6UR3qBD42Jn/mP9MSP85PS9vpkQIFTjMnZX9Oz/xKx4VdLsnoYPSiP9AE3zRj8b2kzOV16cXhEbqNPhhQoDXQBT3xtS+Lx6M+mvTjA6EF5pAvYOmpes7xVgIV4+5E6H4woYHIDXrAp+VjMF5OmfnKL0YPySBdw7mL71gEsVvvfl6B2zA14zaNH++3KlB7GMUYPyiM9wLvSHDsqsHh2s87hBFJuwCOOWkqfnL8x+WhPowflkR7gYV87dpzANQn6RhNQuQEf+PV/Ri5a/ejRqAeNHpRHOoAvJKl7TmAxskLfcAIpN2DL4ceXrxx2xFIabvSgPNIBvP0
P6p4L+I0PdY0moHIDPpqy4s1RBywH06CAP1yg7rmAVy7TN5xAyg148IeLM/bLD+N+Rg/KIx3An8xU91zASwr1DSeQcgP+aG72l1FlKSsKoJ7oOJ2q7rmAx/9H33ACKTfguc+WRB+THsa/jTN6UB7p+S56wAXHjhO4sX+zzvEEUG7AU7cPOB6/cW52CdYTHSumO3acwIvm6xxOIOUG/M+44/Hbpk8oibYYPSiP9AA3Ddpl31GBD8U26B5Q4OQGnHg8dmfui9LD+HajB+WRrqcqq6NKlK0D+GDk17rHE0C5AZfHFD8/VX4Y/9LoQXmk76dJVcl/lB+zCrCtIO6ET4YUKLkBR+0dN3Oz/DD+udGD8kjnz4Ntf4l8ZXfd5PSGr2ZbXv8hfX4WVwDvGzt/Y/LxmOJn/e8fNdT9Gx3WLZOS7r/noYnr630xnEAqPVHtJw/cN7BHQmj/vmHxRg/KI4h/TphdOwKDR2DwCAwegcHTANy7yzkhVkfIu3Eh8vfMC/p0fEZ+7UhCp7vWyjvvht0QViLEhfSgXkuEa4tRm6d/ML5zj7wmIYZI66ebDRywPS3A3aY4ZniiQ9f3pc0HGzPkGVrDp176V+d9Qmwybzyz54QQWfFnt3fe7tpi1Obp9828VBa6TAJeVldn/NpRC/DMLlX2Gc6IemGIcmiiPMMDHS4LkZorzWy5crCx0+dCZGY6tyC1dfoiRJp21iQJeLlhY3VLC/C6MTn2Gd5d8GXHavmQMsN9HeqkGSaK+h/l9+yRUyfKTOeFWBTp3ILU1umLGVmXjt35kQR8++0PfWbokOU0AZd3rpBnuKNjjQh/Qz6kzLDx7mkNO4OiRLkpprriF6+IvaZmId7r7dyC1Nbpiz3hJtNz0onNXx3Ov9Hw/6OTJmCRnSXPMCtF+jBVfm6izFAcSOgWlZkmKk3rhXj7QdxHcNumf+mW2XUVFsevM6W8atyA7WkDrug0K0JcDu5kNoeYSoU6Q7mEeULcukGZYWPQDuleyHRuQWrr9I+ZLkof19H248NeNmi0zrQBi5xuEaKw68mqqqrYXGGtG59VZxWiuLLytdtqhZgSW3O6j/Qhm5n0bXHwdtcWo7ZO3/qTuY2V0RNE7arT5966YbfRw9YIXBUUIQblyq+tNVunKb+OJMTLITclHpIO1WcFmydJC4MLaUG3KetgxxajNk9/V1SX7hnfiYuxNwf1Nf7XxPlMFngEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAwegcEjMHgEBo/A4BEYPAKDR2DwCAze/wCMbFD0pB/BRAAAAABJRU5ErkJggg=="; + + OpenCGAResult results = catalogManager.getFamilyManager().search(STUDY, new Query("id", "f1"), QueryOptions.empty(), token); + Family family = results.first(); + + assertTrue(family.getPedigreeGraph() != null); + assertEquals(base64, family.getPedigreeGraph().getBase64()); } public void checkExecutionResult(ExecutionResult er) { diff --git 
a/opencga-app/app/cloud/docker/opencga-base/Dockerfile b/opencga-app/app/cloud/docker/opencga-base/Dockerfile index 5132720657c..d3eee2266d2 100644 --- a/opencga-app/app/cloud/docker/opencga-base/Dockerfile +++ b/opencga-app/app/cloud/docker/opencga-base/Dockerfile @@ -1,5 +1,6 @@ -## Based on Debian 11 (bullseye) -FROM openjdk:8-jre +## Based on Ubuntu 22.04 (jammy) +## We are now using OpenJDK 8u372 to support "cgroup v2", see https://developers.redhat.com/articles/2023/04/19/openjdk-8u372-feature-cgroup-v2-support# +FROM eclipse-temurin:8u372-b07-jre-jammy ARG BUILD_PATH="." @@ -7,17 +8,21 @@ ENV OPENCGA_HOME=/opt/opencga ENV OPENCGA_CONFIG_DIR=${OPENCGA_HOME}/conf RUN apt-get update && apt-get -y upgrade && apt-get install -y lsb-release sshpass ca-certificates curl gnupg jq ncurses-bin && \ - # Install Docker repository - curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian \ - $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - # Install MongoDB repository - wget -qO - https://www.mongodb.org/static/pgp/server-4.2.asc | apt-key add - && \ - echo "deb http://repo.mongodb.org/apt/debian buster/mongodb-org/4.2 main" | tee /etc/apt/sources.list.d/mongodb-org-4.2.list && \ - apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io mongodb-org-shell && \ - rm -rf /var/lib/apt/lists/* && \ + ## Install Docker repository + install -m 0755 -d /etc/apt/keyrings && \ + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ + chmod a+r /etc/apt/keyrings/docker.gpg && \ + echo "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu "$(. 
/etc/os-release && echo "jammy")" stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ + ## Install MongoDB 6.0 repository + curl -fsSL https://pgp.mongodb.com/server-6.0.asc | gpg --dearmor -o /usr/share/keyrings/mongodb-server-6.0.gpg && \ + echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-6.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | tee /etc/apt/sources.list.d/mongodb-org-6.0.list && \ + ## Install docker and mongodb packages + apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io mongodb-mongosh && \ + ## Install yq wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 && \ chmod +x /usr/local/bin/yq && \ + ## Clean downloaded packages to make images smaller + rm -rf /var/lib/apt/lists/* && \ adduser --disabled-password --uid 1001 opencga ## Run Docker images as non root diff --git a/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile b/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile index 024f4ed2a68..5558259e3f5 100644 --- a/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile +++ b/opencga-app/app/cloud/docker/opencga-ext-tools/Dockerfile @@ -15,13 +15,16 @@ RUN apt-get update -y && DEBIAN_FRONTEND="noninteractive" TZ="Europe/London" apt libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev \ gnuplot pandoc samtools bcftools tabix fastqc plink1.9 bwa r-base && \ ## Installation dependencies using R install.packages() is slower than apt-get but final size is 400GB smaller. 
- R -e "install.packages(c('BiocManager', 'RCircos', 'nnls', 'ggplot2', 'jsonlite', 'optparse', 'knitr', 'configr', 'dplyr', 'rmarkdown', 'tidyr', 'httr', 'kinship2'))" && \ + R -e "install.packages(c('BiocManager', 'RCircos', 'nnls', 'ggplot2', 'jsonlite', 'optparse', 'knitr', 'configr', 'dplyr', 'rmarkdown', 'tidyr', 'httr', 'kinship2', 'limSolve'))" && \ R -e "BiocManager::install('BiocStyle')" && \ - ## signature.tools.lib installation + ## signature.tools.lib installation \ R -e 'install.packages(c("devtools", "getopt"), repos="https://www.stats.bris.ac.uk/R/")' && \ - git clone https://github.com/Nik-Zainal-Group/signature.tools.lib.git /opt/opencga/signature.tools.lib && \ -# git checkout tags/v2.3.0 && \ - git checkout d3d73db497b5b83abc55d6cd55840c34ed542628 && \ + git clone https://github.com/Nik-Zainal-Group/signature.tools.lib.git /opt/opencga/signature.tools.lib + +WORKDIR /opt/opencga/signature.tools.lib + +RUN git fetch origin --tags && \ + git checkout tags/v2.4.1 && \ sed -i '/Mmusculus/d' DESCRIPTION && \ sed -i '/Cfamiliaris/d' DESCRIPTION && \ sed -i '/1000genomes/d' DESCRIPTION && \ @@ -30,4 +33,4 @@ RUN apt-get update -y && DEBIAN_FRONTEND="noninteractive" TZ="Europe/London" apt rm -rf /var/lib/apt/lists/* /tmp/* /opt/opencga/signature.tools.lib/.git && \ strip --remove-section=.note.ABI-tag /usr/lib/x86_64-linux-gnu/libQt5Core.so.5 -WORKDIR /opt/opencga \ No newline at end of file +WORKDIR /opt/opencga diff --git a/opencga-app/app/cloud/docker/opencga-init/Dockerfile b/opencga-app/app/cloud/docker/opencga-init/Dockerfile index f7efd322ac9..60b60f278b7 100644 --- a/opencga-app/app/cloud/docker/opencga-init/Dockerfile +++ b/opencga-app/app/cloud/docker/opencga-init/Dockerfile @@ -8,25 +8,22 @@ ARG INIT_PATH=cloud/docker/opencga-init/ COPY ${INIT_PATH} /opt/opencga/init/ COPY ${BUILD_PATH}/conf/* /opt/opencga/init/test/ - -# Mount volume to copy config into +## Mount volume to copy config into VOLUME /opt/volume USER root -# Install local 
dependencies -RUN apt install python3 && \ - echo "deb http://ftp.de.debian.org/debian bullseye main" | tee -a /etc/apt/sources.list.d/sources.list && \ - apt-get update && apt-get -y upgrade && \ - apt install -y python3-pip && \ + +## Install local dependencies +RUN apt-get update && apt-get -y upgrade && apt-get install -y python3 python3-pip && \ pip3 install --upgrade pip setuptools && \ pip3 install -r /opt/opencga/init/requirements.txt && \ chmod +x /opt/opencga/init/setup.sh /opt/opencga/init/setup-hadoop.sh && \ echo ">Running init container configuration tests" && \ cd /opt/opencga/init/test && python3 test_override_yaml.py -v && rm -r /opt/opencga/init/test - +## Run Docker images as non root USER opencga -# It is the responsibility of the setup.sh -# script to initialise the volume correctly -# and apply any runtime config transforms. -ENTRYPOINT [ "/bin/sh","/opt/opencga/init/setup.sh" ] + +## It is the responsibility of the setup.sh script to initialise the volume correctly +## and apply any runtime config transforms. 
+ENTRYPOINT [ "/bin/sh", "/opt/opencga/init/setup.sh" ] diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/PedigreeGraphUtils.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/PedigreeGraphUtils.java index 62394a6e562..b56966a44e8 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/PedigreeGraphUtils.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/PedigreeGraphUtils.java @@ -120,7 +120,7 @@ private static File createPedFile(Family family, Path outDir) throws FileNotFoun StringBuilder sbDisorders = new StringBuilder(); if (CollectionUtils.isNotEmpty(disorders) && disorders.size() > 1) { for (Disorder disorder : family.getDisorders()) { - sbDisorders.append("affected.").append(disorder.getId()).append("\t"); + sbDisorders.append("\"affected.").append(disorder.getId()).append("\"\t"); } } else { sbDisorders.append("affected").append("\t"); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java index 33f6de51fea..cf4477bb4f1 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/VariantStorageMetadataManager.java @@ -185,6 +185,10 @@ public VariantStorageMetadataManager(VariantStorageMetadataDBAdaptorFactory dbAd }); } + public ObjectMap getConfiguration() { + return dbAdaptorFactory.getConfiguration(); + } + public Lock lockGlobal(long lockDuration, long timeout, String lockName) throws StorageEngineException { try { diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java 
b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java index d54e03fd3db..d3f5e8dd1ef 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java @@ -211,6 +211,25 @@ public enum GenotypeClass implements Predicate { return true; }), + /** + * Genotypes containing any secondary alternate. + *

+ * 1/2, 2/3, ./2, 0/2, ... + */ + SEC(str -> { + Genotype gt = parseGenotype(str); + if (gt == null) { + // Skip invalid genotypes + return false; + } + for (int allele : gt.getAllelesIdx()) { + if (allele > 1) { + return true; + } + } + return false; + }), + /** * Genotypes containing reference and secondary alternates only. *

@@ -233,6 +252,7 @@ public enum GenotypeClass implements Predicate { return hasSecondaryAlternate; }), + /** * Contains the main alternate. *

diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java index c2798e0dba8..65c2bda05e6 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQuery.java @@ -9,6 +9,7 @@ import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -193,6 +194,12 @@ public VariantQuery includeSampleData(String value) { put(VariantQueryParam.INCLUDE_SAMPLE_DATA.key(), value); return this; } + + public VariantQuery includeSampleData(Collection value) { + put(VariantQueryParam.INCLUDE_SAMPLE_DATA.key(), value); + return this; + } + public String includeSampleData() { return getString(VariantQueryParam.INCLUDE_SAMPLE_DATA.key()); } @@ -257,6 +264,12 @@ public VariantQuery includeFile(String value) { put(VariantQueryParam.INCLUDE_FILE.key(), value); return this; } + + public VariantQuery includeFile(Collection value) { + put(VariantQueryParam.INCLUDE_FILE.key(), value); + return this; + } + public VariantQuery includeFileAll() { return includeFile(ParamConstants.ALL); } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java index 1476416b6e2..1f305574a64 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java +++ 
b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java @@ -2285,7 +2285,12 @@ public void testReturnNoneFiles() { queryResult = query(new Query(INCLUDE_FILE.key(), VariantQueryUtils.NONE).append(INCLUDE_SAMPLE.key(), ALL), new QueryOptions()); assertEquals(allVariants.getResults().size(), queryResult.getResults().size()); for (Variant variant : queryResult.getResults()) { - assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList())); + if (variant.getLengthReference() == 0 || variant.getLengthAlternate() == 0) { + assertThat(variant.getStudies().get(0).getFiles(), is(not(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getFiles().get(0).getCall(), is(not(nullValue()))); + } else { + assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList())); + } assertThat(new HashSet<>(variant.getStudies().get(0).getSampleDataKeys()), is(FORMAT)); } } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/DummyTestAnnotator.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/DummyTestAnnotator.java index 71f5135e08c..b36e8fa90ad 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/DummyTestAnnotator.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/DummyTestAnnotator.java @@ -59,7 +59,7 @@ public List annotate(List variants) throws VariantAn public ProjectMetadata.VariantAnnotationMetadata getVariantAnnotationMetadata() throws VariantAnnotatorException { return new ProjectMetadata.VariantAnnotationMetadata(-1, null, null, new ProjectMetadata.VariantAnnotatorProgram("MyAnnotator", key, null), - Collections.singletonList(new ObjectMap("data", "genes")), null); + Collections.singletonList(new ObjectMap("data", 
"genes")), null,null); } } diff --git a/opencga-storage/opencga-storage-core/src/test/resources/hrdetect_output_38.tsv b/opencga-storage/opencga-storage-core/src/test/resources/hrdetect_output_38.tsv index f69d855e19c..14b9e346363 100644 --- a/opencga-storage/opencga-storage-core/src/test/resources/hrdetect_output_38.tsv +++ b/opencga-storage/opencga-storage-core/src/test/resources/hrdetect_output_38.tsv @@ -1,2 +1,2 @@ intercept del.mh.prop SNV3 SV3 SV5 hrd SNV8 Probability -AR2.10039966-01T -3.364 6.1388694855816 2.93200322296942 -0.876753168376584 1.62647246717118 -0.102769988932733 0.110259929136027 0.998444003582779 +AR2.10039966-01T -3.364 -1.70122966415816 -0.949832911392405 -0.876753168376584 -1.10515509103169 -1.95208666666667 -0.125665303554577 4.21293910790655e-05 diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-large-sv.vcf b/opencga-storage/opencga-storage-core/src/test/resources/variant-large-sv.vcf new file mode 100644 index 00000000000..7038eb551fb --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/resources/variant-large-sv.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##command=seq 1000000 500 3000000 | while read i ; do echo -e "chr1\t$i\t.\tA\tC\t$RANDOM\tPASS\t.\tGT\t0/1\t1/1\t1|0\t0|1" ; done +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19600 NA19660 NA19661 NA19685 +chr1 1000000 . A 6067 PASS END=4000000 GT 0/1 1/1 1|0 0|1 +chr1 4000200 . A C 5536 PASS . 
GT 0/1 1/1 1|0 0|1 \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java index b67fa9887e5..063219f7654 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopLocalLoadVariantStoragePipeline.java @@ -50,6 +50,7 @@ import org.opencb.opencga.storage.hadoop.variant.archive.VariantHBaseArchiveDataWriter; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBLoader; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; import org.opencb.opencga.storage.hadoop.variant.load.VariantHadoopDBWriter; import org.opencb.opencga.storage.hadoop.variant.transform.VariantSliceReader; import org.opencb.opencga.storage.hadoop.variant.transform.VariantToVcfSliceConverterTask; @@ -83,6 +84,7 @@ public class HadoopLocalLoadVariantStoragePipeline extends HadoopVariantStorageP private int taskId; private HashSet loadedGenotypes; private int sampleIndexVersion; + private int largestVariantLength; public HadoopLocalLoadVariantStoragePipeline(StorageConfiguration configuration, VariantHadoopDBAdaptor dbAdaptor, IOConnectorProvider ioConnectorProvider, @@ -266,6 +268,7 @@ protected void loadFromProto(URI input, URI outdir, ArchiveTableHelper helper, P VariantHadoopDBWriter variantsWriter = newVariantHadoopDBWriter(); List sampleIds = new 
ArrayList<>(getMetadataManager().getFileMetadata(getStudyId(), getFileId()).getSamples()); SampleIndexDBLoader sampleIndexDBLoader = newSampleIndexDBLoader(sampleIds); + GetLargestVariantTask largestVariantTask = new GetLargestVariantTask(); // ((TaskMetadata) t -> t) // .then(archiveWriter) @@ -294,6 +297,7 @@ protected void loadFromProto(URI input, URI outdir, ArchiveTableHelper helper, P } List variants = converter.convert(slice); + largestVariantTask.apply(variants); variants = VariantHadoopDBWriter.filterVariantsNotFromThisSlice(slice.getPosition(), variants); variantsWriter.write(variants); if (sampleIndexDBLoader != null) { @@ -327,6 +331,7 @@ protected void loadFromProto(URI input, URI outdir, ArchiveTableHelper helper, P // Update list of loaded genotypes this.loadedGenotypes = sampleIndexDBLoader.getLoadedGenotypes(); this.sampleIndexVersion = sampleIndexDBLoader.getSampleIndexVersion(); + this.largestVariantLength = largestVariantTask.getMaxLength(); } } @@ -354,6 +359,7 @@ protected void loadFromAvroWithArchive(URI input, URI outdir, ArchiveTableHelper boolean stdin = options.getBoolean(STDIN.key(), STDIN.defaultValue()); int sliceBufferSize = options.getInt(ARCHIVE_SLICE_BUFFER_SIZE.key(), ARCHIVE_SLICE_BUFFER_SIZE.defaultValue()); VariantReader variantReader = variantReaderUtils.getVariantReader(input, helper.getStudyMetadata(), stdin); + GetLargestVariantTask largestVariantTask = new GetLargestVariantTask(); AbstractDuplicatedVariantsResolver resolver = new DuplicatedVariantsResolverFactory(getOptions(), ioConnectorProvider) .getResolver(UriUtils.fileName(input), outdir); VariantDeduplicationTask dedupTask = new DuplicatedVariantsResolverFactory(getOptions(), ioConnectorProvider) @@ -374,7 +380,7 @@ protected void loadFromAvroWithArchive(URI input, URI outdir, ArchiveTableHelper String nonRefFilter = options.getString(ARCHIVE_NON_REF_FILTER.key()); // TODO: Move "SampleIndexDBLoader" to Write step so we can increase the number of threads 
GroupedVariantsTask task = new GroupedVariantsTask(archiveWriter, hadoopDBWriter, sampleIndexDBLoader, - null, archiveFields, nonRefFilter); + null, archiveFields, nonRefFilter, largestVariantTask); ParallelTaskRunner>, Object> ptr = new ParallelTaskRunner<>(sliceReader, task, null, config); @@ -389,6 +395,7 @@ protected void loadFromAvroWithArchive(URI input, URI outdir, ArchiveTableHelper // Update list of loaded genotypes this.loadedGenotypes = sampleIndexDBLoader.getLoadedGenotypes(); this.sampleIndexVersion = sampleIndexDBLoader.getSampleIndexVersion(); + this.largestVariantLength = largestVariantTask.getMaxLength(); } } @@ -397,7 +404,7 @@ protected void loadFromAvroWithoutArchive(URI input, URI outdir, ArchiveTableHel int studyId = helper.getStudyId(); int fileId = Integer.parseInt(helper.getFileMetadata().getId()); - + List sampleIds = new ArrayList<>(getMetadataManager().getFileMetadata(studyId, fileId).getSamples()); // Reader boolean stdin = options.getBoolean(STDIN.key(), STDIN.defaultValue()); @@ -406,12 +413,12 @@ protected void loadFromAvroWithoutArchive(URI input, URI outdir, ArchiveTableHel .getResolver(UriUtils.fileName(input), outdir); VariantDeduplicationTask dedupTask = new DuplicatedVariantsResolverFactory(getOptions(), ioConnectorProvider) .getTask(resolver); - DataReader reader = variantReader.then(dedupTask); + GetLargestVariantTask largestVariantTask = new GetLargestVariantTask(); + DataReader reader = variantReader.then(dedupTask).then(largestVariantTask); // Variants Writer VariantHadoopDBWriter hadoopDBWriter = newVariantHadoopDBWriter(); // Sample Index Writer - List sampleIds = new ArrayList<>(getMetadataManager().getFileMetadata(studyId, fileId).getSamples()); SampleIndexDBLoader sampleIndexDBLoader = newSampleIndexDBLoader(sampleIds); Task progressLoggerTask = progressLogger @@ -436,6 +443,7 @@ protected void loadFromAvroWithoutArchive(URI input, URI outdir, ArchiveTableHel // Update list of loaded genotypes this.loadedGenotypes = 
sampleIndexDBLoader.getLoadedGenotypes(); this.sampleIndexVersion = sampleIndexDBLoader.getSampleIndexVersion(); + this.largestVariantLength = largestVariantTask.getMaxLength(); } } @@ -490,6 +498,8 @@ private void logLoadResults(VariantFileMetadata variantFileMetadata, int duplica } } } + getLoadStats().put("largestVariantLength", largestVariantLength); + logger.info("Largest variant found in VCF had a length of : {}", largestVariantLength); logger.info("============================================================"); } @@ -503,16 +513,50 @@ public URI postLoad(URI input, URI output) throws StorageEngineException { metadataManager.setStatus(getStudyId(), taskId, Status.READY); boolean loadSampleIndex = YesNoAuto.parse(getOptions(), LOAD_SAMPLE_INDEX.key()).orYes().booleanValue(); - if (loadSampleIndex) { - for (Integer sampleId : metadataManager.getSampleIdsFromFileId(getStudyId(), getFileId())) { - // Worth to check first to avoid too many updates in scenarios like 1000G - SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(getStudyId(), sampleId); - if (sampleMetadata.getSampleIndexStatus(sampleIndexVersion) != Status.READY) { - metadataManager.updateSampleMetadata(getStudyId(), sampleId, - s -> s.setSampleIndexStatus(Status.READY, sampleIndexVersion)); + for (Integer sampleId : metadataManager.getSampleIdsFromFileId(getStudyId(), getFileId())) { + // Worth to check first to avoid too many updates in scenarios like 1000G + SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(getStudyId(), sampleId); + boolean updateSampleIndexStatus = loadSampleIndex && sampleMetadata.getSampleIndexStatus(sampleIndexVersion) != Status.READY; + int actualLargestVariantLength = sampleMetadata.getAttributes().getInt(SampleIndexSchema.LARGEST_VARIANT_LENGTH); + boolean alreadyLoadedFiles = false; + if (sampleMetadata.getFiles().size() > 1) { + int loadedFiles = 0; + for (Integer fileId : sampleMetadata.getFiles()) { + if 
(metadataManager.isFileIndexed(getStudyId(), fileId)) { + loadedFiles++; + } + } + if (loadedFiles > 1) { + alreadyLoadedFiles = true; + } + metadataManager.getFileIdsFromSampleId(1, 1, true); + } + boolean updateLargestVariantLength; + if (alreadyLoadedFiles) { + if (actualLargestVariantLength > 0) { + // Already loaded files, with a valid value. Update if needed. + updateLargestVariantLength = largestVariantLength > actualLargestVariantLength; + } else { + // Already loaded files without a valid value. Do not set a value, as it might be smaller than previous files. + updateLargestVariantLength = false; } + } else { + // First file loaded. Update value + updateLargestVariantLength = true; + } + + if (updateSampleIndexStatus || updateLargestVariantLength) { + metadataManager.updateSampleMetadata(getStudyId(), sampleId, s -> { + if (updateSampleIndexStatus) { + s.setSampleIndexStatus(Status.READY, sampleIndexVersion); + } + if (updateLargestVariantLength) { + s.getAttributes().put(SampleIndexSchema.LARGEST_VARIANT_LENGTH, largestVariantLength); + } + }); } } + boolean loadArchive = YesNoAuto.parse(getOptions(), LOAD_ARCHIVE.key()).orYes().booleanValue(); if (loadArchive) { metadataManager.updateFileMetadata(getStudyId(), getFileId(), fileMetadata -> { @@ -578,18 +622,20 @@ protected static class GroupedVariantsTask implements Task otherTask; GroupedVariantsTask(VariantHBaseArchiveDataWriter archiveWriter, VariantHadoopDBWriter hadoopDBWriter, - SampleIndexDBLoader sampleIndexDBLoader, ProgressLogger progressLogger) { - this(archiveWriter, hadoopDBWriter, sampleIndexDBLoader, progressLogger, null, null); - } - - GroupedVariantsTask(VariantHBaseArchiveDataWriter archiveWriter, VariantHadoopDBWriter hadoopDBWriter, - SampleIndexDBLoader sampleIndexDBLoader, ProgressLogger progressLogger, String fields, String nonRefFilter) { + SampleIndexDBLoader sampleIndexDBLoader, ProgressLogger progressLogger, String fields, String nonRefFilter, + Task otherTask) { 
this.converterTask = new VariantToVcfSliceConverterTask(progressLogger, fields, nonRefFilter); this.archiveWriter = Objects.requireNonNull(archiveWriter); this.hadoopDBWriter = Objects.requireNonNull(hadoopDBWriter); this.sampleIndexDBLoader = sampleIndexDBLoader; + if (otherTask == null) { + this.otherTask = t -> t; + } else { + this.otherTask = otherTask; + } } @Override @@ -606,10 +652,11 @@ public void pre() throws Exception { } converterTask.pre(); + otherTask.pre(); } @Override - public List apply(List>> batch) { + public List apply(List>> batch) throws Exception { for (ImmutablePair> pair : batch) { List variants = VariantHadoopDBWriter.filterVariantsNotFromThisSlice(pair.getKey(), pair.getValue()); hadoopDBWriter.write(variants); @@ -617,6 +664,7 @@ public List apply(List>> batch) { if (sampleIndexDBLoader != null) { sampleIndexDBLoader.write(variants); } + otherTask.apply(variants); } List slices = converterTask.apply(batch); @@ -639,7 +687,36 @@ public void post() throws Exception { } converterTask.post(); + otherTask.post(); } } + private static class GetLargestVariantTask implements Task { + + private final AtomicInteger maxLength = new AtomicInteger(); + + @Override + public List apply(List variants) { + int localMax = maxLength.get(); + boolean newValue = false; + for (Variant variant : variants) { + if (variant.getLengthReference() > localMax) { + localMax = variant.getLengthReference(); + newValue = true; + } + } + if (newValue) { + updateMaxLength(localMax); + } + return variants; + } + + private void updateMaxLength(int local) { + maxLength.updateAndGet(v -> Math.max(v, local)); + } + + public int getMaxLength() { + return maxLength.get(); + } + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java index 3b487eaa7aa..ab6dfaf87ef 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java @@ -95,6 +95,9 @@ public enum HadoopVariantStorageOptions implements ConfigurationOption { SAMPLE_INDEX_BUILD_MAX_SAMPLES_PER_MR("storage.hadoop.sampleIndex.build.maxSamplesPerMR", 2000), SAMPLE_INDEX_ANNOTATION_MAX_SAMPLES_PER_MR("storage.hadoop.sampleIndex.annotation.maxSamplesPerMR", 2000), SAMPLE_INDEX_FAMILY_MAX_TRIOS_PER_MR("storage.hadoop.sampleIndex.family.maxTriosPerMR", 1000), + SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER("storage.hadoop.sampleIndex.query.sampleIndexOnly.partialData.buffer", 10000), + SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BATCH("storage.hadoop.sampleIndex.query.sampleIndexOnly.partialData.batch", 250), + SAMPLE_INDEX_QUERY_EXTENDED_REGION_FILTER("storage.hadoop.sampleIndex.query.extendedRegionFilter.default", 5_000_000), ///////////////////////// // Annotation index table configuration diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHBaseQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHBaseQueryParser.java index 2d6c3ce8593..1de83e96382 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHBaseQueryParser.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHBaseQueryParser.java @@ -225,7 +225,8 @@ public List parseQueryMultiRegion(VariantQueryProjection selectElements, Q } List scans; - if ((regions.isEmpty() || regions.size() == 1) && variants.isEmpty() && idIntersect.isEmpty()) { + int numLocusFilters = regions.size() + variants.size() + idIntersect.size(); + if (numLocusFilters <= 1) { scans = Collections.singletonList(parseQuery(selectElements, query, options)); } else { scans = new ArrayList<>(regions.size() + variants.size() + idIntersect.size()); @@ -236,7 +237,7 @@ public List parseQueryMultiRegion(VariantQueryProjection selectElements, Q subQuery.remove(ID.key()); subQuery.remove(ID_INTERSECT.key()); - subQuery.put(REGION.key(), "MULTI_REGION"); + subQuery.put(REGION.key(), "MULTI_REGION (#" + numLocusFilters + ")"); Scan templateScan = parseQuery(selectElements, subQuery, options); for (Region region : regions) { @@ -254,7 +255,7 @@ public List parseQueryMultiRegion(VariantQueryProjection selectElements, Q subQuery.put(ID.key(), variant); try { Scan scan = new Scan(templateScan); - scan.setSmall(true); + scan.setOneRowLimit(); addVariantIdFilter(scan, variant); scans.add(scan); } catch (IOException e) { @@ -301,6 +302,17 @@ public Scan parseQuery(VariantQueryProjection selectElements, Query query, Query Variant variant = VariantQueryUtils.toVariant(ids.get(0)); addVariantIdFilter(scan, variant); regionOrVariant = variant; + scan.setOneRowLimit(); + } + if (isValidParam(query, ID_INTERSECT)) { + List ids = query.getAsStringList(ID_INTERSECT.key()); + if (ids.size() != 1) { + throw VariantQueryException.malformedParam(ID_INTERSECT, ids.toString(), "Unsupported multiple variant ids filter"); + } + Variant variant = VariantQueryUtils.toVariant(ids.get(0)); + addVariantIdFilter(scan, variant); + regionOrVariant = variant; + scan.setOneRowLimit(); } // if 
(isValidParam(query, ID)) { @@ -587,21 +599,26 @@ public Scan parseQuery(VariantQueryProjection selectElements, Query query, Query } scan.setReversed(options.getString(QueryOptions.ORDER, QueryOptions.ASCENDING).equals(QueryOptions.DESCENDING)); - logger.info("----------------------------"); - logger.info("StartRow = " + Bytes.toStringBinary(scan.getStartRow())); - logger.info("StopRow = " + Bytes.toStringBinary(scan.getStopRow())); - if (regionOrVariant != null) { - logger.info("\tRegion = " + regionOrVariant); - } - logger.info("columns (" + scan.getFamilyMap().getOrDefault(family, Collections.emptyNavigableSet()).size() + ") = " - + scan.getFamilyMap().getOrDefault(family, Collections.emptyNavigableSet()) - .stream().map(Bytes::toString).collect(Collectors.joining(","))); - logger.info("MaxResultSize = " + scan.getMaxResultSize()); - logger.info("Filters = " + scan.getFilter()); - if (!scan.getTimeRange().isAllTime()) { - logger.info("TimeRange = " + scan.getTimeRange()); - } - logger.info("Batch = " + scan.getBatch()); + if (!options.getBoolean(VariantHadoopDBAdaptor.QUIET)) { + logger.info("----------------------------"); + String startRow = Bytes.toStringBinary(scan.getStartRow()); + if (!startRow.startsWith("MULTI_REGION")) { + logger.info("StartRow = " + startRow); + logger.info("StopRow = " + Bytes.toStringBinary(scan.getStopRow())); + } + if (regionOrVariant != null) { + logger.info("\tRegion = " + regionOrVariant); + } + logger.info("columns (" + scan.getFamilyMap().getOrDefault(family, Collections.emptyNavigableSet()).size() + ") = " + + scan.getFamilyMap().getOrDefault(family, Collections.emptyNavigableSet()) + .stream().map(Bytes::toString).collect(Collectors.joining(","))); + logger.info("MaxResultSize = " + scan.getMaxResultSize()); + logger.info("Filters = " + scan.getFilter()); + if (!scan.getTimeRange().isAllTime()) { + logger.info("TimeRange = " + scan.getTimeRange()); + } + logger.info("Batch = " + scan.getBatch()); + } return scan; } diff 
--git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java index e4b1acc4980..3d6fe440da1 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/adaptors/VariantHadoopDBAdaptor.java @@ -90,6 +90,7 @@ public class VariantHadoopDBAdaptor implements VariantDBAdaptor { public static final String NATIVE = "native"; + public static final String QUIET = "quiet"; public static final QueryParam ANNOT_NAME = QueryParam.create("annotName", "", Type.TEXT); protected static Logger logger = LoggerFactory.getLogger(VariantHadoopDBAdaptor.class); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java index c6921fdfb55..7acd417c78e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java @@ -240,11 +240,7 @@ protected StudyEntry convert(List sampleDataMap, } Map> alternateFileMap = new HashMap<>(); - for (Pair pair : filesMap) { - String fileId = pair.getKey(); - PhoenixArray 
fileColumn = pair.getValue(); - addFileEntry(studyMetadata, variant, studyEntry, fileId, fileColumn, alternateFileMap); - } + addFileEntries(filesMap, variant, studyMetadata, studyEntry, alternateFileMap); addSecondaryAlternates(variant, studyEntry, studyMetadata, alternateFileMap); fillEmptySamplesData(studyEntry, studyMetadata, fillMissingColumnValue); @@ -386,8 +382,36 @@ private List remapSamplesData(List sampleData, int[] formatsMap) } } - private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEntry studyEntry, String fileIdStr, - PhoenixArray fileColumn, Map> alternateFileMap) { + private void addFileEntries(List> filesMap, Variant variant, StudyMetadata studyMetadata, + StudyEntry studyEntry, Map> alternateFileMap) { + // Some file entries might be added only for their "OriginalCall" info. + // These would be added at the end, but only if the original call is not already present. + ArrayList filesOnlyCall = new ArrayList<>(); + for (Pair pair : filesMap) { + String fileId = pair.getKey(); + PhoenixArray fileColumn = pair.getValue(); + addFileEntry(studyMetadata, variant, fileId, fileColumn, alternateFileMap, studyEntry.getFiles(), filesOnlyCall); + } + if (!filesOnlyCall.isEmpty()) { + // Create a set of original calls to avoid duplicates + Set variantIds = new HashSet<>(); + for (FileEntry fileEntry : studyEntry.getFiles()) { + if (fileEntry.getCall() != null) { + variantIds.add(fileEntry.getCall().getVariantId()); + } + } + for (FileEntry fileEntry : filesOnlyCall) { + if (variantIds.add(fileEntry.getCall().getVariantId())) { + // Not seen, so add to the list of file entries + studyEntry.getFiles().add(fileEntry); + } + } + } + } + + private void addFileEntry(StudyMetadata studyMetadata, Variant variant, String fileIdStr, + PhoenixArray fileColumn, Map> alternateFileMap, + List files, List filesOnlyCall) { int fileId = Integer.parseInt(fileIdStr); String alternateRaw = (String) (fileColumn.getElement(FILE_SEC_ALTS_IDX)); String 
alternate = normalizeNonRefAlternateCoordinate(variant, alternateRaw); @@ -399,10 +423,10 @@ private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEnt if (configuration.getProjection() != null && !configuration.getProjection().getStudy(studyMetadata.getId()).getFiles().contains(fileId)) { - // TODO: Should we return the original CALL? -// if (call != null && !call.isEmpty()) { -// studyEntry.getFiles().add(new FileEntry(fileName, call, Collections.emptyMap())); -// } + if (call != null && !call.isEmpty()) { + OriginalCall originalCall = parseOriginalCall(call); + filesOnlyCall.add(new FileEntry(fileName, originalCall, Collections.emptyMap())); + } return; } @@ -412,8 +436,7 @@ private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEnt VariantOverlappingStatus overlappingStatus = VariantOverlappingStatus.valueFromShortString((String) (fileColumn.getElement(FILE_VARIANT_OVERLAPPING_STATUS_IDX))); if (call != null && !call.isEmpty()) { - int i = call.lastIndexOf(':'); - originalCall = new OriginalCall(call.substring(0, i), Integer.valueOf(call.substring(i + 1))); + originalCall = parseOriginalCall(call); } else if (overlappingStatus.equals(VariantOverlappingStatus.MULTI)) { attributes.put(StudyEntry.FILTER, "SiteConflict"); AlternateCoordinate alternateCoordinate = getAlternateCoordinate(alternateRaw); @@ -424,7 +447,13 @@ private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEnt alternateCoordinate.getReference(), alternateCoordinate.getAlternate()).toString(), 0); } - studyEntry.getFiles().add(new FileEntry(fileName, originalCall, attributes)); + files.add(new FileEntry(fileName, originalCall, attributes)); + } + + private OriginalCall parseOriginalCall(String call) { + int i = call.lastIndexOf(':'); + OriginalCall originalCall = new OriginalCall(call.substring(0, i), Integer.valueOf(call.substring(i + 1))); + return originalCall; } public static HashMap convertFileAttributes(PhoenixArray 
fileColumn, List fixedAttributes) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java index 9211318768a..4e33e778ca4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexOnlyVariantQueryExecutor.java @@ -6,21 +6,22 @@ import org.apache.commons.lang3.time.StopWatch; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.FileEntry; import org.opencb.biodata.models.variant.avro.SampleEntry; import org.opencb.biodata.tools.commons.Converter; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.commons.run.Task; +import org.opencb.opencga.core.common.BatchUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import org.opencb.opencga.storage.core.utils.iterators.CloseableIterator; -import org.opencb.opencga.storage.core.variant.adaptors.VariantField; -import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; -import 
org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.adaptors.*; import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator; import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery; import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; @@ -41,8 +42,12 @@ import java.io.IOException; import java.util.*; import java.util.concurrent.*; +import java.util.stream.Collectors; +import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.NONE; import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.addSamplesMetadataIfRequested; +import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BATCH; +import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER; import static org.opencb.opencga.storage.hadoop.variant.index.SampleIndexVariantQueryExecutor.SAMPLE_INDEX_TABLE_SOURCE; /** @@ -61,6 +66,11 @@ public class SampleIndexOnlyVariantQueryExecutor extends VariantQueryExecutor { private static final ExecutorService THREAD_POOL = Executors.newCachedThreadPool(new BasicThreadFactory.Builder() .namingPattern("sample-index-async-count-%s") .build()); + private static final ExecutorService THREAD_POOL_FETCH_CALL = Executors.newCachedThreadPool(new BasicThreadFactory.Builder() + .namingPattern("sample-index-fetch-call-%s") + .build()); + private int partialDataBufferSize; + private int partialDataBatchSize; public SampleIndexOnlyVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, SampleIndexDBAdaptor sampleIndexDBAdaptor, String storageEngineId, ObjectMap options) { @@ -69,6 +79,10 @@ public SampleIndexOnlyVariantQueryExecutor(VariantHadoopDBAdaptor dbAdaptor, Sam this.dbAdaptor = dbAdaptor; variantQueryParser = new VariantQueryParser(null, getMetadataManager()); 
variantQueryProjectionParser = new VariantQueryProjectionParser(getMetadataManager()); + partialDataBufferSize = options.getInt(SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER.key(), + SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BUFFER.defaultValue()); + partialDataBatchSize = options.getInt(SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BATCH.key(), + SAMPLE_INDEX_QUERY_SAMPLE_INDEX_ONLY_PD_BATCH.defaultValue()); } @Override @@ -143,9 +157,9 @@ protected Object getOrIterator(Query inputQuery, QueryOptions options, boolean i } private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery, Query inputQuery, QueryOptions options) { - ParsedVariantQuery parseQuery = variantQueryParser.parseQuery(inputQuery, options, true); + ParsedVariantQuery parsedQuery = variantQueryParser.parseQuery(inputQuery, options, true); VariantDBIterator variantIterator; - if (parseQuery.getProjection().getStudyIds().isEmpty()) { + if (parsedQuery.getProjection().getStudyIds().isEmpty()) { logger.info("Using sample index iterator Iterator"); variantIterator = sampleIndexDBAdaptor.iterator(sampleIndexQuery, options); variantIterator = variantIterator.map(v -> v.setId(v.toString())); @@ -158,8 +172,11 @@ private VariantDBIterator getVariantDBIterator(SampleIndexQuery sampleIndexQuery throw VariantQueryException.internalException(e).setQuery(inputQuery); } SampleVariantIndexEntryToVariantConverter converter = - new SampleVariantIndexEntryToVariantConverter(parseQuery, sampleIndexQuery, dbAdaptor.getMetadataManager()); + new SampleVariantIndexEntryToVariantConverter(parsedQuery, sampleIndexQuery, dbAdaptor.getMetadataManager()); variantIterator = VariantDBIterator.wrapper(Iterators.transform(rawIterator, converter::convert)); + AddMissingDataTask task = new AddMissingDataTask( + parsedQuery, sampleIndexQuery, dbAdaptor.getMetadataManager()); + variantIterator = variantIterator.mapBuffered(task::apply, partialDataBufferSize); variantIterator.addCloseable(rawIterator); } return 
variantIterator; @@ -201,7 +218,6 @@ private boolean isIncludeCovered(SampleIndexQuery sampleIndexQuery, Query inputQ || includeFields.contains(VariantField.STUDIES_FILES) || includeFields.contains(VariantField.STUDIES_ISSUES) || includeFields.contains(VariantField.STUDIES_SCORES) - || includeFields.contains(VariantField.STUDIES_SECONDARY_ALTERNATES) ) { return false; } @@ -298,7 +314,6 @@ enum FamilyRole { sampleName = sampleIndexQuery.getSamplesMap().keySet().iterator().next(); Integer sampleId = metadataManager.getSampleId(studyId, sampleName); - familyRoleOrder = new ArrayList<>(); samplesPosition = new LinkedHashMap<>(); SampleMetadata sampleMetadata = null; // lazy init @@ -369,4 +384,172 @@ public Variant convert(SampleVariantIndexEntry entry) { return v; } } + + private class AddMissingDataTask implements Task { + private final ParsedVariantQuery parsedQuery; + private final String studyName; + private final List samples; + private final List files; + private final List allFiles; + + AddMissingDataTask(ParsedVariantQuery parsedQuery, SampleIndexQuery sampleIndexQuery, + VariantStorageMetadataManager metadataManager) { + this.parsedQuery = parsedQuery; + VariantQueryProjection projection = this.parsedQuery.getProjection(); + + int studyId = projection.getStudyIds().get(0); // only one study + VariantQueryProjection.StudyVariantQueryProjection projectionStudy = projection.getStudy(studyId); + studyName = projectionStudy.getStudyMetadata().getName(); + + if (sampleIndexQuery.getSamplesMap().size() != 1) { + // This should never happen + throw new IllegalStateException("Unexpected number of samples. 
Expected one, found " + + sampleIndexQuery.getSamplesMap().keySet()); + } + samples = new ArrayList<>(projectionStudy.getSamples().size()); + for (Integer sample : projectionStudy.getSamples()) { + samples.add(metadataManager.getSampleName(studyId, sample)); + } + Set allFileIds = metadataManager.getFileIdsFromSampleIds(studyId, projectionStudy.getSamples(), true); + allFiles = new ArrayList<>(allFileIds.size()); + for (Integer fileId : allFileIds) { + allFiles.add(metadataManager.getFileName(studyId, fileId)); + } + + String sampleName = sampleIndexQuery.getSamplesMap().keySet().iterator().next(); + Integer sampleId = metadataManager.getSampleId(studyId, sampleName); + List fileIds = metadataManager.getFileIdsFromSampleId(studyId, sampleId, true); + files = new ArrayList<>(fileIds.size()); + for (Integer fileId : fileIds) { + files.add(metadataManager.getFileName(studyId, fileId)); + } + } + + @Override + public List apply(List variants) { + // Multi allelic variants, to be read entirely + List multiAllelic = new ArrayList<>(); + // INDELs (non multiallelic) variants, to fetch the original call + List indels = new ArrayList<>(); + for (Variant variant : variants) { + boolean secAlt = false; + for (SampleEntry sample : variant.getStudies().get(0).getSamples()) { + if (GenotypeClass.SEC.test(sample.getData().get(0))) { + secAlt = true; + break; + } + } + if (secAlt) { + multiAllelic.add(variant); + } else { + if (variant.getLengthReference() == 0 || variant.getLengthAlternate() == 0) { + indels.add(variant); + } + } + } + // Process in multiple treads + List> futures = new ArrayList<>(10); + if (!multiAllelic.isEmpty()) { + List> batches = BatchUtils.splitBatches(multiAllelic, partialDataBatchSize); + for (List batch : batches) { + futures.add(THREAD_POOL_FETCH_CALL.submit(() -> addSecondaryAlternates(batch))); + } + } + if (!indels.isEmpty()) { + List> batches = BatchUtils.splitBatches(indels, partialDataBatchSize); + for (List batch : batches) { + 
futures.add(THREAD_POOL_FETCH_CALL.submit(() -> addOriginalCall(batch, studyName))); + } + } + + StopWatch stopWatch = StopWatch.createStarted(); + for (Future future : futures) { + try { + // Should end in few seconds + future.get(90, TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + throw new VariantQueryException("Error fetching original call for INDELs"); + } + } + logger.info("Fetch {} partial variants in {} in {} threads", + multiAllelic.size() + indels.size(), + TimeUtils.durationToString(stopWatch), + futures.size()); + return variants; + } + + private void addSecondaryAlternates(List toReadFull) { +// StopWatch stopWatch = StopWatch.createStarted(); + Set includeFields = new HashSet<>(VariantField.getIncludeFields(parsedQuery.getInputOptions())); + includeFields.add(VariantField.STUDIES_SECONDARY_ALTERNATES); + includeFields.add(VariantField.STUDIES_FILES); + + QueryOptions options = new QueryOptions(parsedQuery.getInputOptions()); + options.remove(QueryOptions.EXCLUDE); + options.remove(VariantField.SUMMARY); + options.put(QueryOptions.INCLUDE, includeFields); + options.put(VariantHadoopDBAdaptor.QUIET, true); + options.put(VariantHadoopDBAdaptor.NATIVE, true); + + Map fullVariants = dbAdaptor.get(new VariantQuery() + .id(toReadFull) + .study(studyName) + .includeSample(samples) + .includeSampleData(VariantQueryUtils.getIncludeSampleData(parsedQuery.getInputQuery())) + .includeFile(allFiles), + options) + .getResults().stream().collect(Collectors.toMap(Variant::toString, v -> v)); + + for (Variant variant : toReadFull) { + Variant fullVariant = fullVariants.get(variant.toString()); + if (fullVariant == null) { + // TODO: Should we fail here? 
+// throw new VariantQueryException("Variant " + variant + " not found!"); + logger.warn("Variant " + variant + " not found!"); + continue; + } + StudyEntry fullStudy = fullVariant.getStudies().get(0); + fullStudy.getFiles().forEach(f -> f.setData(Collections.emptyMap())); + fullStudy.setStats(Collections.emptyList()); + variant.setStudies(Collections.singletonList(fullStudy)); + } +// logger.info(" # Fetch {} SEC_ALTS in {}", toReadFull.size(), TimeUtils.durationToString(stopWatch)); + } + + private void addOriginalCall(List variants, String study) { +// StopWatch stopWatch = StopWatch.createStarted(); + Map> filesMap = new HashMap<>(variants.size()); + for (Variant variant : dbAdaptor.iterable( + new Query() + .append(VariantQueryParam.ID.key(), variants) + .append(VariantQueryParam.INCLUDE_FILE.key(), files) + .append(VariantQueryParam.INCLUDE_SAMPLE.key(), NONE) + .append(VariantQueryParam.INCLUDE_STUDY.key(), study), + new QueryOptions() + .append(VariantHadoopDBAdaptor.NATIVE, true) + .append(VariantHadoopDBAdaptor.QUIET, true) + .append(QueryOptions.INCLUDE, Arrays.asList(VariantField.STUDIES_FILES)))) { + + List fileEntries = variant.getStudies().get(0).getFiles(); + // Remove data, as we only want the original call + fileEntries.forEach(fileEntry -> fileEntry.setData(Collections.emptyMap())); + filesMap.put(variant.toString(), fileEntries); + } + + for (Variant variant : variants) { + List fileEntries = filesMap.get(variant.toString()); + if (fileEntries == null) { + // TODO: Should we fail here? 
+// throw new VariantQueryException("Variant " + variant + " not found!"); + logger.warn("Variant " + variant + " not found!"); + continue; + } + variant.getStudies().get(0).setFiles(fileEntries); + } +// logger.info(" # Fetch {} INDEL original call in {}", filesMap.size(), TimeUtils.durationToString(stopWatch)); + } + + + } + } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/LocusQuery.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/LocusQuery.java index ff031718ebd..2bd4c84f900 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/LocusQuery.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/LocusQuery.java @@ -2,7 +2,6 @@ import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.Variant; -import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexSchema; import java.util.ArrayList; import java.util.List; @@ -14,7 +13,7 @@ /** * Sample index queries based on position, aligned to SampleIndex chunks. */ -public class LocusQuery { +public class LocusQuery implements Comparable { /** * Region aligned with sampleIndex chunks covering all locus from regions and variants. 
*/ @@ -34,12 +33,6 @@ public LocusQuery(Region chunkRegion, List regions, List varian this.variants = variants; } - public static LocusQuery buildLocusQuery(Region r) { - LocusQuery locusQuery = new LocusQuery(SampleIndexSchema.getChunkRegion(r)); - locusQuery.getRegions().add(r); - return locusQuery; - } - public Region getChunkRegion() { return chunkRegion; } @@ -96,4 +89,9 @@ public String toString() { sb.append('}'); return sb.toString(); } + + @Override + public int compareTo(LocusQuery o) { + return REGION_COMPARATOR.compare(chunkRegion, o.chunkRegion); + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java index fddc76a5fc2..664ace07747 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SampleIndexQuery.java @@ -33,6 +33,7 @@ public class SampleIndexQuery { private final SampleIndexSchema schema; private final Collection locusQueries; + private final int extendedFilteringRegion; private final Set variantTypes; private final String study; private final Map> samplesMap; @@ -59,6 +60,7 @@ public enum MendelianErrorType { public SampleIndexQuery(Collection locusQueries, SampleIndexQuery query) { this.schema = query.schema; this.locusQueries = locusQueries; + this.extendedFilteringRegion = query.extendedFilteringRegion; this.variantTypes = query.variantTypes; this.study = query.study; this.samplesMap = query.samplesMap; @@ -74,14 +76,8 @@ public SampleIndexQuery(Collection locusQueries, SampleIndexQuery qu this.queryOperation = 
query.queryOperation; } - public SampleIndexQuery(SampleIndexSchema schema, Collection locusQueries, String study, Map> samplesMap, QueryOperation queryOperation) { - this(schema, locusQueries, null, study, samplesMap, Collections.emptySet(), null, Collections.emptyMap(), Collections.emptyMap(), - Collections.emptyMap(), - new SampleAnnotationIndexQuery(schema), Collections.emptySet(), null, false, queryOperation); - } - - public SampleIndexQuery(SampleIndexSchema schema, Collection locusQueries, Set variantTypes, String study, + public SampleIndexQuery(SampleIndexSchema schema, Collection locusQueries, int extendedFilteringRegion, + Set variantTypes, String study, Map> samplesMap, Set multiFileSamplesSet, Set negatedSamples, Map fatherFilter, Map motherFilter, Map> fileFilterMap, @@ -90,6 +86,7 @@ public SampleIndexQuery(SampleIndexSchema schema, Collection locusQu QueryOperation queryOperation) { this.schema = schema; this.locusQueries = locusQueries; + this.extendedFilteringRegion = extendedFilteringRegion; this.variantTypes = variantTypes; this.study = study; this.samplesMap = samplesMap; @@ -113,6 +110,10 @@ public Collection getLocusQueries() { return locusQueries; } + public int getExtendedFilteringRegion() { + return extendedFilteringRegion; + } + public List getAllRegions() { return locusQueries.stream() .map(LocusQuery::getRegions) @@ -163,7 +164,6 @@ public Set getNegatedSamples() { return negatedSamples; } - public boolean isNegated(String sample) { return getNegatedSamples().contains(sample); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java index 7db8c2d0e8b..35a9e67e718 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/query/SingleSampleIndexQuery.java @@ -24,7 +24,7 @@ protected SingleSampleIndexQuery(SampleIndexQuery query, String sample) { protected SingleSampleIndexQuery(SampleIndexQuery query, String sample, List gts) { super(query.getSchema(), query.getLocusQueries() == null ? null : new ArrayList<>(query.getLocusQueries()), - query.getVariantTypes() == null ? null : new HashSet<>(query.getVariantTypes()), + query.getExtendedFilteringRegion(), query.getVariantTypes() == null ? null : new HashSet<>(query.getVariantTypes()), query.getStudy(), Collections.singletonMap(sample, gts), query.getMultiFileSamplesSet(), diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java index 37dbb0180b3..7cb87773f0c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/AbstractSampleIndexEntryFilter.java @@ -459,7 +459,7 @@ private boolean filterLocus(Variant variant) { return true; } for (Region region : locusQuery.getRegions()) { - if (region.contains(variant.getChromosome(), variant.getStart())) { + if (region.overlaps(variant.getChromosome(), variant.getStart(), variant.getEnd())) { return true; } } diff --git 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index d94ec75cab5..c7abaedcde9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -369,14 +369,8 @@ private long count(SingleSampleIndexQuery query) { long count = 0; for (LocusQuery locusQuery : locusQueries) { // Split region in countable regions - List subLocusQueries; - if (locusQuery != null && locusQuery.getVariants().isEmpty() && locusQuery.getRegions().size() == 1) { - subLocusQueries = splitRegion(locusQuery.getRegions().get(0)) - .stream().map(LocusQuery::buildLocusQuery).collect(Collectors.toList()); - } else { - // Do not split - subLocusQueries = Collections.singletonList(locusQuery); - } + List subLocusQueries = splitLocusQuery(locusQuery); + for (LocusQuery subLocusQuery : subLocusQueries) { boolean noLocusFilter = subLocusQuery == null || (subLocusQuery.getVariants().isEmpty() @@ -466,36 +460,60 @@ protected List getAllLoadedGenotypes(int study) { /** * Split region into regions that match with batches at SampleIndexTable. * - * @param region Region to split + * @param locusQuery Locus query to split * @return List of regions. 
*/ - protected static List splitRegion(Region region) { - List regions; + protected static List splitLocusQuery(LocusQuery locusQuery) { + if (locusQuery == null || !locusQuery.getVariants().isEmpty() || locusQuery.getRegions().size() != 1) { + // Do not split + return Collections.singletonList(locusQuery); + } + Region region = locusQuery.getRegions().get(0); + List locusQueries; if (region.getEnd() - region.getStart() < SampleIndexSchema.BATCH_SIZE) { // Less than one batch. Do not split region - regions = Collections.singletonList(region); + locusQueries = Collections.singletonList(locusQuery); } else if (region.getStart() / SampleIndexSchema.BATCH_SIZE + 1 == region.getEnd() / SampleIndexSchema.BATCH_SIZE && !startsAtBatch(region) && !endsAtBatch(region)) { // Consecutive partial batches. Do not split region - regions = Collections.singletonList(region); + locusQueries = Collections.singletonList(locusQuery); } else { - // Copy region before modifying + locusQueries = new ArrayList<>(3); +// Copy regions before modifying region = new Region(region.getChromosome(), region.getStart(), region.getEnd()); - regions = new ArrayList<>(3); + Region chunkRegion = new Region(locusQuery.getChunkRegion().getChromosome(), + locusQuery.getChunkRegion().getStart(), + locusQuery.getChunkRegion().getEnd()); if (!startsAtBatch(region)) { - int splitPoint = region.getStart() - region.getStart() % SampleIndexSchema.BATCH_SIZE + SampleIndexSchema.BATCH_SIZE; - regions.add(new Region(region.getChromosome(), region.getStart(), splitPoint - 1)); + int splitPoint = SampleIndexSchema.getChunkStartNext(region.getStart()); + Region startRegion = new Region(region.getChromosome(), region.getStart(), splitPoint - 1); + locusQueries.add(new LocusQuery( + // Keep the exceeded start only for the first split. 
+ new Region(chunkRegion.getChromosome(), chunkRegion.getStart(), splitPoint), + Collections.singletonList(startRegion), + Collections.emptyList())); region.setStart(splitPoint); + chunkRegion.setStart(splitPoint); } - regions.add(region); if (!endsAtBatch(region)) { - int splitPoint = region.getEnd() - region.getEnd() % SampleIndexSchema.BATCH_SIZE; - regions.add(new Region(region.getChromosome(), splitPoint, region.getEnd())); + int splitPoint = SampleIndexSchema.getChunkStart(region.getEnd()); + Region endRegion = new Region(region.getChromosome(), splitPoint, region.getEnd()); + locusQueries.add(new LocusQuery( + SampleIndexSchema.getChunkRegion(endRegion, 0), + Collections.singletonList(endRegion), + Collections.emptyList() + )); region.setEnd(splitPoint - 1); + chunkRegion.setEnd(splitPoint); } + locusQueries.add(new LocusQuery( + chunkRegion, + Collections.singletonList(region), + Collections.emptyList())); + locusQueries.sort(LocusQuery::compareTo); } - return regions; + return locusQueries; } protected static boolean matchesWithBatch(Region region) { @@ -711,7 +729,12 @@ public static void printScan(Scan scan) { public static void printQuery(LocusQuery locusQuery) { if (locusQuery != null) { - logger.info("ChunkRegion: [ " + locusQuery.getChunkRegion() + " )"); + Region chunk = locusQuery.getChunkRegion(); + if (chunk.getStart() == 0 && chunk.getEnd() == Integer.MAX_VALUE) { + logger.info("ChunkRegion: [ " + chunk.getChromosome() + " )"); + } else { + logger.info("ChunkRegion: [ " + chunk.getChromosome() + ":" + chunk.getStart() + "-" + chunk.getEnd() + " )"); + } if (!locusQuery.getRegions().isEmpty()) { logger.info(" - Regions: " + locusQuery.getRegions()); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java index 2dda3e69562..d8488e87a8e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDriver.java @@ -538,7 +538,7 @@ private void countSampleGt(Context context, int sampleId, String gt) { public void flush(Context context, String chromosome, int position) throws IOException, InterruptedException { logger.info("Flush {}:{}-{} with {} variants", chromosome, SampleIndexSchema.getChunkStart(position), - SampleIndexSchema.getChunkStart(position) + SampleIndexSchema.BATCH_SIZE, + SampleIndexSchema.getChunkStartNext(position), variantsInBatch); variantsInBatch = 0; for (SampleIndexEntryPutBuilder builder : samplesMap.values()) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java index 5c0b2e77570..3731d3ff6f4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java @@ -20,6 +20,7 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import 
org.opencb.opencga.storage.core.variant.query.*; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions; import org.opencb.opencga.storage.hadoop.variant.index.annotation.CtBtFtCombinationIndexSchema; import org.opencb.opencga.storage.hadoop.variant.index.core.IndexField; import org.opencb.opencga.storage.hadoop.variant.index.core.RangeIndexField; @@ -51,15 +52,18 @@ public class SampleIndexQueryParser { private static Logger logger = LoggerFactory.getLogger(SampleIndexQueryParser.class); private final VariantStorageMetadataManager metadataManager; private final SampleIndexSchemaFactory schemaFactory; + private final int extendedFilteringRegionDefault; public SampleIndexQueryParser(VariantStorageMetadataManager metadataManager) { - this.metadataManager = metadataManager; - this.schemaFactory = new SampleIndexSchemaFactory(metadataManager); + this(metadataManager, new SampleIndexSchemaFactory(metadataManager)); } public SampleIndexQueryParser(VariantStorageMetadataManager metadataManager, SampleIndexSchemaFactory schemaFactory) { this.metadataManager = metadataManager; this.schemaFactory = schemaFactory; + this.extendedFilteringRegionDefault = metadataManager.getConfiguration().getInt( + HadoopVariantStorageOptions.SAMPLE_INDEX_QUERY_EXTENDED_REGION_FILTER.key(), + HadoopVariantStorageOptions.SAMPLE_INDEX_QUERY_EXTENDED_REGION_FILTER.defaultValue()); } /** @@ -528,9 +532,20 @@ public SampleIndexQuery parse(Query query) { // } } - Collection regionGroups = buildLocusQueries(regions, variants); + int extendedFilteringRegion = 0; + for (String sample : sampleGenotypeQuery.keySet()) { + SampleMetadata sampleMetadata = getSampleMetadata(sampleMetadatas, sample, studyId); + extendedFilteringRegion = Math.max(extendedFilteringRegion, sampleMetadata.getAttributes() + .getInt(SampleIndexSchema.LARGEST_VARIANT_LENGTH, -1)); + } + if (extendedFilteringRegion <= 0) { + extendedFilteringRegion = extendedFilteringRegionDefault; + } + + Collection 
regionGroups = buildLocusQueries(regions, variants, extendedFilteringRegion); - return new SampleIndexQuery(schema, regionGroups, variantTypes, study, sampleGenotypeQuery, multiFileSamples, negatedSamples, + return new SampleIndexQuery(schema, regionGroups, extendedFilteringRegion, variantTypes, study, + sampleGenotypeQuery, multiFileSamples, negatedSamples, fatherFilterMap, motherFilterMap, fileIndexMap, annotationIndexQuery, mendelianErrorSet, mendelianErrorType, includeParentsField, queryOperation); } @@ -563,13 +578,14 @@ private SampleMetadata getSampleMetadata(Map sampleMetad * * @param regions List of regions to group * @param variants List of variants to group + * @param extendedFilteringRegion number of positions to start filtering before any actual region filter start * @return Locus Queries */ - public static Collection buildLocusQueries(List regions, List variants) { + public static Collection buildLocusQueries(List regions, List variants, int extendedFilteringRegion) { regions = mergeRegions(regions); Map groupsMap = new HashMap<>(); for (Region region : regions) { - Region chunkRegion = SampleIndexSchema.getChunkRegion(region); + Region chunkRegion = SampleIndexSchema.getChunkRegion(region, extendedFilteringRegion); groupsMap.computeIfAbsent(chunkRegion, LocusQuery::new).getRegions().add(region); } for (Variant variant : variants) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index be24989a7f4..2a82e5d1750 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -118,6 +118,8 @@ public final class SampleIndexSchema { static final String ANNOTATION_CLINICAL_PREFIX = META_PREFIX + "CL_"; static final byte[] ANNOTATION_CLINICAL_PREFIX_BYTES = Bytes.toBytes(ANNOTATION_CLINICAL_PREFIX); + public static final String LARGEST_VARIANT_LENGTH = "largestVariantLength"; + private final int version; private final SampleIndexConfiguration configuration; private final FileIndexSchema fileIndex; @@ -200,8 +202,12 @@ public static int getChunkStart(Integer start) { return (start / BATCH_SIZE) * BATCH_SIZE; } - public static Region getChunkRegion(Region region) { - return getChunkRegion(region.getChromosome(), region.getStart(), region.getEnd()); + public static int getChunkStartNext(Integer start) { + return getChunkStart(start + SampleIndexSchema.BATCH_SIZE); + } + + public static Region getChunkRegion(Region region, int extendedFilteringRegion) { + return getChunkRegion(region.getChromosome(), Math.max(0, region.getStart() - extendedFilteringRegion), region.getEnd()); } public static Region getChunkRegion(Variant variant) { @@ -209,11 +215,11 @@ public static Region getChunkRegion(Variant variant) { return getChunkRegion(variant.getChromosome(), variant.getStart(), variant.getStart()); } - public static Region getChunkRegion(String chromosome, int start, int end) { + private static Region getChunkRegion(String chromosome, int start, int end) { return new Region(chromosome, SampleIndexSchema.getChunkStart(start), end == Integer.MAX_VALUE ? 
Integer.MAX_VALUE - : SampleIndexSchema.getChunkStart(end + SampleIndexSchema.BATCH_SIZE)); + : SampleIndexSchema.getChunkStartNext(end)); } public static int getExpectedSize(String chromosome) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java index 69f4576b7d2..2efd1115846 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/SampleIndexTableRecordReader.java @@ -20,6 +20,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.query.LocusQuery; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleIndexQuery; import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexQueryParser; import org.opencb.opencga.storage.hadoop.variant.metadata.HBaseVariantStorageMetadataDBAdaptorFactory; import org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator; import org.slf4j.Logger; @@ -27,7 +28,6 @@ import java.io.IOException; import java.util.*; -import java.util.stream.Collectors; import static org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator.getDBNameFromVariantsTableName; @@ -222,7 +222,8 @@ public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws if (regions.isEmpty()) { iterator = VariantDBIterator.emptyIterator(); } else { - Collection locusQueries = regions.stream().map(LocusQuery::buildLocusQuery).collect(Collectors.toList()); + Collection locusQueries 
= SampleIndexQueryParser + .buildLocusQueries(regions, Collections.emptyList(), sampleIndexQuery.getExtendedFilteringRegion()); SampleIndexQuery query = new SampleIndexQuery(locusQueries, sampleIndexQuery); iterator = sampleIndexDBAdaptor.iterator(query); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java index b68a8a85342..5d77e7dc375 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineSplitDataTest.java @@ -412,7 +412,8 @@ public void testLoadByRegion() throws Exception { assertEquals(TaskMetadata.Status.NONE, sampleMetadata.getSampleIndexAnnotationStatus(1)); } - checkVariantsTable(studyId_split, studyId_normal, new VariantQuery().includeSample(ParamConstants.ALL), new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_FILES)); + checkVariantsTable(studyId_split, studyId_normal, new VariantQuery().includeSample(ParamConstants.ALL), new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_FILES), + v -> v.getStudies().get(0).getFiles().forEach(file -> file.setFileId(""))); checkSampleIndex(studyId_split, studyId_normal); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopDBWriterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopDBWriterTest.java index 714435ac771..289052ad00f 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopDBWriterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopDBWriterTest.java @@ -268,7 +268,7 @@ private void loadVariantsBasic(StudyMetadata sc, int fileId, List varia fileId, metadataManager, dbAdaptor.getHBaseManager(), false, false); // TaskMetadata - HadoopLocalLoadVariantStoragePipeline.GroupedVariantsTask task = new HadoopLocalLoadVariantStoragePipeline.GroupedVariantsTask(archiveWriter, hadoopDBWriter, null, null); + HadoopLocalLoadVariantStoragePipeline.GroupedVariantsTask task = new HadoopLocalLoadVariantStoragePipeline.GroupedVariantsTask(archiveWriter, hadoopDBWriter, null, null, null, null, null); ParallelTaskRunner.Config config = ParallelTaskRunner.Config.builder().setNumTasks(1).setBatchSize(1).build(); ParallelTaskRunner>, Object> ptr = diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java index fa3e6cfbf75..03e8b3cd4fb 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptorTest.java @@ -9,9 +9,11 @@ import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; -import 
org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.dummy.DummyVariantStorageMetadataDBAdaptorFactory; +import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.hadoop.utils.HBaseManager; +import org.opencb.opencga.storage.hadoop.variant.index.query.LocusQuery; +import org.opencb.opencga.storage.hadoop.variant.index.query.SampleAnnotationIndexQuery; import org.opencb.opencga.storage.hadoop.variant.index.query.SampleIndexQuery; import java.util.Arrays; @@ -40,38 +42,45 @@ public void testSampleIdFF() throws Exception { String sampleName = "FF"; metadataManager.unsecureUpdateSampleMetadata(studyId, new SampleMetadata(studyId, sampleId, sampleName)); - SampleIndexQuery query = new SampleIndexQuery(SampleIndexSchema.defaultSampleIndexSchema(), Collections.emptyList(), "ST", - Collections.singletonMap(sampleName, Collections.singletonList("0/1")), VariantQueryUtils.QueryOperation.AND); + SampleIndexQuery query = new SampleIndexQuery(SampleIndexSchema.defaultSampleIndexSchema(), Collections.emptyList(), 0, null, "ST", + Collections.singletonMap(sampleName, Collections.singletonList("0/1")), Collections.emptySet(), null, Collections.emptyMap(), + Collections.emptyMap(), Collections.emptyMap(), new SampleAnnotationIndexQuery(SampleIndexSchema.defaultSampleIndexSchema()), + Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND); new SampleIndexDBAdaptor(new HBaseManager(new Configuration()), null, metadataManager).parse(query.forSample(sampleName), null); } @Test public void testSplitRegion() { - Region region = new Region("1", 1000, 16400000); - List split = SampleIndexDBAdaptor.splitRegion(region); + Region region = new Region("1", 11001000, 16400000); + List split = SampleIndexDBAdaptor.splitLocusQuery( + new LocusQuery( + SampleIndexSchema.getChunkRegion(region, 3000000), + Collections.singletonList(region), + Collections.emptyList())); // 
Check region is not modified - Assert.assertEquals("1:1000-16400000", region.toString()); + Assert.assertEquals("1:11001000-16400000", region.toString()); Assert.assertEquals(Arrays.asList( - new Region("1", 1000, 999999), - new Region("1", 1000000, 15999999), - new Region("1", 16000000, 16400000)), + new LocusQuery(new Region("1", 8000000, 12000000), Collections.singletonList(new Region("1", 11001000, 11999999)), Collections.emptyList()), + new LocusQuery(new Region("1", 12000000, 16000000), Collections.singletonList(new Region("1", 12000000, 15999999)), Collections.emptyList()), + new LocusQuery(new Region("1", 16000000, 17000000), Collections.singletonList(new Region("1", 16000000, 16400000)), Collections.emptyList())), split); - Assert.assertFalse(SampleIndexDBAdaptor.startsAtBatch(split.get(0))); - Assert.assertTrue(SampleIndexDBAdaptor.endsAtBatch(split.get(0))); - Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(1))); - Assert.assertTrue(SampleIndexDBAdaptor.endsAtBatch(split.get(1))); - Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(2))); - Assert.assertFalse(SampleIndexDBAdaptor.endsAtBatch(split.get(2))); + Assert.assertFalse(SampleIndexDBAdaptor.startsAtBatch(split.get(0).getRegions().get(0))); + Assert.assertTrue(SampleIndexDBAdaptor.endsAtBatch(split.get(0).getRegions().get(0))); + Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(1).getRegions().get(0))); + Assert.assertTrue(SampleIndexDBAdaptor.endsAtBatch(split.get(1).getRegions().get(0))); + Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(2).getRegions().get(0))); + Assert.assertFalse(SampleIndexDBAdaptor.endsAtBatch(split.get(2).getRegions().get(0))); - split = SampleIndexDBAdaptor.splitRegion(new Region("1", 1000000, 16400000)); + region = new Region("1", 1000000, 16400000); + split = SampleIndexDBAdaptor.splitLocusQuery(new LocusQuery(SampleIndexSchema.getChunkRegion(region, 0), Collections.singletonList(region), 
Collections.emptyList())); Assert.assertEquals(Arrays.asList( - new Region("1", 1000000, 15999999), - new Region("1", 16000000, 16400000)), + new LocusQuery(new Region("1", 1000000, 16000000), Collections.singletonList(new Region("1", 1000000, 15999999)), Collections.emptyList()), + new LocusQuery(new Region("1", 16000000, 17000000), Collections.singletonList(new Region("1", 16000000, 16400000)), Collections.emptyList())), split); - Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(0))); - Assert.assertTrue(SampleIndexDBAdaptor.endsAtBatch(split.get(0))); - Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(1))); - Assert.assertFalse(SampleIndexDBAdaptor.endsAtBatch(split.get(1))); + Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(0).getRegions().get(0))); + Assert.assertTrue(SampleIndexDBAdaptor.endsAtBatch(split.get(0).getRegions().get(0))); + Assert.assertTrue(SampleIndexDBAdaptor.startsAtBatch(split.get(1).getRegions().get(0))); + Assert.assertFalse(SampleIndexDBAdaptor.endsAtBatch(split.get(1).getRegions().get(0))); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java index 063727ddd84..f4989f9ff79 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryFilterTest.java @@ -279,7 +279,7 @@ private SingleSampleIndexQuery getSingleSampleIndexQuery(SampleAnnotationIndexQu private SingleSampleIndexQuery 
getSingleSampleIndexQuery(SampleAnnotationIndexQuery annotationIndexQuery, Map> fileFilterMap) { return new SampleIndexQuery( - schema, Collections.emptyList(), null, "study", Collections.singletonMap("S1", Arrays.asList("0/1", "1/1")), Collections.emptySet(), null, Collections.emptyMap(), Collections.emptyMap(), fileFilterMap, annotationIndexQuery, Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND) + schema, Collections.emptyList(), 0, null, "study", Collections.singletonMap("S1", Arrays.asList("0/1", "1/1")), Collections.emptySet(), null, Collections.emptyMap(), Collections.emptyMap(), fileFilterMap, annotationIndexQuery, Collections.emptySet(), null, false, VariantQueryUtils.QueryOperation.AND) .forSample("S1"); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 1f8763e0ccf..0eafe0ca875 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -1851,7 +1851,7 @@ public void testBuildLocusQueries() { new Region("6", 33_200_000, 34_800_000), new Region("8", 144_671_680, 144_690_000), new Region("6", 31_200_000, 31_800_000), - new Region("8", 145_100_000, 146_100_000)), Collections.emptyList()); + new Region("8", 145_100_000, 146_100_000)), Collections.emptyList(), 0); assertEquals(Arrays.asList( new LocusQuery(new Region("6", 31_000_000, 35_000_000), Arrays.asList(new Region("6", 31_200_000, 31_800_000),new 
Region("6", 33_200_000, 34_800_000)), Collections.emptyList()), // new LocusQuery(new Region("6", 31_000_000, 32_000_000), Collections.singletonList(new Region("6", 31_200_000, 31_800_000)), Collections.emptyList()), @@ -1866,7 +1866,7 @@ public void testBuildLocusQueries() { new Region("6", 31_200_000, 33_800_000), new Region("8", 144_671_680, 144_690_000), new Region("8", 144_700_000, 144_995_738), - new Region("8", 145_100_000, 146_100_000)), Collections.emptyList()); + new Region("8", 145_100_000, 146_100_000)), Collections.emptyList(), 0); assertEquals(Arrays.asList( new LocusQuery(new Region("6", 31_000_000, 35_000_000), Collections.singletonList(new Region("6", 31_200_000, 34_800_000)), Collections.emptyList()), new LocusQuery(new Region("8", 144_000_000, 147_000_000), Arrays.asList(new Region("8", 144_671_680, 144_690_000), @@ -1885,7 +1885,7 @@ public void testBuildLocusQueries() { new Variant("6:35001000:A:T"), new Variant("7:35001000:A:T"), new Variant("7:35002000:A:T") - )); + ), 0); assertEquals(Arrays.asList( new LocusQuery(new Region("6", 31_000_000, 36_000_000), Collections.singletonList(new Region("6", 31_200_000, 34_800_000)), Arrays.asList(new Variant("6:35001000:A:T"))), @@ -1895,5 +1895,38 @@ public void testBuildLocusQueries() { new Region("8", 144_700_000, 144_995_738), new Region("8", 145_100_000, 146_100_000)), Collections.emptyList()) ), queries); + + queries = buildLocusQueries( + Arrays.asList( + new Region("6", 31_200_000, 31_800_000), + new Region("6", 41_200_000, 43_800_000), + new Region("6", 35_200_000, 36_800_000), + new Region("8", 144_671_680, 144_690_000), + new Region("8", 144_700_000, 144_995_738), + new Region("8", 145_100_000, 146_100_000)), + Arrays.asList( + new Variant("6:35001000:A:T"), + new Variant("7:35001000:A:T"), + new Variant("7:35002000:A:T") + ), 2000000); + assertEquals(Arrays.asList( + new LocusQuery(new Region("6", 29_000_000, 44_000_000), + Arrays.asList( + new Region("6", 31_200_000, 31_800_000), + new 
Region("6", 35_200_000, 36_800_000), + new Region("6", 41_200_000, 43_800_000)), + Arrays.asList( + new Variant("6:35001000:A:T"))), + new LocusQuery(new Region("7", 35_000_000, 36_000_000), + Collections.emptyList(), + Arrays.asList( + new Variant("7:35001000:A:T"), + new Variant("7:35002000:A:T"))), + new LocusQuery(new Region("8", 142_000_000, 147_000_000), + Arrays.asList( + new Region("8", 144_671_680, 144_690_000), + new Region("8", 144_700_000, 144_995_738), + new Region("8", 145_100_000, 146_100_000)), Collections.emptyList()) + ), queries); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index 4ab4180d2ba..7693fc1a53e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -84,7 +84,8 @@ public class SampleIndexTest extends VariantStorageBaseTest implements HadoopVar private static boolean loaded = false; public static final String STUDY_NAME_3 = "study_3"; public static final String STUDY_NAME_4 = "study_4"; - private static final List studies = Arrays.asList(STUDY_NAME, STUDY_NAME_2, STUDY_NAME_3, STUDY_NAME_4); + public static final String STUDY_NAME_5 = "study_5"; + private static final List studies = Arrays.asList(STUDY_NAME, STUDY_NAME_2, STUDY_NAME_3, STUDY_NAME_4, STUDY_NAME_5); private static final Map> sampleNames = new HashMap>() {{ put(STUDY_NAME, Arrays.asList("NA19600", "NA19660", "NA19661", "NA19685")); put(STUDY_NAME_2, Arrays.asList("NA19600", "NA19660", 
"NA19661", "NA19685")); @@ -183,6 +184,14 @@ public void load() throws Exception { runETL(engine, getResourceUri("variant-test-dense.vcf.gz"), outputUri, params, true, true, true); engine.familyIndex(STUDY_NAME_4, trios, new ObjectMap()); + // Study 5, large SV + params = new ObjectMap() + .append(VariantStorageOptions.STUDY.key(), STUDY_NAME_5) + .append(VariantStorageOptions.ANNOTATE.key(), false) + .append(VariantStorageOptions.STATS_CALCULATE.key(), false); + runETL(engine, getResourceUri("variant-large-sv.vcf"), outputUri, params, true, true, true); + engine.familyIndex(STUDY_NAME_5, trios, new ObjectMap()); + // ---------------- Annotate // variantStorageEngine.getConfiguration().getCellbase().setUrl(ParamConstants.CELLBASE_URL); @@ -427,6 +436,36 @@ public void testMultiFileFilters() throws Exception { .append(SAMPLE_DATA.key(), "NA19600:DS=1.005")); } + @Test + public void testLocusQueryOverlap() throws Exception { + + VariantQuery query = new VariantQuery().study(STUDY_NAME_5).sample("NA19600"); +// System.out.println("query = " + query.toJson()); + List variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) + .toDataResult().getResults(); + assertEquals(2, variants.size()); + + query.region("1:2000200-5500000"); +// System.out.println("query = " + query.toJson()); + variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) + .toDataResult().getResults(); + assertEquals(2, variants.size()); + + query.region("1:200-2500000"); +// System.out.println("query = " + query.toJson()); + variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) + .toDataResult().getResults(); + assertEquals(1, variants.size()); + assertEquals("1:1000001-4000000:-:", variants.get(0).toString()); + + query.region("1:2000200-2500000"); +// System.out.println("query = " + query.toJson()); + variants = sampleIndexDBAdaptor.iterator(new Query(query), new QueryOptions()) + .toDataResult().getResults(); + assertEquals(1, 
variants.size()); + assertEquals("1:1000001-4000000:-:", variants.get(0).toString()); + } + @Test public void testQueryAnnotationIndex() throws Exception { testQueryAnnotationIndex(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "sift=tolerated")); @@ -1114,16 +1153,32 @@ private void testSampleIndexOnlyVariantQueryExecutor(VariantQuery query, QueryOp assertEquals( expectedVariants.stream().map(String::valueOf).collect(toList()), actualVariants.stream().map(String::valueOf).collect(toList())); - System.out.println("DBAdaptor " + expectedVariants.get(0).toJson()); - System.out.println("Actual " + actualVariants.get(0).toJson()); - assertEquals(expectedVariants, actualVariants); +// System.out.println("DBAdaptor " + expectedVariants.get(0).toJson()); +// System.out.println("Actual " + actualVariants.get(0).toJson()); + for (int i = 0; i < actualVariants.size(); i++) { + Variant av = actualVariants.get(i); + Variant ev = expectedVariants.get(i); + if (!ev.getStudies().isEmpty()) { + if (av.getLengthAlternate() == 0 || av.getLengthReference() == 0) { +// System.out.println("-------" + av + "----------"); +// System.out.println("DBAdaptor " + ev.toJson()); +// System.out.println("Actual " + av.toJson()); + assertNotNull(av.toString(), av.getStudies().get(0).getFiles().get(0).getCall()); + assertNotNull(ev.toString(), ev.getStudies().get(0).getFiles().get(0).getCall()); + ev.getStudies().get(0).getSamples().forEach(s -> s.setFileIndex(null)); + av.getStudies().get(0).getSamples().forEach(s -> s.setFileIndex(null)); + } + } + try { + assertEquals(ev, av); + } catch (AssertionError error) { + System.out.println("-------" + av + "----------"); + System.out.println("DBAdaptor " + ev.toJson()); + System.out.println("Actual " + av.toJson()); + throw error; + } + } +// assertEquals(expectedVariants, actualVariants); assertEquals(count, actualVariants.size()); } } - -/* - -10:101166544:C:T, 10:101473218:A:G, 10:104229785:C:T, 10:105194086:C:T, 10:11805354:A:G, 10:119043554:C:T, 
10:121086097:A:T, 10:123976285:G:A, 10:129899922:T:C, 10:134013974:C:A, 10:135000148:T:C, 10:14941654:C:T, 10:21104613:T:C, 10:26463043:A:T, 10:27964470:G:A, 10:28345418:T:C, 10:33137551:T:C, 10:45953767:A:G, 10:46965018:C:G, 10:49659637:T:C, 10:5435918:G:A, 10:6063508:G:A, 10:61665886:C:A, 10:64573771:C:T, 10:69926319:C:A, 10:71027231:G:C, 10:73044580:G:A, 10:75871735:C:G, 10:7601810:TTTTG:-, 10:81065938:C:T, 10:85997105:G:T, 10:88696622:C:G, 10:88702350:G:C, 10:88730312:T:C, 10:92456132:T:C, 10:93841227:A:G, 10:95552653:T:C, 10:96163039:C:G, 10:99006083:G:A, 10:99130282:T:G, 11:103229027:T:C, 11:1087972:C:T, 11:111324266:T:A, 11:1158073:T:C, 11:117376413:G:A, 11:118529069:G:C, 11:121491782:G:A, 11:123777498:C:T, 11:124015994:C:T, 11:125707761:A:C, 11:1272559:C:T, 11:128781978:T:G, 11:134158745:A:G, 11:1502097:A:G, 11:18210580:C:T, 11:1858262:C:T, 11:233067:C:T, 11:284299:A:G, 11:2869188:C:T, 11:31703352:C:T, 11:36458997:A:G, 11:46745003:C:T, 11:4730985:C:T, 11:47469439:A:G, 11:4791111:T:C, 11:48285982:CTT:-, 11:4976554:A:G, 11:5079946:C:T, 11:51516000:C:T, 11:5248243:A:G, 11:5510598:A:T, 11:5510626:T:C, 11:551753:G:A, 11:55340379:T:C, 11:57982832:T:A, 11:5809548:G:A, 11:6007899:C:T, 11:60285575:A:G, 11:60698054:G:A, 11:60701987:G:A, 11:62010863:C:T, 11:6243804:A:G, 11:62847453:T:C, 11:62951221:C:G, 11:63991581:G:A, 11:64367862:T:C, 11:64591972:T:C, 11:64808682:C:-, 11:67258391:A:G, 11:6789929:C:A, 11:68030173:C:A, 11:68703959:A:G, 11:70279766:C:T, 11:7324475:T:C, 11:73681135:G:A, 11:77635882:A:C, 11:77920930:A:G, 11:870446:G:A, 11:87908448:A:G, 11:89224131:-:TC, 11:92088177:C:T, 11:93517874:C:G, 12:10206925:A:G, 12:104709559:C:T, 12:105568176:G:A, 12:10588530:C:G, 12:10958658:T:C, 12:109696838:G:A, 12:109937534:G:A, 12:110893682:C:A, 12:112580071:C:T, 12:113357209:G:A, 12:11420941:G:T, 12:114377885:G:C, 12:117693817:G:A, 12:118199286:C:G, 12:11905443:G:T, 12:122674758:G:A, 12:123345736:C:G, 12:123799974:A:G, 12:124325977:T:G, 12:124417889:C:T, 12:124968359:C:T, 
12:129189702:C:G, 12:129566340:G:A, 12:13214537:A:G, 12:133202004:C:T, 12:133331459:G:C, 12:18443809:C:A, 12:19506870:C:G, 12:26217567:T:C, 12:27077409:G:A, 12:49390677:T:C, 12:50189807:C:A, 12:50190653:C:T, 12:50744119:G:A, 12:51237816:G:A, 12:52885350:T:C, 12:52886911:T:C, 12:52981512:G:A, 12:53217701:C:A, 12:55808469:C:G, 12:55820121:C:T, 12:56335107:A:G, 12:57109931:A:T, 12:57619362:G:A, 12:6091000:A:G, 12:63083521:G:A, 12:6424188:T:C, 12:7981462:T:A, 12:9083336:A:G, 12:93147907:A:T, 12:94976084:T:C, 12:97254620:G:A, 13:110436232:G:A, 13:111319754:T:C, 13:111368164:T:G, 13:113979969:-:CACA, 13:19999913:G:A, 13:23907677:C:T, 13:24243200:T:C, 13:25265139:T:C, 13:26043182:A:C, 13:28009031:G:C, 13:31495179:G:A, 13:31729729:A:G, 13:33590851:T:C, 13:47243196:C:G, 13:52660472:G:A, 13:52951802:T:C, 13:64417500:C:G, 13:95726541:A:G, 13:95863008:C:A, 14:100625902:C:T, 14:101200645:T:C, 14:101350721:T:C, 14:103986255:C:T, 14:105187469:G:A, 14:105344823:G:A, 14:105414252:C:T, 14:105419234:T:C, 14:20528362:A:G, 14:20586432:C:T, 14:21467913:T:G, 14:21511497:C:T, 14:22133997:A:G, 14:39648629:C:T, 14:51057727:G:A, 14:51368610:A:G, 14:52186966:G:A, 14:60574539:A:G, 14:69704553:G:T, 14:75513883:T:C, 14:77942316:G:A, 14:88477882:A:C, 14:90398907:G:A, 14:92088016:G:A, 14:95903306:GTA:-, 14:97002317:G:A, 15:100269796:A:G, 15:29415698:A:G, 15:34673722:C:T, 15:40914177:T:C, 15:41148199:C:T, 15:41149161:G:C, 15:41991315:A:T, 15:42139642:C:T, 15:51217361:T:C, 15:52353498:C:G, 15:52901977:G:A, 15:56959028:C:T, 15:63340647:A:G, 15:63433766:G:A, 15:65715171:G:A, 15:68596203:C:A, 15:68624290:G:A, 15:73994847:C:A, 15:78632830:C:G, 15:79058968:T:C, 15:80215597:G:T, 15:82443939:G:A, 15:84255758:T:C, 15:90126121:C:T, 15:90168693:T:A, 15:90226947:C:A, 15:90628591:G:A, 16:1291250:C:G, 16:16278863:G:T, 16:18872050:C:T, 16:19509305:C:G, 16:1961674:G:C, 16:20376755:T:C, 16:20648702:G:A, 16:23546561:G:C, 16:2815237:A:C, 16:29814234:G:A, 16:3085335:G:C, 16:31004169:T:C, 16:31091209:T:C, 
16:3199713:C:T, 16:3297181:C:T, 16:33961918:G:T, 16:3639139:A:G, 16:427820:C:T, 16:4432029:A:C, 16:4938160:T:G, 16:50342658:C:T, 16:51173559:G:A, 16:5140541:G:C, 16:53636000:G:A, 16:68598007:A:G, 16:68712730:C:A, 16:69364437:G:A, 16:70161263:T:C, 16:71660310:G:A, 16:72110541:G:A, 16:76311603:-:T, 16:81211548:G:A, 16:83984844:A:C, 16:84229559:T:C, 16:84229580:C:T, 16:84516309:G:A, 16:84691433:C:T, 16:8738579:A:G, 16:88713236:A:G, 16:88724347:G:T, 16:88805183:G:A, 16:89590168:-:TA, 17:10544416:G:T, 17:11523082:A:G, 17:15341183:A:C, 17:17696531:G:C, 17:21203964:C:G, 17:21318770:G:A, 17:29161358:C:T, 17:30469423:C:A, 17:34328461:A:G, 17:3594277:G:-, 17:3628362:T:C, 17:36478450:G:T, 17:36963226:G:T, 17:38122686:G:A, 17:38955961:G:A, 17:3909383:G:C, 17:39135207:A:G, 17:39334133:T:C, 17:3947533:G:A, 17:39633349:G:C, 17:39661689:G:A, 17:39983849:G:C, 17:41891589:G:A, 17:4463713:A:G, 17:47210506:C:A, 17:47572518:C:T, 17:48452776:A:C, 17:4926882:A:G, 17:56232675:G:A, 17:56598439:T:C, 17:60503892:A:G, 17:6157:A:G, 17:62019103:G:T, 17:65104743:G:A, 17:6515454:C:T, 17:66416357:C:T, 17:6943266:G:A, 17:71390366:C:A, 17:72346855:T:C, 17:7293715:C:T, 17:73949555:C:T, 17:74287204:C:G, 17:74468111:G:A, 17:76230729:T:C, 17:76462770:G:A, 17:7681412:C:G, 17:79477830:C:T, 17:79478019:G:A, 17:8021608:G:C, 17:80391684:A:G, 17:8243661:G:A, 17:8416901:C:T, 18:11609978:C:T, 18:13056682:G:A, 18:13069782:C:T, 18:14752957:G:A, 18:166819:T:C, 18:21100240:C:T, 18:2707619:G:A, 18:28710615:C:A, 18:3164385:C:T, 18:42529996:G:C, 18:56205262:A:C, 18:60191428:G:A, 18:60237388:A:G, 18:61390361:T:C, 18:76753768:C:G, 18:77171061:T:G, 18:77894844:G:A, 18:8784612:A:G, 18:9887205:A:G, 19:10106787:G:A, 19:10221642:A:G, 19:1032689:G:T, 19:10450285:G:A, 19:1047002:A:G, 19:11465316:G:C, 19:12936617:C:G, 19:14495661:A:C, 19:14512489:G:A, 19:14580328:A:G, 19:14589378:C:T, 19:14817548:T:A, 19:14877102:C:T, 19:14910321:A:G, 19:14952017:T:A, 19:15905661:C:A, 19:17450016:T:C, 19:17648350:T:C, 19:18054110:G:A, 
19:1811547:A:G, 19:18337260:G:A, 19:18562438:C:T, 19:19168542:A:G, 19:19655670:C:T, 19:1969882:G:T, 19:20229486:G:A, 19:20807047:A:G, 19:21477431:T:C, 19:21606429:G:T, 19:30199200:C:T, 19:33110204:T:C, 19:33792748:G:A, 19:35719106:A:G, 19:36940760:G:T, 19:36981137:A:-, 19:37488499:T:C, 19:39923952:G:A, 19:41018832:A:G, 19:41133643:C:T, 19:42085873:A:G, 19:42301763:A:C, 19:4251069:T:C, 19:4322990:G:A, 19:43243238:A:G, 19:4333711:C:T, 19:43585111:C:G, 19:43585325:G:A, 19:43922060:C:T, 19:43983740:T:C, 19:44470420:T:C, 19:44740602:G:A, 19:44934489:G:A, 19:4511278:C:T, 19:48622427:A:G, 19:4910889:G:T, 19:49513273:C:T, 19:49526191:G:A, 19:49869051:T:G, 19:49950298:C:T, 19:49954803:C:T, 19:501725:G:A, 19:501900:C:A, 19:50312653:C:T, 19:51330932:G:T, 19:51850290:G:A, 19:52004792:-:C, 19:52090100:T:C, 19:5212482:G:A, 19:52887247:-:CAA, 19:52942445:C:T, 19:53995004:G:A, 19:5455976:C:T, 19:54599222:G:T, 19:54677759:T:C, 19:55045042:G:A, 19:55526345:T:G, 19:55871019:A:G, 19:55993876:T:C, 19:56114237:G:A, 19:57036012:G:T, 19:57840547:A:G, 19:57931303:C:A, 19:58002964:G:C, 19:58565233:G:A, 19:58904396:G:A, 19:6731057:T:C, 19:7528734:A:G, 19:7571030:T:A, 19:7755056:G:A, 19:8564288:C:T, 19:8576670:C:T, 19:8645786:A:C, 19:9024994:C:T, 19:9065632:T:C, 19:9072742:T:C, 1:100203648:C:T, 1:100515497:T:C, 1:101704674:T:C, 1:104076462:C:T, 1:104116413:T:C, 1:107979396:A:C, 1:109735416:A:C, 1:116534852:C:T, 1:117487710:A:G, 1:11839971:C:T, 1:11848068:G:C, 1:120611554:T:C, 1:120611715:A:G, 1:13910417:C:T, 1:144917841:T:C, 1:146758054:G:A, 1:150526044:G:C, 1:150970577:G:T, 1:152079989:T:G, 1:152185864:G:A, 1:152192825:C:T, 1:152278689:C:A, 1:152325732:G:A, 1:152770533:G:A, 1:156760887:C:A, 1:158735691:T:C, 1:158813081:T:C, 1:15909850:C:G, 1:160648875:C:T, 1:16386447:G:C, 1:16451767:G:A, 1:1650807:T:C, 1:165389129:G:A, 1:167097739:C:A, 1:169391154:A:G, 1:17586134:A:G, 1:176992553:A:G, 1:17739586:G:A, 1:177902388:G:A, 1:179533915:G:A, 1:182850483:C:T, 1:183184616:C:T, 1:1849529:A:G, 
1:18808526:A:C, 1:19175846:T:C, 1:19597420:G:T, 1:200880978:C:T, 1:202129826:T:C, 1:204402500:C:T, 1:208391085:G:A, 1:208391254:C:T, 1:209811886:T:G, 1:21050958:C:T, 1:212873074:C:T, 1:214820299:A:G, 1:215799210:A:G, 1:223717496:C:T, 1:223951857:G:A, 1:225266966:C:G, 1:226553676:C:T, 1:227171487:C:T, 1:228496014:G:A, 1:228504670:C:T, 1:228559994:C:T, 1:229622162:A:G, 1:229623338:T:C, 1:23111551:A:T, 1:234745009:A:G, 1:235652513:T:C, 1:237890437:C:T, 1:2441358:T:C, 1:245133550:GC:-, 1:245704130:G:C, 1:2458010:G:C, 1:247902448:G:A, 1:248436611:G:A, 1:248458876:T:-, 1:248487016:C:T, 1:248525060:T:A, 1:248737511:A:G, 1:25291010:A:T, 1:25890189:A:G, 1:27320356:T:C, 1:29189597:C:T, 1:29475648:T:G, 1:32148571:A:G, 1:33161212:T:C, 1:34330067:A:C, 1:40881041:C:G, 1:40980731:G:T, 1:41285553:T:C, 1:43296173:C:T, 1:47133811:T:C, 1:54605320:-:C, 1:55075062:C:T, 1:59125683:C:T, 1:60381646:C:T, 1:6162054:T:C, 1:62603421:C:T, 1:65860687:A:C, 1:67313249:G:A, 1:67558739:C:T, 1:67852335:G:A, 1:75672376:A:G, 1:76198785:G:A, 1:7797503:C:G, 1:7909737:C:T, 1:84880380:T:C, 1:84944989:A:G, 1:89426902:G:A, 1:89847411:C:T, 1:9009406:C:T, 1:90178336:G:A, 1:92445257:C:G, 20:1600524:T:C, 20:23017017:T:C, 20:25255338:C:T, 20:30795819:T:C, 20:31024207:C:T, 20:31044088:C:T, 20:31897554:G:C, 20:3209072:A:C, 20:36979265:T:C, 20:40714479:G:A, 20:4163302:A:G, 20:42815190:G:A, 20:43379268:C:A, 20:44680412:C:T, 20:44996182:A:G, 20:57045667:T:C, 20:57244493:G:A, 20:60293919:C:A, 20:60640306:GCCAGG:-, 20:61167883:G:A, 20:61881296:A:G, 20:62903550:A:G, 20:7980390:C:T, 21:15954528:G:A, 21:28216674:G:C, 21:33717877:G:A, 21:33956579:T:C, 21:34883618:T:C, 21:37420650:G:A, 21:38437917:A:C, 21:40191638:A:G, 21:43547788:T:C, 21:43824123:G:C, 21:45107518:A:G, 21:45959386:G:A, 21:46020527:C:T, 21:46271452:C:T, 21:47831866:G:A, 22:17469026:G:A, 22:18835221:A:G, 22:19753449:A:G, 22:20780030:-:C, 22:20800835:A:G, 22:21044998:C:T, 22:24109774:T:G, 22:24891380:C:T, 22:26862041:G:A, 22:29454778:G:A, 22:32334229:A:C, 
22:36591380:A:G, 22:38369976:A:G, 22:38485540:A:G, 22:41548008:A:G, 22:42416056:A:G, 22:44681612:A:G, 22:45685002:A:G, 22:45813687:G:A, 22:50547247:A:G, 22:50962208:T:G, 2:101096960:C:T, 2:101594191:C:T, 2:107073469:T:A, 2:109381927:A:C, 2:112686988:G:A, 2:120199140:A:G, 2:129025758:C:A, 2:130897620:A:G, 2:136594158:G:A, 2:158636910:G:A, 2:159663599:T:C, 2:159954175:C:T, 2:160035207:T:C, 2:160738677:G:A, 2:160968628:A:G, 2:169985338:C:T, 2:170053505:C:T, 2:170218847:C:G, 2:170354138:G:A, 2:171822466:C:T, 2:172725301:A:G, 2:172945107:C:T, 2:174097106:A:G, 2:176945176:C:G, 2:17698678:A:G, 2:178417142:C:T, 2:179464527:T:C, 2:179578704:G:A, 2:185800905:A:T, 2:186673485:C:T, 2:189932831:T:C, 2:197004439:A:G, 2:203686202:T:C, 2:207041933:T:C, 2:216242917:T:A, 2:219602819:G:A, 2:220283277:T:C, 2:225719693:G:A, 2:228776996:T:C, 2:233712227:ACA:-, 2:233750074:C:T, 2:234707460:G:C, 2:238277795:A:G, 2:240982131:A:G, 2:240985099:G:A, 2:241404317:C:T, 2:241451351:G:A, 2:24149439:G:A, 2:24432839:A:G, 2:277003:A:G, 2:32713706:A:T, 2:32822957:G:A, 2:3392295:A:G, 2:38298139:T:C, 2:55096321:T:C, 2:61647901:A:G, 2:71190384:C:T, 2:71221822:A:G, 2:73339708:G:A, 2:75937801:C:T, 2:96626292:C:T, 2:97637905:T:C, 2:98274527:G:C, 2:99778985:T:C, 3:100712249:T:C, 3:100963154:G:A, 3:101283792:C:G, 3:108634973:C:A, 3:108719470:C:G, 3:111981878:T:C, 3:112993367:G:A, 3:121100283:G:A, 3:121263720:C:A, 3:121351338:C:T, 3:121416623:G:C, 3:123452838:G:A, 3:124379817:T:C, 3:125726048:G:C, 3:1262474:T:C, 3:128369596:A:C, 3:130368301:G:A, 3:13421150:C:T, 3:14183188:G:A, 3:146177815:C:T, 3:14755572:A:G, 3:148847613:G:T, 3:151090424:G:A, 3:154002714:A:G, 3:183823576:T:C, 3:183951431:C:T, 3:183975408:G:A, 3:184003317:C:T, 3:18427924:G:T, 3:190578566:A:G, 3:191093310:A:G, 3:194081635:T:C, 3:195510217:A:G, 3:195701388:G:A, 3:195938177:A:G, 3:196046830:A:G, 3:27472936:C:T, 3:38151731:T:C, 3:38798171:C:T, 3:49690199:G:A, 3:51990315:A:G, 3:52544470:A:G, 3:52825585:T:C, 3:56627598:A:G, 3:56716922:T:G, 
3:66287056:G:A, 3:97365074:A:G, 3:97983257:C:G, 3:9798773:C:G, 3:98073313:A:G, 4:104004064:T:C, 4:106155185:C:G, 4:107168431:G:C, 4:109010342:G:A, 4:113352397:G:A, 4:123664204:G:A, 4:129043204:C:G, 4:1388583:A:G, 4:154479430:T:C, 4:155256177:A:G, 4:15569018:G:A, 4:164435265:A:C, 4:173852389:C:T, 4:189012728:G:C, 4:3519881:C:T, 4:39094738:G:A, 4:42003671:A:G, 4:4249884:G:T, 4:42895308:G:A, 4:46086060:T:C, 4:56262374:A:G, 4:5785442:G:A, 4:5991384:T:C, 4:5991476:G:A, 4:69095197:T:C, 4:69687987:C:A, 4:70160342:T:C, 4:7043945:G:T, 4:71469604:C:T, 4:75719517:A:C, 4:7717012:G:A, 4:80905990:C:G, 4:83838262:G:T, 4:84376743:A:T, 4:95578588:G:A, 5:10282396:A:G, 5:109181682:A:T, 5:111611076:A:G, 5:121488506:C:G, 5:122425832:G:T, 5:125802027:G:A, 5:127609633:G:A, 5:134782450:T:A, 5:135388663:A:G, 5:136961566:C:A, 5:13701525:T:C, 5:138861078:C:T, 5:140346468:T:A, 5:140531374:T:C, 5:140605162:C:G, 5:140772427:T:G, 5:141335284:G:A, 5:145508340:A:G, 5:148207447:G:C, 5:154271948:G:A, 5:159835658:A:G, 5:16794916:G:A, 5:169454941:C:G, 5:171723739:T:C, 5:175792605:G:C, 5:176863519:G:C, 5:177422908:A:G, 5:180472498:C:T, 5:32087253:A:G, 5:35861068:T:C, 5:40998196:T:C, 5:41158863:G:A, 5:42719239:A:C, 5:476353:C:T, 5:54253615:C:T, 5:54404015:C:T, 5:56526783:G:A, 5:57751443:A:G, 5:57753149:A:G, 5:73076511:C:A, 5:78340257:C:G, 5:82833391:A:G, 5:89943571:G:T, 5:89985882:A:G, 5:9190404:G:A, 5:95234392:A:C, 5:96237326:G:A, 6:107113715:G:A, 6:111696257:G:A, 6:117246719:C:T, 6:129691132:C:G, 6:133035098:G:A, 6:136683828:A:G, 6:146112348:T:C, 6:151669875:A:G, 6:152470752:C:A, 6:152489294:T:C, 6:155141313:C:T, 6:155597147:C:T, 6:160858188:G:A, 6:166720806:G:C, 6:166873010:C:T, 6:167790110:C:T, 6:170485571:T:C, 6:17665479:G:C, 6:26056549:A:G, 6:26104217:T:C, 6:26370605:T:C, 6:27279852:T:C, 6:29080450:G:A, 6:29911064:A:G, 6:30313268:G:A, 6:30893127:G:A, 6:31110391:G:C, 6:31324864:G:A, 6:31378977:G:A, 6:31540784:C:A, 6:31555657:A:G, 6:31839309:C:T, 6:32370908:T:A, 6:32489748:-:CC, 6:32551959:-:TT, 
6:32609271:G:C, 6:32632714:G:C, 6:32802938:C:T, 6:32826233:A:G, 6:32974551:G:T, 6:33756532:G:A, 6:36198421:T:C, 6:36446975:G:C, 6:38746176:G:A, 6:4122249:C:A, 6:41773735:G:A, 6:43251912:A:G, 6:47649265:T:A, 6:51483961:T:C, 6:54186147:T:C, 6:56470690:G:A, 6:62390916:T:C, 6:65300143:G:C, 6:656555:G:T, 6:7246998:G:A, 6:72889472:A:G, 6:74354175:C:T, 6:79656570:G:A, 6:82461520:A:G, 6:83949261:T:C, 6:84799059:C:T, 6:90459454:G:A, 6:9900600:-:GAG, 7:100391581:T:C, 7:100807230:G:T, 7:102112980:G:A, 7:104110492:C:T, 7:106524689:C:T, 7:107834734:C:T, 7:117282644:A:G, 7:12417407:C:T, 7:134925411:G:A, 7:138732497:G:A, 7:150696111:T:G, 7:150935430:G:C, 7:154681216:G:A, 7:156742675:C:T, 7:20698270:A:G, 7:20778646:G:A, 7:2645526:G:A, 7:30915262:C:T, 7:31009576:G:T, 7:36366483:G:C, 7:37907304:T:C, 7:43664280:A:G, 7:44620836:C:A, 7:45124465:A:T, 7:47872845:A:G, 7:50435777:T:G, 7:5112057:C:G, 7:5518331:A:G, 7:55433884:A:C, 7:6026988:G:A, 7:63225873:C:T, 7:6550540:G:A, 7:66098384:G:A, 7:66703328:G:A, 7:75659815:T:C, 7:87160618:A:C, 7:91503228:C:T, 7:92098776:C:T, 7:97823125:G:A, 7:99580907:C:G, 8:104337096:A:G, 8:10480268:A:C, 8:110302047:T:G, 8:11996150:C:G, 8:124448804:T:A, 8:124665124:C:T, 8:12878807:T:G, 8:142367400:G:A, 8:142488837:G:A, 8:144332082:T:C, 8:144671685:G:C, 8:144697041:A:G, 8:144946252:C:T, 8:144995736:G:A, 8:144998514:C:T, 8:145693720:A:G, 8:146033347:T:C, 8:146115367:A:G, 8:146156247:C:A, 8:18257854:T:C, 8:2021421:G:T, 8:22864622:T:C, 8:23150878:T:G, 8:27634589:T:C, 8:27925796:A:T, 8:30585310:T:C, 8:30695226:C:T, 8:3200877:C:T, 8:41132742:A:T, 8:48173561:G:A, 8:57026229:C:A, 8:74005131:A:G, 8:74888616:G:C, 8:75157094:C:T, 8:75737733:A:G, 8:8234192:G:C, 8:977600:C:T, 9:103064530:G:A, 9:107361439:G:C, 9:112069477:T:A, 9:113169630:T:C, 9:115968797:C:T, 9:116028559:C:A, 9:117835931:G:A, 9:125637471:A:T, 9:125920376:G:A, 9:126520068:T:C, 9:127220952:T:C, 9:131403096:A:G, 9:131689361:G:A, 9:132382596:C:A, 9:132591509:A:G, 9:133710820:-:C, 9:133761001:A:G, 
9:133951230:C:T, 9:135139901:T:C, 9:136340200:T:G, 9:138591266:A:G, 9:139100805:T:C, 9:139273288:C:T, 9:139391636:G:A, 9:139413908:C:T, 9:139650678:A:G, 9:139656670:T:C, 9:139937795:T:C, 9:139990813:C:T, 9:14775859:G:A, 9:17466802:A:G, 9:18681821:A:G, 9:19058483:C:A, 9:27524731:A:G, 9:33935736:A:G, 9:34379692:C:T, 9:35606884:G:A, 9:35870001:T:C, 9:37441650:T:C, 9:429719:T:C, 9:90343780:A:C, 9:91978397:C:T, X:107976940:G:C, X:11316892:C:T, X:117700141:A:G, X:13677862:G:A, X:153151285:T:-, X:2408437:G:A, X:295231:A:G, X:3241791:G:A, X:45051111:C:T, X:48460314:A:G, X:70146475:G:C -1:1650807:T:C, 1:1849529:A:G, 1:2441358:T:C, 1:2458010:G:C, 1:6162054:T:C, 1:7797503:C:G, 1:7909737:C:T, 1:9009406:C:T, 1:11839971:C:T, 1:11848068:G:C, 1:13910417:C:T, 1:15909850:C:G, 1:16386447:G:C, 1:16451767:G:A, 1:17586134:A:G, 1:17739586:G:A, 1:18808526:A:C, 1:19175846:T:C, 1:19597420:G:T, 1:21050958:C:T, 1:23111551:A:T, 1:25291010:A:T, 1:25890189:A:G, 1:27320356:T:C, 1:29189597:C:T, 1:29475648:T:G, 1:32148571:A:G, 1:33161212:T:C, 1:34330067:A:C, 1:40881041:C:G, 1:40980731:G:T, 1:41285553:T:C, 1:43296173:C:T, 1:47133811:T:C, 1:54605320:-:C, 1:55075062:C:T, 1:59125683:C:T, 1:60381646:C:T, 1:62603421:C:T, 1:65860687:A:C, 1:67313249:G:A, 1:67558739:C:T, 1:67852335:G:A, 1:75672376:A:G, 1:76198785:G:A, 1:84880380:T:C, 1:84944989:A:G, 1:89426902:G:A, 1:89847411:C:T, 1:90178336:G:A, 1:92445257:C:G, 1:100203648:C:T, 1:100515497:T:C, 1:101704674:T:C, 1:104076462:C:T, 1:104116413:T:C, 1:107979396:A:C, 1:109735416:A:C, 1:116534852:C:T, 1:117487710:A:G, 1:120611554:T:C, 1:120611715:A:G, 1:144917841:T:C, 1:146758054:G:A, 1:150526044:G:C, 1:150970577:G:T, 1:152079989:T:G, 1:152185864:G:A, 1:152192825:C:T, 1:152278689:C:A, 1:152325732:G:A, 1:152770533:G:A, 1:156760887:C:A, 1:158735691:T:C, 1:158813081:T:C, 1:160648875:C:T, 1:165389129:G:A, 1:167097739:C:A, 1:169391154:A:G, 1:176992553:A:G, 1:177902388:G:A, 1:179533915:G:A, 1:182850483:C:T, 1:183184616:C:T, 1:200880978:C:T, 1:202129826:T:C, 
1:204402500:C:T, 1:208391085:G:A, 1:208391254:C:T, 1:209811886:T:G, 1:212873074:C:T, 1:214820299:A:G, 1:215799210:A:G, 1:223717496:C:T, 1:223951857:G:A, 1:225266966:C:G, 1:226553676:C:T, 1:227171487:C:T, 1:228496014:G:A, 1:228504670:C:T, 1:228559994:C:T, 1:229622162:A:G, 1:229623338:T:C, 1:234745009:A:G, 1:235652513:T:C, 1:237890437:C:T, 1:245133550:GC:-, 1:245704130:G:C, 1:247902448:G:A, 1:248436611:G:A, 1:248458876:T:-, 1:248487016:C:T, 1:248525060:T:A, 1:248737511:A:G, 10:5435918:G:A, 10:6063508:G:A, 10:7601810:TTTTG:-, 10:11805354:A:G, 10:14941654:C:T, 10:21104613:T:C, 10:26463043:A:T, 10:27964470:G:A, 10:28345418:T:C, 10:33137551:T:C, 10:45953767:A:G, 10:46965018:C:G, 10:49659637:T:C, 10:61665886:C:A, 10:64573771:C:T, 10:69926319:C:A, 10:71027231:G:C, 10:73044580:G:A, 10:75871735:C:G, 10:81065938:C:T, 10:85997105:G:T, 10:88696622:C:G, 10:88702350:G:C, 10:88730312:T:C, 10:92456132:T:C, 10:93841227:A:G, 10:95552653:T:C, 10:96163039:C:G, 10:99006083:G:A, 10:99130282:T:G, 10:101166544:C:T, 10:101473218:A:G, 10:104229785:C:T, 10:105194086:C:T, 10:119043554:C:T, 10:121086097:A:T, 10:123976285:G:A, 10:129899922:T:C, 10:134013974:C:A, 10:135000148:T:C, 11:233067:C:T, 11:284299:A:G, 11:551753:G:A, 11:870446:G:A, 11:1087972:C:T, 11:1158073:T:C, 11:1272559:C:T, 11:1502097:A:G, 11:1858262:C:T, 11:2869188:C:T, 11:4730985:C:T, 11:4791111:T:C, 11:4976554:A:G, 11:5079946:C:T, 11:5248243:A:G, 11:5510598:A:T, 11:5510626:T:C, 11:5809548:G:A, 11:6007899:C:T, 11:6243804:A:G, 11:6789929:C:A, 11:7324475:T:C, 11:18210580:C:T, 11:31703352:C:T, 11:36458997:A:G, 11:46745003:C:T, 11:47469439:A:G, 11:48285982:CTT:-, 11:51516000:C:T, 11:55340379:T:C, 11:57982832:T:A, 11:60285575:A:G, 11:60698054:G:A, 11:60701987:G:A, 11:62010863:C:T, 11:62847453:T:C, 11:62951221:C:G, 11:63991581:G:A, 11:64367862:T:C, 11:64591972:T:C, 11:64808682:C:-, 11:67258391:A:G, 11:68030173:C:A, 11:68703959:A:G, 11:70279766:C:T, 11:73681135:G:A, 11:77635882:A:C, 11:77920930:A:G, 11:87908448:A:G, 11:89224131:-:TC, 
11:92088177:C:T, 11:93517874:C:G, 11:103229027:T:C, 11:111324266:T:A, 11:117376413:G:A, 11:118529069:G:C, 11:121491782:G:A, 11:123777498:C:T, 11:124015994:C:T, 11:125707761:A:C, 11:128781978:T:G, 11:134158745:A:G, 12:6091000:A:G, 12:6424188:T:C, 12:7981462:T:A, 12:9083336:A:G, 12:10206925:A:G, 12:10588530:C:G, 12:10958658:T:C, 12:11420941:G:T, 12:11905443:G:T, 12:13214537:A:G, 12:18443809:C:A, 12:19506870:C:G, 12:26217567:T:C, 12:27077409:G:A, 12:49390677:T:C, 12:50189807:C:A, 12:50190653:C:T, 12:50744119:G:A, 12:51237816:G:A, 12:52885350:T:C, 12:52886911:T:C, 12:52981512:G:A, 12:53217701:C:A, 12:55808469:C:G, 12:55820121:C:T, 12:56335107:A:G, 12:57109931:A:T, 12:57619362:G:A, 12:63083521:G:A, 12:93147907:A:T, 12:94976084:T:C, 12:97254620:G:A, 12:104709559:C:T, 12:105568176:G:A, 12:109696838:G:A, 12:109937534:G:A, 12:110893682:C:A, 12:112580071:C:T, 12:113357209:G:A, 12:114377885:G:C, 12:117693817:G:A, 12:118199286:C:G, 12:122674758:G:A, 12:123345736:C:G, 12:123799974:A:G, 12:124325977:T:G, 12:124417889:C:T, 12:124968359:C:T, 12:129189702:C:G, 12:129566340:G:A, 12:133202004:C:T, 12:133331459:G:C, 13:19999913:G:A, 13:23907677:C:T, 13:24243200:T:C, 13:25265139:T:C, 13:26043182:A:C, 13:28009031:G:C, 13:31495179:G:A, 13:31729729:A:G, 13:33590851:T:C, 13:47243196:C:G, 13:52660472:G:A, 13:52951802:T:C, 13:64417500:C:G, 13:95726541:A:G, 13:95863008:C:A, 13:110436232:G:A, 13:111319754:T:C, 13:111368164:T:G, 13:113979969:-:CACA, 14:20528362:A:G, 14:20586432:C:T, 14:21467913:T:G, 14:21511497:C:T, 14:22133997:A:G, 14:39648629:C:T, 14:51057727:G:A, 14:51368610:A:G, 14:52186966:G:A, 14:60574539:A:G, 14:69704553:G:T, 14:75513883:T:C, 14:77942316:G:A, 14:88477882:A:C, 14:90398907:G:A, 14:92088016:G:A, 14:95903306:GTA:-, 14:97002317:G:A, 14:100625902:C:T, 14:101200645:T:C, 14:101350721:T:C, 14:103986255:C:T, 14:105187469:G:A, 14:105344823:G:A, 14:105414252:C:T, 14:105419234:T:C, 15:29415698:A:G, 15:34673722:C:T, 15:40914177:T:C, 15:41148199:C:T, 15:41149161:G:C, 15:41991315:A:T, 
15:42139642:C:T, 15:51217361:T:C, 15:52353498:C:G, 15:52901977:G:A, 15:56959028:C:T, 15:63340647:A:G, 15:63433766:G:A, 15:65715171:G:A, 15:68596203:C:A, 15:68624290:G:A, 15:73994847:C:A, 15:78632830:C:G, 15:79058968:T:C, 15:80215597:G:T, 15:82443939:G:A, 15:84255758:T:C, 15:90126121:C:T, 15:90168693:T:A, 15:90226947:C:A, 15:90628591:G:A, 15:100269796:A:G, 16:427820:C:T, 16:1291250:C:G, 16:1961674:G:C, 16:2815237:A:C, 16:3085335:G:C, 16:3199713:C:T, 16:3297181:C:T, 16:3639139:A:G, 16:4432029:A:C, 16:4938160:T:G, 16:5140541:G:C, 16:8738579:A:G, 16:16278863:G:T, 16:18872050:C:T, 16:19509305:C:G, 16:20376755:T:C, 16:20648702:G:A, 16:23546561:G:C, 16:29814234:G:A, 16:31004169:T:C, 16:31091209:T:C, 16:33961918:G:T, 16:50342658:C:T, 16:51173559:G:A, 16:53636000:G:A, 16:68598007:A:G, 16:68712730:C:A, 16:69364437:G:A, 16:70161263:T:C, 16:71660310:G:A, 16:72110541:G:A, 16:76311603:-:T, 16:81211548:G:A, 16:83984844:A:C, 16:84229559:T:C, 16:84229580:C:T, 16:84516309:G:A, 16:84691433:C:T, 16:88713236:A:G, 16:88724347:G:T, 16:88805183:G:A, 16:89590168:-:TA, 17:6157:A:G, 17:3594277:G:-, 17:3628362:T:C, 17:3909383:G:C, 17:3947533:G:A, 17:4463713:A:G, 17:4926882:A:G, 17:6515454:C:T, 17:6943266:G:A, 17:7293715:C:T, 17:7681412:C:G, 17:8021608:G:C, 17:8243661:G:A, 17:8416901:C:T, 17:10544416:G:T, 17:11523082:A:G, 17:15341183:A:C, 17:17696531:G:C, 17:21203964:C:G, 17:21318770:G:A, 17:29161358:C:T, 17:30469423:C:A, 17:34328461:A:G, 17:36478450:G:T, 17:36963226:G:T, 17:38122686:G:A, 17:38955961:G:A, 17:39135207:A:G, 17:39334133:T:C, 17:39633349:G:C, 17:39661689:G:A, 17:39983849:G:C, 17:41891589:G:A, 17:47210506:C:A, 17:47572518:C:T, 17:48452776:A:C, 17:56232675:G:A, 17:56598439:T:C, 17:60503892:A:G, 17:62019103:G:T, 17:65104743:G:A, 17:66416357:C:T, 17:71390366:C:A, 17:72346855:T:C, 17:73949555:C:T, 17:74287204:C:G, 17:74468111:G:A, 17:76230729:T:C, 17:76462770:G:A, 17:79477830:C:T, 17:79478019:G:A, 17:80391684:A:G, 18:166819:T:C, 18:2707619:G:A, 18:3164385:C:T, 18:8784612:A:G, 
18:9887205:A:G, 18:11609978:C:T, 18:13056682:G:A, 18:13069782:C:T, 18:14752957:G:A, 18:21100240:C:T, 18:28710615:C:A, 18:42529996:G:C, 18:56205262:A:C, 18:60191428:G:A, 18:60237388:A:G, 18:61390361:T:C, 18:76753768:C:G, 18:77171061:T:G, 18:77894844:G:A, 19:501725:G:A, 19:501900:C:A, 19:1032689:G:T, 19:1047002:A:G, 19:1811547:A:G, 19:1969882:G:T, 19:4251069:T:C, 19:4322990:G:A, 19:4333711:C:T, 19:4511278:C:T, 19:4910889:G:T, 19:5212482:G:A, 19:5455976:C:T, 19:6731057:T:C, 19:7528734:A:G, 19:7571030:T:A, 19:7755056:G:A, 19:8564288:C:T, 19:8576670:C:T, 19:8645786:A:C, 19:9024994:C:T, 19:9065632:T:C, 19:9072742:T:C, 19:10106787:G:A, 19:10221642:A:G, 19:10450285:G:A, 19:11465316:G:C, 19:12936617:C:G, 19:14495661:A:C, 19:14512489:G:A, 19:14580328:A:G, 19:14589378:C:T, 19:14817548:T:A, 19:14877102:C:T, 19:14910321:A:G, 19:14952017:T:A, 19:15905661:C:A, 19:17450016:T:C, 19:17648350:T:C, 19:18054110:G:A, 19:18337260:G:A, 19:18562438:C:T, 19:19168542:A:G, 19:19655670:C:T, 19:20229486:G:A, 19:20807047:A:G, 19:21477431:T:C, 19:21606429:G:T, 19:30199200:C:T, 19:33110204:T:C, 19:33792748:G:A, 19:35719106:A:G, 19:36940760:G:T, 19:36981137:A:-, 19:37488499:T:C, 19:39923952:G:A, 19:41018832:A:G, 19:41133643:C:T, 19:42085873:A:G, 19:42301763:A:C, 19:43243238:A:G, 19:43585111:C:G, 19:43585325:G:A, 19:43922060:C:T, 19:43983740:T:C, 19:44470420:T:C, 19:44740602:G:A, 19:44934489:G:A, 19:48622427:A:G, 19:49513273:C:T, 19:49526191:G:A, 19:49869051:T:G, 19:49950298:C:T, 19:49954803:C:T, 19:50312653:C:T, 19:51330932:G:T, 19:51850290:G:A, 19:52004792:-:C, 19:52090100:T:C, 19:52887247:-:CAA, 19:52942445:C:T, 19:53995004:G:A, 19:54599222:G:T, 19:54677759:T:C, 19:55045042:G:A, 19:55526345:T:G, 19:55871019:A:G, 19:55993876:T:C, 19:56114237:G:A, 19:57036012:G:T, 19:57840547:A:G, 19:57931303:C:A, 19:58002964:G:C, 19:58565233:G:A, 19:58904396:G:A, 2:277003:A:G, 2:3392295:A:G, 2:17698678:A:G, 2:24149439:G:A, 2:24432839:A:G, 2:32713706:A:T, 2:32822957:G:A, 2:38298139:T:C, 2:55096321:T:C, 
2:61647901:A:G, 2:71190384:C:T, 2:71221822:A:G, 2:73339708:G:A, 2:75937801:C:T, 2:96626292:C:T, 2:97637905:T:C, 2:98274527:G:C, 2:99778985:T:C, 2:101096960:C:T, 2:101594191:C:T, 2:107073469:T:A, 2:109381927:A:C, 2:112686988:G:A, 2:120199140:A:G, 2:129025758:C:A, 2:130897620:A:G, 2:136594158:G:A, 2:158636910:G:A, 2:159663599:T:C, 2:159954175:C:T, 2:160035207:T:C, 2:160738677:G:A, 2:160968628:A:G, 2:169985338:C:T, 2:170053505:C:T, 2:170218847:C:G, 2:170354138:G:A, 2:171822466:C:T, 2:172725301:A:G, 2:172945107:C:T, 2:174097106:A:G, 2:176945176:C:G, 2:178417142:C:T, 2:179464527:T:C, 2:179578704:G:A, 2:185800905:A:T, 2:186673485:C:T, 2:189932831:T:C, 2:197004439:A:G, 2:203686202:T:C, 2:207041933:T:C, 2:216242917:T:A, 2:219602819:G:A, 2:220283277:T:C, 2:225719693:G:A, 2:228776996:T:C, 2:233712227:ACA:-, 2:233750074:C:T, 2:234707460:G:C, 2:238277795:A:G, 2:240982131:A:G, 2:240985099:G:A, 2:241404317:C:T, 2:241451351:G:A, 20:1600524:T:C, 20:3209072:A:C, 20:4163302:A:G, 20:7980390:C:T, 20:23017017:T:C, 20:25255338:C:T, 20:30795819:T:C, 20:31024207:C:T, 20:31044088:C:T, 20:31897554:G:C, 20:36979265:T:C, 20:40714479:G:A, 20:42815190:G:A, 20:43379268:C:A, 20:44680412:C:T, 20:44996182:A:G, 20:57045667:T:C, 20:57244493:G:A, 20:60293919:C:A, 20:60640306:GCCAGG:-, 20:61167883:G:A, 20:61881296:A:G, 20:62903550:A:G, 21:15954528:G:A, 21:28216674:G:C, 21:33717877:G:A, 21:33956579:T:C, 21:34883618:T:C, 21:37420650:G:A, 21:38437917:A:C, 21:40191638:A:G, 21:43547788:T:C, 21:43824123:G:C, 21:45107518:A:G, 21:45959386:G:A, 21:46020527:C:T, 21:46271452:C:T, 21:47831866:G:A, 22:17469026:G:A, 22:18835221:A:G, 22:19753449:A:G, 22:20780030:-:C, 22:20800835:A:G, 22:21044998:C:T, 22:24109774:T:G, 22:24891380:C:T, 22:26862041:G:A, 22:29454778:G:A, 22:32334229:A:C, 22:36591380:A:G, 22:38369976:A:G, 22:38485540:A:G, 22:41548008:A:G, 22:42416056:A:G, 22:44681612:A:G, 22:45685002:A:G, 22:45813687:G:A, 22:50547247:A:G, 22:50962208:T:G, 3:1262474:T:C, 3:9798773:C:G, 3:13421150:C:T, 3:14183188:G:A, 
3:14755572:A:G, 3:18427924:G:T, 3:27472936:C:T, 3:38151731:T:C, 3:38798171:C:T, 3:49690199:G:A, 3:51990315:A:G, 3:52544470:A:G, 3:52825585:T:C, 3:56627598:A:G, 3:56716922:T:G, 3:66287056:G:A, 3:97365074:A:G, 3:97983257:C:G, 3:98073313:A:G, 3:100712249:T:C, 3:100963154:G:A, 3:101283792:C:G, 3:108634973:C:A, 3:108719470:C:G, 3:111981878:T:C, 3:112993367:G:A, 3:121100283:G:A, 3:121263720:C:A, 3:121351338:C:T, 3:121416623:G:C, 3:123452838:G:A, 3:124379817:T:C, 3:125726048:G:C, 3:128369596:A:C, 3:130368301:G:A, 3:146177815:C:T, 3:148847613:G:T, 3:151090424:G:A, 3:154002714:A:G, 3:183823576:T:C, 3:183951431:C:T, 3:183975408:G:A, 3:184003317:C:T, 3:190578566:A:G, 3:191093310:A:G, 3:194081635:T:C, 3:195510217:A:G, 3:195701388:G:A, 3:195938177:A:G, 3:196046830:A:G, 4:1388583:A:G, 4:3519881:C:T, 4:4249884:G:T, 4:5785442:G:A, 4:5991384:T:C, 4:5991476:G:A, 4:7043945:G:T, 4:7717012:G:A, 4:15569018:G:A, 4:39094738:G:A, 4:42003671:A:G, 4:42895308:G:A, 4:46086060:T:C, 4:56262374:A:G, 4:69095197:T:C, 4:69687987:C:A, 4:70160342:T:C, 4:71469604:C:T, 4:75719517:A:C, 4:80905990:C:G, 4:83838262:G:T, 4:84376743:A:T, 4:95578588:G:A, 4:104004064:T:C, 4:106155185:C:G, 4:107168431:G:C, 4:109010342:G:A, 4:113352397:G:A, 4:123664204:G:A, 4:129043204:C:G, 4:154479430:T:C, 4:155256177:A:G, 4:164435265:A:C, 4:173852389:C:T, 4:189012728:G:C, 5:476353:C:T, 5:9190404:G:A, 5:10282396:A:G, 5:13701525:T:C, 5:16794916:G:A, 5:32087253:A:G, 5:35861068:T:C, 5:40998196:T:C, 5:41158863:G:A, 5:42719239:A:C, 5:54253615:C:T, 5:54404015:C:T, 5:56526783:G:A, 5:57751443:A:G, 5:57753149:A:G, 5:73076511:C:A, 5:78340257:C:G, 5:82833391:A:G, 5:89943571:G:T, 5:89985882:A:G, 5:95234392:A:C, 5:96237326:G:A, 5:109181682:A:T, 5:111611076:A:G, 5:121488506:C:G, 5:122425832:G:T, 5:125802027:G:A, 5:127609633:G:A, 5:134782450:T:A, 5:135388663:A:G, 5:136961566:C:A, 5:138861078:C:T, 5:140346468:T:A, 5:140531374:T:C, 5:140605162:C:G, 5:140772427:T:G, 5:141335284:G:A, 5:145508340:A:G, 5:148207447:G:C, 5:154271948:G:A, 
5:159835658:A:G, 5:169454941:C:G, 5:171723739:T:C, 5:175792605:G:C, 5:176863519:G:C, 5:177422908:A:G, 5:180472498:C:T, 6:656555:G:T, 6:4122249:C:A, 6:7246998:G:A, 6:9900600:-:GAG, 6:17665479:G:C, 6:26056549:A:G, 6:26104217:T:C, 6:26370605:T:C, 6:27279852:T:C, 6:29080450:G:A, 6:29911064:A:G, 6:30313268:G:A, 6:30893127:G:A, 6:31110391:G:C, 6:31324864:G:A, 6:31378977:G:A, 6:31540784:C:A, 6:31555657:A:G, 6:31839309:C:T, 6:32370908:T:A, 6:32489748:-:CC, 6:32551959:-:TT, 6:32609271:G:C, 6:32632714:G:C, 6:32802938:C:T, 6:32826233:A:G, 6:32974551:G:T, 6:33756532:G:A, 6:36198421:T:C, 6:36446975:G:C, 6:38746176:G:A, 6:41773735:G:A, 6:43251912:A:G, 6:47649265:T:A, 6:51483961:T:C, 6:54186147:T:C, 6:56470690:G:A, 6:62390916:T:C, 6:65300143:G:C, 6:72889472:A:G, 6:74354175:C:T, 6:79656570:G:A, 6:82461520:A:G, 6:83949261:T:C, 6:84799059:C:T, 6:90459454:G:A, 6:107113715:G:A, 6:111696257:G:A, 6:117246719:C:T, 6:129691132:C:G, 6:133035098:G:A, 6:136683828:A:G, 6:146112348:T:C, 6:151669875:A:G, 6:152470752:C:A, 6:152489294:T:C, 6:155141313:C:T, 6:155597147:C:T, 6:160858188:G:A, 6:166720806:G:C, 6:166873010:C:T, 6:167790110:C:T, 6:170485571:T:C, 7:2645526:G:A, 7:5112057:C:G, 7:5518331:A:G, 7:6026988:G:A, 7:6550540:G:A, 7:12417407:C:T, 7:20698270:A:G, 7:20778646:G:A, 7:30915262:C:T, 7:31009576:G:T, 7:36366483:G:C, 7:37907304:T:C, 7:43664280:A:G, 7:44620836:C:A, 7:45124465:A:T, 7:47872845:A:G, 7:50435777:T:G, 7:55433884:A:C, 7:63225873:C:T, 7:66098384:G:A, 7:66703328:G:A, 7:75659815:T:C, 7:87160618:A:C, 7:91503228:C:T, 7:92098776:C:T, 7:97823125:G:A, 7:99580907:C:G, 7:100391581:T:C, 7:100807230:G:T, 7:102112980:G:A, 7:104110492:C:T, 7:106524689:C:T, 7:107834734:C:T, 7:117282644:A:G, 7:134925411:G:A, 7:138732497:G:A, 7:150696111:T:G, 7:150935430:G:C, 7:154681216:G:A, 7:156742675:C:T, 8:977600:C:T, 8:2021421:G:T, 8:3200877:C:T, 8:8234192:G:C, 8:10480268:A:C, 8:11996150:C:G, 8:12878807:T:G, 8:18257854:T:C, 8:22864622:T:C, 8:23150878:T:G, 8:27634589:T:C, 8:27925796:A:T, 8:30585310:T:C, 
8:30695226:C:T, 8:41132742:A:T, 8:48173561:G:A, 8:57026229:C:A, 8:74005131:A:G, 8:74888616:G:C, 8:75157094:C:T, 8:75737733:A:G, 8:104337096:A:G, 8:110302047:T:G, 8:124448804:T:A, 8:124665124:C:T, 8:142367400:G:A, 8:142488837:G:A, 8:144332082:T:C, 8:144671685:G:C, 8:144697041:A:G, 8:144946252:C:T, 8:144995736:G:A, 8:144998514:C:T, 8:145693720:A:G, 8:146033347:T:C, 8:146115367:A:G, 8:146156247:C:A, 9:429719:T:C, 9:14775859:G:A, 9:17466802:A:G, 9:18681821:A:G, 9:19058483:C:A, 9:27524731:A:G, 9:33935736:A:G, 9:34379692:C:T, 9:35606884:G:A, 9:35870001:T:C, 9:37441650:T:C, 9:90343780:A:C, 9:91978397:C:T, 9:103064530:G:A, 9:107361439:G:C, 9:112069477:T:A, 9:113169630:T:C, 9:115968797:C:T, 9:116028559:C:A, 9:117835931:G:A, 9:125637471:A:T, 9:125920376:G:A, 9:126520068:T:C, 9:127220952:T:C, 9:131403096:A:G, 9:131689361:G:A, 9:132382596:C:A, 9:132591509:A:G, 9:133710820:-:C, 9:133761001:A:G, 9:133951230:C:T, 9:135139901:T:C, 9:136340200:T:G, 9:138591266:A:G, 9:139100805:T:C, 9:139273288:C:T, 9:139391636:G:A, 9:139413908:C:T, 9:139650678:A:G, 9:139656670:T:C, 9:139937795:T:C, 9:139990813:C:T, X:295231:A:G, X:2408437:G:A, X:3241791:G:A, X:11316892:C:T, X:13677862:G:A, X:45051111:C:T, X:48460314:A:G, X:70146475:G:C, X:107976940:G:C, X:117700141:A:G, X:153151285:T:- - - */ \ No newline at end of file diff --git a/pom.xml b/pom.xml index 061edc3cead..c0b9f02fd71 100644 --- a/pom.xml +++ b/pom.xml @@ -25,7 +25,7 @@ 2.11.0-SNAPSHOT pom OpenCGA - OenCGA projects implements a big data platform for genomic data analysis + OpenCGA projects implements a big data platform for genomic data analysis http://docs.opencb.org/display/opencga/ @@ -42,12 +42,13 @@ - 2.9.2 - 2.9.2 - 5.5.0 - 2.9.0 - 4.9.0 - 2.9.2 + 2.11.0_dev + 2.11.0_dev + 5.7.0-SNAPSHOT + 2.11.0-SNAPSHOT + 4.11.0-SNAPSHOT + 2.11.0-SNAPSHOT + 0.2.0 2.11.4