Skip to content

Commit

Permalink
lib: add splice builder to compute splice scores based on SpliceAI, #558
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jul 12, 2021
1 parent 28364b2 commit 617e64c
Show file tree
Hide file tree
Showing 7 changed files with 716 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public class BuildCommandOptions {

@Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build: genome, genome_info, "
+ "gene, variation, variation_functional_score, regulation, protein, ppi, conservation, drug, "
+ "clinical_variants, repeats, svs. 'all' builds everything.", required = true, arity = 1)
+ "clinical_variants, repeats, svs, splice. 'all' builds everything.", required = true, arity = 1)
public String data;

@Parameter(names = {"-s", "--species"}, description = "Name of the species to be built, valid formats include 'Homo sapiens' or 'hsapiens'", required = false, arity = 1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ public void execute() {
case EtlCommons.OBO_DATA:
parser = buildObo();
break;
case EtlCommons.SPLICE_DATA:
parser = buildSplice();
break;
default:
logger.error("Build option '" + buildCommandOptions.data + "' is not valid");
break;
Expand Down Expand Up @@ -371,4 +374,12 @@ private Path getFastaReferenceGenome() {
}
return fastaFile;
}

/**
 * Creates the builder used for the {@code splice} build option.
 *
 * <p>The builder is given two files resolved inside the build folder
 * ({@code gene.json.gz} and {@code genome_info.json}), the FASTA reference
 * returned by {@link #getFastaReferenceGenome()}, and a JSON file serializer
 * constructed with the build folder and the name {@code "splice"}.
 *
 * <p>NOTE(review): this method does not check that the two input files exist;
 * presumably the gene and genome_info builds must have been run first - confirm.
 *
 * @return a {@code SpliceBuilder} wired with the inputs and serializer above
 */
private CellBaseBuilder buildSplice() {
    // Inputs are looked up by fixed file name inside the build folder.
    Path geneFile = buildFolder.resolve("gene.json.gz");
    Path genomeInfoFile = buildFolder.resolve("genome_info.json");

    // Obtained before the serializer is created, matching the original call order.
    Path referenceFasta = getFastaReferenceGenome();

    CellBaseFileSerializer spliceSerializer = new CellBaseJsonFileSerializer(buildFolder, "splice");
    return new SpliceBuilder(
            geneFile,
            genomeInfoFile,
            referenceFasta,
            spliceSerializer);
}
}
1 change: 1 addition & 0 deletions cellbase-core/src/main/resources/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ species:
- regulation
- repeats
- variation_functional_score
- splice
shards:
- collection: "variation"
key:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public class EtlCommons {
public static final String PROTEIN_DATA = "protein";
public static final String CONSERVATION_DATA = "conservation";
public static final String CLINICAL_VARIANTS_DATA = "clinical_variants";
public static final String SPLICE_DATA = "splice";

public static final String CLINICAL_VARIANTS_FOLDER = "clinicalVariant";
public static final String CLINVAR_XML_FILE = "ClinVarFullRelease_2020-02.xml.gz";
Expand Down
Loading

0 comments on commit 617e64c

Please sign in to comment.