Skip to content

Commit

Permalink
extract-pdbids-drugbank-xsdata
Browse files Browse the repository at this point in the history
  • Loading branch information
ndonyapour committed Jun 26, 2024
1 parent 23fb84d commit 682ea9a
Show file tree
Hide file tree
Showing 19 changed files with 1,202 additions and 0 deletions.
29 changes: 29 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/.bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# bump2version configuration for the extract_pdbids_drugbank_xsdata plugin.
# Versions look like MAJOR.MINOR.PATCH with an optional "-dev<N>" suffix.
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
# Serialization formats are tried in order; continuation lines must be indented.
serialize =
    {major}.{minor}.{patch}-{release}{dev}
    {major}.{minor}.{patch}

# "_" is the optional (omitted) release value, so a final release has no suffix.
[bumpversion:part:release]
optional_value = _
first_value = dev
values =
    dev
    _

[bumpversion:part:dev]

[bumpversion:file:pyproject.toml]
search = version = "{current_version}"
replace = version = "{new_version}"

[bumpversion:file:VERSION]

[bumpversion:file:README.md]

[bumpversion:file:plugin.json]

# ict.yml carries its own unquoted `version:` field; the explicit search
# pattern keeps the quoted `specVersion: "..."` line in that file untouched.
[bumpversion:file:ict.yml]
search = version: {current_version}
replace = version: {new_version}

[bumpversion:file:src/polus/mm/utils/extract_pdbids_drugbank_xsdata/__init__.py]
4 changes: 4 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.venv
out
tests
__pycache__
1 change: 1 addition & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry.lock
5 changes: 5 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# CHANGELOG

## 0.1.0

Initial release.
27 changes: 27 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Image for the extract_pdbids_drugbank_xsdata plugin.
# Third-party dependencies come from the conda environment described in
# environment.yml; the plugin package itself is then installed with pip.
FROM condaforge/mambaforge

ENV EXEC_DIR="/opt/executables"
ENV POLUS_LOG="INFO"
RUN mkdir -p ${EXEC_DIR}


# Work directory defined in the base container
# WORKDIR ${EXEC_DIR}

COPY pyproject.toml ${EXEC_DIR}
COPY VERSION ${EXEC_DIR}
COPY README.md ${EXEC_DIR}
COPY CHANGELOG.md ${EXEC_DIR}

# Install needed packages here
# errors installing rdkit from poetry so using conda
COPY environment.yml ${EXEC_DIR}
RUN mamba env create -f ${EXEC_DIR}/environment.yml
RUN echo "source activate project_env" > ~/.bashrc
# The directory name must match the environment name activated above
# (`project_env`); otherwise the pip3 call below resolves to a different
# interpreter and the package is not installed into that environment.
ENV PATH=/opt/conda/envs/project_env/bin:$PATH

COPY src ${EXEC_DIR}/src

RUN pip3 install ${EXEC_DIR} --no-cache-dir

# NOTE(review): no ENTRYPOINT is declared in this file, so "--help" is passed
# to whatever entrypoint the base image defines - confirm this is intended.
CMD ["--help"]
18 changes: 18 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# extract_pdbids_drugbank_xsdata (0.1.0)

Filter Drugbank database using xsData

## Options

This plugin takes 6 input arguments and 3 output arguments:

| Name | Description | I/O | Type | Default |
|---------------|-------------------------|--------|--------|---------|
| drugbank_xml_file_path | Path to the Drugbank xml file | Input | File | File |
| drugbank_xsd_file_path | Path to the Drugbank schema XSD file | Input | File | File |
| smiles | List of input SMILES, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| inchi | List of input InChI strings, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| inchi_keys | List of input InChI keys, Type: string[], File type: input, Accepted formats: list[string] | Input | ['null', {'type': 'array', 'items': 'string'}] | ['null', {'type': 'array', 'items': 'string'}] |
| output_txt_path | Path to the text dataset file, Type: string, File type: output, Accepted formats: txt | Input | string | string |
| output_txt_path | Path to the txt file | Output | File | File |
| output_smiles | The Smiles of small molecules | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
| output_pdb_ids | The PDB IDs of target structures | Output | {'type': 'array', 'items': 'string'} | {'type': 'array', 'items': 'string'} |
1 change: 1 addition & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.0
4 changes: 4 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/build-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build the plugin image, tagged with the version read from ./VERSION.
# VERSION and the build context (.) are resolved relative to the current
# working directory, so run this from the plugin directory.

# Stop on the first error or unset variable so an unreadable VERSION file
# cannot fall through to `docker build` with an empty tag.
set -euo pipefail

version=$(<VERSION)
docker build . -t "polusai/extract-pdbids-drugbank-xsdata-plugin:${version}"
14 changes: 14 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Conda environment for the extract_pdbids_drugbank_xsdata plugin.
name: project_env
channels:
  - conda-forge
dependencies:
  # Single `=` is conda's fuzzy match (any 3.10.x); `==3.10` is an exact match.
  - python=3.10
  - rdkit==2024.03.1
  - pytest==8.1.1
  - cwltool==3.1.20240404144621
  - cwl-utils==0.33
  - pip
  # Packages installed with pip inside the environment; this list must be
  # nested under the `pip:` key.
  - pip:
      - xsdata-pydantic[cli,lxml,soap]
      # NOTE(review): the extra index is TestPyPI, presumably needed for
      # drugbank-schemas - confirm, and prefer a release on the main index.
      - --extra-index-url=https://test.pypi.org/simple
      - drugbank-schemas
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env cwl-runner
# CWL wrapper around `python -m polus.mm.utils.extract_pdbids_drugbank_xsdata`.
# The tool writes a text file (one line per molecule: SMILES followed by
# comma-separated PDB IDs); the two array outputs are parsed from that file.
cwlVersion: v1.0

class: CommandLineTool

label: Filter Drugbank database using xsData

doc: |-
  Filter Drugbank database using xsData

baseCommand: ["conda", "run", "-n", "project_env", "python", "-m", "polus.mm.utils.extract_pdbids_drugbank_xsdata"]

hints:
  DockerRequirement:
    dockerPull: ndonyapour/extract_pdbids_drugbank_xsdata

requirements:
  InlineJavascriptRequirement: {}
  # Enabling InitialWorkDirRequirement will stage the input Drugbank xml
  # and xsd files
  InitialWorkDirRequirement:
    listing:
      - $(inputs.drugbank_xml_file_path)
      - $(inputs.drugbank_xsd_file_path)

inputs:
  # NOTE(review): the XML default below is three directories up
  # (../../../drugbank) while the XSD default is two (../../drugbank) -
  # confirm that both locations resolve from this file.
  drugbank_xml_file_path:
    label: Path to the Drugbank xml file
    doc: |-
      Path to the Drugbank xml file
    type: File
    format: edam:format_2332
    inputBinding:
      prefix: --drugbank_xml_file_path
    default:
      class: File
      location: ../../../drugbank/drugbank_5.1.10.xml

  drugbank_xsd_file_path:
    label: Path to the Drugbank schema XSD file
    doc: |-
      Path to the Drugbank schema XSD file
    type: File
    format: edam:format_3804
    inputBinding:
      prefix: --drugbank_xsd_file_path
    default:
      class: File
      location: ../../drugbank/drugbank_5.1.10.xsd

  smiles:
    label: List of input SMILES
    doc: |-
      List of input SMILES
      Type: string[]
      File type: input
      Accepted formats: list[string]
    type: ["null", {"type": "array", "items": "string"}]
    format: edam:format_2330
    inputBinding:
      prefix: --smiles
    default: []

  inchi:
    label: List of input InChI strings
    doc: |-
      List of input InChI strings
      Type: string[]
      File type: input
      Accepted formats: list[string]
    type: ["null", {"type": "array", "items": "string"}]
    format:
      - edam:format_2330
    inputBinding:
      prefix: --inchi
    default: []

  inchi_keys:
    label: List of input InChI keys
    doc: |-
      List of input InChI keys
      Type: string[]
      File type: input
      Accepted formats: list[string]
    type: ["null", {"type": "array", "items": "string"}]
    format:
      - edam:format_2330
    inputBinding:
      prefix: --inchi_keys
    default: []

  output_txt_path:
    label: Path to the text dataset file
    doc: |-
      Path to the text dataset file
      Type: string
      File type: output
      Accepted formats: txt
    type: string
    format:
      - edam:format_2330
    inputBinding:
      prefix: --output_txt_path
    default: system.log

outputs:
  output_txt_path:
    label: Path to the txt file
    doc: |-
      Path to the txt file
    type: File
    outputBinding:
      glob: $(inputs.output_txt_path)
    format: edam:format_2330

  # output_smiles and output_pdb_ids are index-aligned: each SMILES string is
  # repeated once per PDB ID found on its line.
  output_smiles:
    label: The Smiles of small molecules
    doc: |-
      The Smiles of small molecules
    type:
      type: array
      items: string
    outputBinding:
      glob: $(inputs.output_txt_path)
      loadContents: true
      outputEval: |
        ${
          var lines = self[0].contents.split("\n");
          // remove blank lines
          lines = lines.filter(function(line) {return line.trim() !== '';});
          var smiles = [];
          for (var i = 0; i < lines.length; i++) {
            // The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7
            // The first item is the SMILES notation. We need to duplicate it, so each SMILES string
            // corresponds to a PDB ID in the PDB IDs array.
            var words = lines[i].split(",").map(function(item) {return item.trim();});
            for (var j = 1; j < words.length; j++) {
              smiles.push(words[0]);
            }
          }
          return smiles;
        }

  output_pdb_ids:
    label: The PDB IDs of target structures
    doc: |-
      The PDB IDs of target structures
    type:
      type: array
      items: string
    outputBinding:
      glob: $(inputs.output_txt_path)
      loadContents: true
      outputEval: |
        ${
          var lines = self[0].contents.split("\n");
          // remove blank lines
          lines = lines.filter(function(line) {return line.trim() !== '';});
          var pdbids = [];
          for (var i = 0; i < lines.length; i++) {
            // The format of the lines is as follows: NC1=NC=NN2C1=CC=C2[C@@]1(O[C@H](CO)[C@@H](O)[C@H]1O)C#N,7bf6,7qg7
            // The first item is the SMILES notation and the rest are the target structure PDB IDs.
            var words = lines[i].split(",").map(function(item) {return item.trim();});
            for (var j = 1; j < words.length; j++) {
              pdbids.push(words[j]);
            }
          }
          return pdbids;
        }

$namespaces:
  edam: https://edamontology.org/

$schemas:
  - https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
93 changes: 93 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/ict.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# ICT description of the extract_pdbids_drugbank_xsdata plugin: metadata,
# then the inputs/outputs of the tool, then one UI entry per input.
specVersion: "0.1.0"
name: extract_pdbids_drugbank_xsdata
version: 0.1.0
container: extract-pdbids-drugbank-xsdata-plugin
entrypoint:
title: extract_pdbids_drugbank_xsdata
description: Filter Drugbank database using xsData
author: Brandon Walker, Nazanin Donyapour
# NOTE(review): the addresses appear redacted in this copy - restore the real
# contacts. Quoted because a plain YAML scalar cannot start with "[".
contact: '[email protected], [email protected]'
repository:
documentation:
citation:

inputs:
  - name: drugbank_xml_file_path
    required: true
    description: Path to the Drugbank xml file
    type: File
    defaultValue: {'class': 'File', 'location': '../../../drugbank/drugbank_5.1.10.xml'}
    format:
      uri: edam:format_2332
  - name: drugbank_xsd_file_path
    required: true
    description: Path to the Drugbank schema XSD file
    type: File
    defaultValue: {'class': 'File', 'location': '../../drugbank/drugbank_5.1.10.xsd'}
    format:
      uri: edam:format_3804
  # NOTE(review): smiles, inchi and inchi_keys are nullable ('null' in their
  # type) yet marked required - confirm which one is intended.
  - name: smiles
    required: true
    description: List of input SMILES, Type string[], File type input, Accepted formats list[string]
    type: ['null', {'type': 'array', 'items': 'string'}]
    format:
      uri: edam:format_2330
  - name: inchi
    required: true
    description: List of input InChI strings, Type string[], File type input, Accepted formats list[string]
    type: ['null', {'type': 'array', 'items': 'string'}]
    format:
      uri: edam:format_2330
  - name: inchi_keys
    required: true
    description: List of input InChI keys, Type string[], File type input, Accepted formats list[string]
    type: ['null', {'type': 'array', 'items': 'string'}]
    format:
      uri: edam:format_2330
  - name: output_txt_path
    required: true
    description: Path to the text dataset file, Type string, File type output, Accepted formats txt
    type: string
    defaultValue: system.log
    format:
      uri: edam:format_2330
outputs:
  - name: output_txt_path
    required: true
    description: Path to the txt file
    type: File
    format:
      uri: edam:format_2330
  - name: output_smiles
    required: true
    description: The Smiles of small molecules
    type: {'type': 'array', 'items': 'string'}
  - name: output_pdb_ids
    required: true
    description: The PDB IDs of target structures
    type: {'type': 'array', 'items': 'string'}
ui:
  - key: inputs.drugbank_xml_file_path
    title: "drugbank_xml_file_path: "
    description: "Path to the Drugbank xml file"
    type: File
  - key: inputs.drugbank_xsd_file_path
    title: "drugbank_xsd_file_path: "
    description: "Path to the Drugbank schema XSD file"
    type: File
  - key: inputs.smiles
    title: "smiles: "
    description: "List of input SMILES, Type string[], File type input, Accepted formats list[string]"
    type: ['null', {'type': 'array', 'items': 'string'}]
  - key: inputs.inchi
    title: "inchi: "
    description: "List of input InChI strings, Type string[], File type input, Accepted formats list[string]"
    type: ['null', {'type': 'array', 'items': 'string'}]
  - key: inputs.inchi_keys
    title: "inchi_keys: "
    description: "List of input InChI keys, Type string[], File type input, Accepted formats list[string]"
    type: ['null', {'type': 'array', 'items': 'string'}]
  - key: inputs.output_txt_path
    title: "output_txt_path: "
    description: "Path to the text dataset file, Type string, File type output, Accepted formats txt"
    type: string
3 changes: 3 additions & 0 deletions utils/extract-pdbids-drugbank-xsdata-plugin/out.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SMILES5443,PDB8720
SMILES3441,PDB5533
SMILES2803,PDB3506
Loading

0 comments on commit 682ea9a

Please sign in to comment.