Skip to content

Commit

Permalink
created custom merge for chembl
Browse files Browse the repository at this point in the history
  • Loading branch information
jal347 committed Oct 9, 2024
1 parent d90d129 commit fb95660
Showing 1 changed file with 34 additions and 2 deletions.
36 changes: 34 additions & 2 deletions src/hub/dataload/sources/chembl/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,43 @@
from .parser import load_data


class ChemblMergerStorage(storage.MergerStorage):
    """Merger storage with a ChEMBL-specific document merge function.

    NOTE(review): presumably ``merge_func`` is invoked by the base
    ``storage.MergerStorage`` when two documents share an ``_id`` --
    confirm against the base class.
    """

    @classmethod
    def merge_func(klass, doc1, doc2, **kwargs):
        """Merge the ``chembl`` entries of two documents, grouped by uniprot.

        Each document's ``chembl`` field may be a single entry or a list of
        entries; every entry is read through its ``uniprot`` and
        ``chembl_target`` keys. Entries that share the same ``uniprot``
        are collapsed into one entry whose ``chembl_target`` holds all the
        collected targets.

        Args:
            doc1: First document; its ``_id`` is used for the result.
            doc2: Second document.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A new dict ``{"_id": ..., "chembl": [...]}`` with one
            ``{"chembl_target": ..., "uniprot": ...}`` entry per distinct
            uniprot, in first-seen order. ``chembl_target`` stays a scalar
            when only one value was seen for that uniprot and becomes a flat
            list otherwise. Any other root keys of the inputs are not
            carried over.

        Raises:
            KeyError: If ``doc1`` has no ``_id``.
        """
        merged_doc = {"_id": doc1["_id"], "chembl": []}
        by_uniprot = {}

        for doc in (doc1, doc2):
            entries = doc.get("chembl", [])
            if not isinstance(entries, list):
                entries = [entries]

            for entry in entries:
                uniprot = entry.get("uniprot")
                target = entry.get("chembl_target")
                # Copy list values so that extending the merged result never
                # mutates the caller's documents.
                if isinstance(target, list):
                    target = list(target)

                merged_entry = by_uniprot.get(uniprot)
                if merged_entry is None:
                    by_uniprot[uniprot] = {
                        "chembl_target": target,
                        "uniprot": uniprot,
                    }
                    continue

                collected = merged_entry["chembl_target"]
                if not isinstance(collected, list):
                    collected = [collected]
                    merged_entry["chembl_target"] = collected

                # A target that is already a list (e.g. the output of an
                # earlier merge) is flattened in rather than nested.
                if isinstance(target, list):
                    collected.extend(target)
                else:
                    collected.append(target)

        merged_doc["chembl"] = list(by_uniprot.values())
        return merged_doc


class ChemblUploader(uploader.BaseSourceUploader):
name = "chembl"

# storage_class = storage.RootKeyMergerStorage
storage_class = storage.MergerStorage
storage_class = ChemblMergerStorage
TARGET_FILENAME_PATTERN = "target.*.json"

keylookup = MyGeneKeyLookup(
Expand Down

0 comments on commit fb95660

Please sign in to comment.