Skip to content

Commit

Permalink
mend
Browse files Browse the repository at this point in the history
  • Loading branch information
amva13 committed Mar 6, 2024
1 parent cd65f41 commit 88d7e59
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 27 deletions.
22 changes: 15 additions & 7 deletions tdc/resource/cellxgene_census.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

os.environ['KMP_DUPLICATE_LIB_OK'] = "TRUE"
# TODO: remove
# TODO: remove
import cellxgene_census
import gget
import tiledbsoma
Expand All @@ -13,14 +13,15 @@ class CensusResource:
_CENSUS_DATA = "census_data"
_CENSUS_META = "census_info"
_FEATURE_PRESENCE = "feature_dataset_presence_matrix"
_LATEST_CENSUS = "2023-12-15" # TODO: maybe change to 'latest'
_LATEST_CENSUS = "2023-12-15" # TODO: maybe change to 'latest'
_HUMAN = "homo_sapiens"

class decorators:

@classmethod
def check_dataset_is_census_data(cls, func):
"""Sets self.dataset to census_data"""

def check(*args, **kwargs):
self = args[0]
self.dataset = self._CENSUS_DATA
Expand All @@ -31,6 +32,7 @@ def check(*args, **kwargs):
@classmethod
def check_dataset_is_census_info(cls, func):
"""Sets self.dataset to census_info"""

def check(*args, **kwargs):
self = args[0]
self.dataset = self._CENSUS_META
Expand All @@ -46,9 +48,11 @@ def slice_checks_X_and_FM(cls, func):
3. functions requiring a measurement name provide a measurement name
4. fmt is a valid format
asserts these requirements hold in input arguments."""

def check(*args, **kwargs):
if "upper" in kwargs:
upper, lower = kwargs.get('upper', None), kwargs.get("lower", None)
upper, lower = kwargs.get('upper',
None), kwargs.get("lower", None)
if upper is None or lower is None:
raise Exception(
"No upper and/or lower bound for slicing was provided. Dataset is too large to fit in memory. \
Expand All @@ -57,18 +61,22 @@ def check(*args, **kwargs):
fmt = fmt if fmt is not None else "pandas"
if "todense" in kwargs:
todense = kwargs.get("todense")
kwargs["todense"] = todense if todense is not None else False
kwargs[
"todense"] = todense if todense is not None else False
if todense and fmt != "scipy":
raise ValueError(
"dense representation only available in scipy format")
"dense representation only available in scipy format"
)
measurement_name = kwargs.get("measurement_name")
if measurement_name is None:
raise Exception("measurement_name was not provided.")
elif fmt is not None and fmt not in ["scipy", "pyarrow"]:
raise ValueError(
"measurement_matrix only supports 'scipy' or 'pyarrow' format")
"measurement_matrix only supports 'scipy' or 'pyarrow' format"
)
kwargs["fmt"] = fmt if fmt is not None else "pandas"
return func(*args, **kwargs)

return check

def __init__(self, census_version=None, organism=None):
Expand Down Expand Up @@ -238,7 +246,7 @@ def get_cell_count_metadata(self):
census_version=self.census_version) as census:
return census[self.dataset]["summary_cell_counts"]

@decorators.slice_checks_X_and_FM
@decorators.slice_checks_X_and_FM
@decorators.check_dataset_is_census_data
def query_measurement_matrix(self,
value_filter=None,
Expand Down
50 changes: 30 additions & 20 deletions tdc/test/test_resources.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import os
import sys

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
import unittest

from pandas import DataFrame
# from pyarrow import SparseCOOTensor
# from pyarrow import SparseCOOTensor
from tdc.resource import cellxgene_census


class TestResources(unittest.TestCase):
pass


class TestCellXGene(unittest.TestCase):

def setUp(self):
Expand All @@ -19,33 +22,40 @@ def setUp(self):
self.gene_column_names = ["feature_name", "feature_length"]
self.cell_value_filter = "tissue == 'brain' and sex == 'male'"
self.cell_column_names = ["assay", "cell_type", "tissue"]

def test_get_cell_metadata(self):
obsdf = self.resource.get_cell_metadata(value_filter=self.cell_value_filter,
column_names=self.cell_column_names,
fmt="pandas")
obsdf = self.resource.get_cell_metadata(
value_filter=self.cell_value_filter,
column_names=self.cell_column_names,
fmt="pandas")
assert isinstance(obsdf, DataFrame)

def test_get_gene_metadata(self):
varpyarrow = self.resource.get_gene_metadata(value_filter=self.gene_value_filter,
column_names=self.gene_column_names,
fmt="pyarrow",
measurement_name="RNA")
varpyarrow = self.resource.get_gene_metadata(
value_filter=self.gene_value_filter,
column_names=self.gene_column_names,
fmt="pyarrow",
measurement_name="RNA")
print(varpyarrow)
# assert isinstance(varpyarrow, SparseCOOTensor)
# assert isinstance(varpyarrow, SparseCOOTensor)

def test_get_measurement_matrix(self):
Xslice = self.resource.get_measurement_matrix(upper=5,
lower=0,
measurement_name="RNA",
fmt="scipy",
todense=True)
lower=0,
measurement_name="RNA",
fmt="scipy",
todense=True)
print("x", Xslice)

def test_get_feature_dataset_presence_matrix(self):
FMslice = self.resource.get_feature_dataset_presence_matrix(
upper=5, lower=0, measurement_name="RNA", fmt="pyarrow", todense=False)
upper=5,
lower=0,
measurement_name="RNA",
fmt="pyarrow",
todense=False)
print("f", FMslice)



if __name__ == "__main__":
unittest.main()
unittest.main()

0 comments on commit 88d7e59

Please sign in to comment.