Skip to content

Commit

Permalink
CU-8693az82g Remove cdb tests side effects (#380)
Browse files Browse the repository at this point in the history
* 8693az82g: Add method to CDBMaker to reset the CDB

* 8693az82g: Add test in CDB tests to ensure a new CDB is used for each test

* 8693az82g: Reset CDB in CDB tests before each test to avoid side effects
  • Loading branch information
mart-r authored Dec 18, 2023
1 parent 70305f4 commit 9e5fca1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
14 changes: 14 additions & 0 deletions medcat/cdb_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ def __init__(self, config: Config, cdb: Optional[CDB] = None) -> None:
name='skip_and_punct',
additional_fields=['is_punct'])

def reset_cdb(self) -> None:
    """Replace the internal CDB with a fresh one built from the same config.

    Call this between successive `prepare_csvs` calls on the same
    `CDBMaker` instance: the maker otherwise keeps working on the same
    CDB object, so results of earlier calls are likely to leak into
    later ones.
    """
    fresh_cdb = CDB(config=self.config)
    self.cdb = fresh_cdb

def prepare_csvs(self,
csv_paths: Union[pd.DataFrame, List[str]],
sep: str = ',',
Expand All @@ -59,6 +67,12 @@ def prepare_csvs(self,
only_existing_cuis: bool = False, **kwargs) -> CDB:
r"""Compile one or multiple CSVs into a CDB.
Note: This class/method generally uses the same instance of the CDB.
So if you're using the same CDBMaker and calling `prepare_csvs`
multiple times, you are likely to get leakage from prior calls
into new ones.
To reset the CDB, call `reset_cdb`.
Args:
csv_paths (Union[pd.DataFrame, List[str]]):
An array of paths to the csv files that should be processed. Can also be an array of pd.DataFrames
Expand Down
10 changes: 10 additions & 0 deletions tests/test_cdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,21 @@ def setUp(self) -> None:
cdb_2_csv = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "examples", "cdb_2.csv")
self.tmp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "tmp")
os.makedirs(self.tmp_dir, exist_ok=True)
# resetting the CDB because otherwise the CDBMaker
# will refer to and modify the same instance of the CDB
# and this can (and does!) create side effects
CDBTests.cdb_maker.reset_cdb()
self.undertest = CDBTests.cdb_maker.prepare_csvs([cdb_csv, cdb_2_csv], full_build=True)

def tearDown(self) -> None:
    """Remove the temporary directory tree at `self.tmp_dir`."""
    # `setUp` (re)creates this directory, so it is safe to delete it here.
    scratch_dir = self.tmp_dir
    shutil.rmtree(scratch_dir)

def test_setup_changes_cdb(self):
    """Check that re-running `setUp` swaps in a different CDB object.

    The previous CDB is kept referenced for the duration of the check and
    compared by identity. Comparing bare `id()` values is fragile, because
    an id may be reused once the object it belonged to has been freed.
    """
    cdb_before = self.undertest
    self.setUp()
    cdb_after = self.undertest
    # Identity (not equality) matters here: a freshly built CDB could
    # compare equal while still being the very same, shared instance.
    self.assertIsNot(cdb_before, cdb_after)

def test_name2cuis(self):
self.assertEqual({
'second~csv': ['C0000239'],
Expand Down

0 comments on commit 9e5fca1

Please sign in to comment.