Skip to content

Commit

Permalink
Temp
Browse files: browse the repository at this point in the history
  • Loading branch information
EthanSteinberg committed Jul 10, 2024
1 parent 95e0712 commit abb9b98
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 26 deletions.
2 changes: 1 addition & 1 deletion src/femr/featurizers/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _features_map_func(
# Construct CSR sparse matrix
# non-zero entries in sparse matrix
data_and_indices = np.zeros((1024, 2), np.float64)
data_and_indices_arrays = []
data_and_indices_arrays: List[np.ndarray] = []

current_index = 0

Expand Down
2 changes: 1 addition & 1 deletion src/femr/featurizers/featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_initial_preprocess_data(self) -> OnlineStatistics:

def add_preprocess_data(
self, age_statistics: OnlineStatistics, patient: meds_reader.Patient, label_map: Mapping[int, List[meds.Label]]
) -> OnlineStatistics:
):
"""Save the age of this patient (in years) at each label, to use for normalization."""
patient_birth_date: Optional[datetime.datetime] = get_patient_birthdate(patient)
assert patient_birth_date, "Patients must have a birth date"
Expand Down
20 changes: 9 additions & 11 deletions src/femr/post_etl_pipelines/stanford.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import meds
import meds_reader
import meds_reader.transform

from femr.transforms import delta_encode, remove_nones
from femr.transforms.stanford import (
Expand All @@ -23,10 +24,12 @@ def _is_visit_measurement(e: meds_reader.Event) -> bool:
return e.table == "visit"


def _get_stanford_transformations() -> Callable[[meds_reader.Patient], meds_reader.Patient]:
def _get_stanford_transformations() -> (
Callable[[meds_reader.transform.MutablePatient], meds_reader.transform.MutablePatient]
):
"""Get the list of current OMOP transformations."""
# All of these transformations are information preserving
transforms: Sequence[Callable[[meds_reader.Patient], meds_reader.Patient]] = [
transforms: Sequence[Callable[[meds_reader.transform.MutablePatient], meds_reader.transform.MutablePatient]] = [
move_pre_birth,
move_visit_start_to_first_event_start,
move_to_day_end,
Expand Down Expand Up @@ -72,16 +75,11 @@ def femr_stanford_omop_fixer_program() -> None:

args = parser.parse_args()

os.mkdir(args.target_dataset)

dataset = datasets.Dataset.from_parquet(os.path.join(args.source_dataset, "data", "*"))

fixed_patient = dataset.map(_get_stanford_transformations(), num_proc=args.num_proc)

os.mkdir(os.path.join(args.target_dataset, "data"))
fixed_patient.to_parquet(os.path.join(args.target_dataset, "data", "data.parquet"))
meds_reader.transform.transform_meds_dataset(
args.source_dataset, args.target_dataset, _get_stanford_transformations(), num_threads=args.num_proc
)

with open(os.path.join(args.source_dataset, "metadata.json")) as f:
with open(os.path.join(args.target_dataset, "metadata.json")) as f:
metadata = json.load(f)

# Let's mark that we modified this dataset
Expand Down
15 changes: 8 additions & 7 deletions src/femr/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

import meds
import meds_reader
import meds_reader.transform


def remove_nones(
patient: meds_reader.Patient,
patient: meds_reader.transform.MutablePatient,
do_not_apply_to_filter: Optional[Callable[[meds_reader.Event], bool]] = None,
) -> meds_reader.Patient:
) -> meds_reader.transform.MutablePatient:
"""Remove duplicate codes w/in same day if duplicate code has None value.
There is no point in having a None value in a timeline when we have an actual value within the same day.
Expand All @@ -25,7 +26,7 @@ def remove_nones(
if any(v is not None for v in value):
has_value.add((event.code, event.time.date()))

new_events: List[meds_reader.Event] = []
new_events: List[meds_reader.transform.MutableEvent] = []
for event in patient.events:
value = (event.numeric_value, event.text_value)
if (
Expand All @@ -45,9 +46,9 @@ def remove_nones(


def delta_encode(
patient: meds_reader.Patient,
patient: meds_reader.transform.MutablePatient,
do_not_apply_to_filter: Optional[Callable[[meds_reader.Event], bool]] = None,
) -> meds_reader.Patient:
) -> meds_reader.transform.MutablePatient:
"""Delta encodes the patient.
The idea behind delta encoding is that if we get duplicate values within a short amount of time
Expand All @@ -59,7 +60,7 @@ def delta_encode(

last_value: Dict[Tuple[str, datetime.date], Any] = {}

new_events: List[meds_reader.Event] = []
new_events: List[meds_reader.transform.MutableEvent] = []
for event in patient.events:
key = (event.code, event.time.date())
value = (event.numeric_value, event.text_value)
Expand All @@ -74,7 +75,7 @@ def delta_encode(
return patient


def fix_events(patient: meds_reader.Patient) -> meds_reader.Patient:
def fix_events(patient: meds_reader.transform.MutablePatient) -> meds_reader.transform.MutablePatient:
"""After a series of transformations, sometimes the patient structure gets a bit messed up.
The usual issues are either duplicate event times or missorted events.
Expand Down
14 changes: 8 additions & 6 deletions src/femr/transforms/stanford.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Dict, List, Tuple

import meds
import meds_reader
import meds_reader.transform


def _move_date_to_end(
Expand All @@ -18,7 +18,9 @@ def _move_date_to_end(
return d


def move_visit_start_to_first_event_start(patient: meds_reader.Patient) -> meds_reader.Patient:
def move_visit_start_to_first_event_start(
patient: meds_reader.transform.MutablePatient,
) -> meds_reader.transform.MutablePatient:
"""Assign visit start times to equal start time of first event in visit
This function assigns the start time associated with each visit to be
Expand Down Expand Up @@ -78,7 +80,7 @@ def move_visit_start_to_first_event_start(patient: meds_reader.Patient) -> meds_
return patient


def move_to_day_end(patient: meds_reader.Patient) -> meds_reader.Patient:
def move_to_day_end(patient: meds_reader.transform.MutablePatient) -> meds_reader.transform.MutablePatient:
"""We assume that everything coded at midnight should actually be moved to the end of the day."""
for event in patient.events:
event.time = _move_date_to_end(event.time)
Expand All @@ -91,7 +93,7 @@ def move_to_day_end(patient: meds_reader.Patient) -> meds_reader.Patient:
return patient


def switch_to_icd10cm(patient: meds_reader.Patient) -> meds_reader.Patient:
def switch_to_icd10cm(patient: meds_reader.transform.MutablePatient) -> meds_reader.transform.MutablePatient:
"""Switch from ICD10 to ICD10CM."""
for event in patient.events:
if event.code.startswith("ICD10/"):
Expand All @@ -100,7 +102,7 @@ def switch_to_icd10cm(patient: meds_reader.Patient) -> meds_reader.Patient:
return patient


def move_pre_birth(patient: meds_reader.Patient) -> meds_reader.Patient:
def move_pre_birth(patient: meds_reader.transform.MutablePatient) -> meds_reader.transform.MutablePatient:
"""Move all events to after the birth of a patient."""
birth_date = None
for event in patient.events:
Expand Down Expand Up @@ -129,7 +131,7 @@ def move_pre_birth(patient: meds_reader.Patient) -> meds_reader.Patient:
return patient


def move_billing_codes(patient: meds_reader.Patient) -> meds_reader.Patient:
def move_billing_codes(patient: meds_reader.transform.MutablePatient) -> meds_reader.transform.MutablePatient:
"""Move billing codes to the end of each visit.
One issue with our OMOP extract is that billing codes are incorrectly assigned at the start of the visit.
Expand Down

0 comments on commit abb9b98

Please sign in to comment.