-
Notifications
You must be signed in to change notification settings - Fork 0
/
wsi_deidentification.py
93 lines (78 loc) · 3.54 KB
/
wsi_deidentification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import pydicom
from pydicom.uid import ExplicitVRLittleEndian
import logging
import os
import numpy as np
from glob import glob
from pathlib import Path
import shutil
# Configure the root logger when this module is loaded so that INFO-level
# messages (used for the verbose progress output below) are emitted.
logging.basicConfig(level=logging.INFO)
class WSIDeidentifier:
    """
    Class for deidentifying the image information of whole slide images (WSI).

    Files whose ImageType (0008,0008) contains "LABEL" get their pixel data
    replaced by zeros, files whose ImageType contains "OVERVIEW" get the
    leading part of their pixel array zeroed, and every other file is copied
    unchanged to the output directory.

    Args:
        verbose (bool, optional): If True, enables verbose logging. Defaults to False.
        out_path (str, optional): The output directory for the deidentified images.
            Defaults to None, but a directory must be supplied before any file is
            processed (a TypeError is raised otherwise).

    Methods:
        __call__(self, dataset: str) -> None:
            Deidentifies the specified dataset by calling the _deidentify method.
        _deidentify(dcm_file: str, out: str, verbose: bool = False) -> None:
            Deidentifies a DICOM file by anonymizing the scan label and overview (if present).
            Saves the deidentified file to the specified output path.
    """

    # Number of leading indices along the second pixel-array axis that are
    # zeroed in overview images.
    # NOTE(review): presumably the image columns where the slide label shows
    # up in the overview -- confirm against the scanner's layout.
    _OVERVIEW_MASK_COLUMNS = 250

    def __init__(self, verbose: bool = False, out_path: str = None) -> None:
        # An explicit ``verbose=None`` is treated like False.
        self._verbose = verbose if verbose is not None else False
        self._out = out_path

    def __call__(self, dataset: str) -> None:
        """
        Deidentifies the specified dataset by calling the _deidentify method.

        Args:
            dataset (str): Path to a single DICOM file, or to a directory whose
                ``*.dcm`` files (non-recursive) are processed one by one.

        Returns:
            None
        """
        if self._verbose:
            logging.info(f"Deidentifying {dataset}")

        if os.path.isdir(dataset):
            for dcm in glob(os.path.join(dataset, "*.dcm")):
                self._deidentify(dcm, self._out, self._verbose)
        elif os.path.isfile(dataset):
            self._deidentify(dataset, self._out, self._verbose)
        else:
            # Previously this case was a silent no-op; keep it non-fatal but
            # make the skipped input visible.
            logging.warning(
                "Nothing to deidentify: %s is neither a file nor a directory",
                dataset,
            )

    @staticmethod
    def _deidentify(dcm_file: str, out: str, verbose: bool = False) -> None:
        """
        Deidentifies a DICOM file by anonymizing the scan label and overview (if present).
        Saves the deidentified file to the specified output path.

        Args:
            dcm_file (str): The path to the DICOM file to be deidentified.
            out (str): The output directory for the deidentified file. It is
                created if it does not exist yet.
            verbose (bool, optional): If True, enables verbose logging. Defaults to False.

        Returns:
            None

        Raises:
            TypeError: If ``out`` is None.
            KeyError: If the file has no ImageType (0008,0008) element.
        """
        if out is None:
            # Fail early with a readable message instead of the opaque
            # os.path.join error that would otherwise surface after the read.
            raise TypeError(
                "No output directory given: pass out_path when constructing "
                "WSIDeidentifier"
            )
        target = os.path.join(out, Path(dcm_file).name)

        # Only the header is needed to decide how the file must be handled.
        header = pydicom.dcmread(dcm_file, stop_before_pixels=True)
        image_type = header[0x0008, 0x0008].value

        # An empty ``out`` means "current directory"; nothing to create then.
        if out:
            os.makedirs(out, exist_ok=True)

        # Anonymize scan label
        if "LABEL" in image_type:
            dcm = pydicom.dcmread(dcm_file)
            try:
                dcm.PixelData = np.zeros_like(dcm.pixel_array).tobytes()
                dcm.save_as(target)
            except Exception:
                # Best effort: if the label cannot be blanked, make sure no
                # label file is left in the output directory at all.
                logging.info("Label file does not contain pixel array, remove file instead of overwriting ...")
                logging.debug("Blanking %s failed", dcm_file, exc_info=True)
                if os.path.exists(target):
                    os.remove(target)
            else:
                if verbose:
                    logging.info("Label file anonymized!")
        # Anonymize overview
        elif "OVERVIEW" in image_type:
            dcm = pydicom.dcmread(dcm_file)
            # Hold on to one array object so the zeroed data is exactly what
            # gets written back, independent of pixel_array caching.
            pixels = dcm.pixel_array
            # The trailing Ellipsis keeps the original behaviour for arrays
            # with three or more axes and also accepts 2-D arrays.
            pixels[:, :WSIDeidentifier._OVERVIEW_MASK_COLUMNS, ...] = 0
            dcm.file_meta.TransferSyntaxUID = ExplicitVRLittleEndian
            dcm.PixelData = pixels.tobytes()
            dcm.save_as(target)
            if verbose:
                logging.info("Overview file anonymized!")
        else:
            # Copy the file if it doesn't exist in the output folder
            if not os.path.exists(target):
                shutil.copy2(dcm_file, target)