-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset_h5.py
100 lines (77 loc) · 2.42 KB
/
dataset_h5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import numpy as np
import pandas as pd
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import h5py
class Whole_Slide_Bag(Dataset):
    """Dataset over image patches stored in an HDF5 file.

    Item ``idx`` pairs ``imgs[idx]`` with ``coords[idx]`` from the file's
    ``'imgs'`` and ``'coords'`` datasets. No file handle is kept on the
    instance: the file is reopened for every access (presumably so instances
    can be handed to DataLoader worker processes -- confirm with callers).
    """

    def __init__(self,
                 file_path,
                 img_transforms=None):
        """
        Args:
            file_path (string): Path to the .h5 file containing patched data.
            img_transforms (callable, optional): Optional transform to be
                applied on a sample. When ``None``, samples are returned as
                untransformed PIL images.
        """
        self.roi_transforms = img_transforms
        self.file_path = file_path

        # Only the number of patches is cached; the file is closed right away.
        with h5py.File(self.file_path, "r") as f:
            self.length = len(f['imgs'])

        self.summary()

    def __len__(self):
        """Return the number of entries in the ``'imgs'`` dataset."""
        return self.length

    def summary(self):
        """Print the attributes of the ``'imgs'`` dataset and the transform."""
        with h5py.File(self.file_path, "r") as hdf5_file:
            dset = hdf5_file['imgs']
            for name, value in dset.attrs.items():
                print(name, value)

        print('transformations:', self.roi_transforms)

    def _apply_transforms(self, img):
        """Return ``img`` passed through the transform, or unchanged if none is set."""
        if self.roi_transforms is None:
            return img
        return self.roi_transforms(img)

    def __getitem__(self, idx):
        """Load one patch and its coordinate entry.

        Args:
            idx: Index into the ``'imgs'`` and ``'coords'`` datasets.

        Returns:
            dict: ``{'img': ..., 'coord': ...}`` where ``'img'`` is the
            (optionally transformed) PIL image built from ``imgs[idx]`` and
            ``'coord'`` is ``coords[idx]`` as read by h5py.
        """
        with h5py.File(self.file_path, 'r') as hdf5_file:
            img = hdf5_file['imgs'][idx]
            coord = hdf5_file['coords'][idx]

        img = Image.fromarray(img)
        # The transform is optional (default None); calling it
        # unconditionally would raise TypeError.
        img = self._apply_transforms(img)
        return {'img': img, 'coord': coord}
class Whole_Slide_Bag_FP(Dataset):
    """Dataset that reads patches from a slide at coordinates stored in HDF5.

    Only the coordinates live in the .h5 file; the pixels for item ``idx``
    are read from ``wsi`` at ``coords[idx]``, using the ``patch_level`` and
    ``patch_size`` attributes stored on the ``'coords'`` dataset.
    """

    def __init__(self,
                 file_path,
                 wsi,
                 img_transforms=None):
        """
        Args:
            file_path (string): Path to the .h5 file containing patched data.
            wsi: Slide object exposing ``read_region(location, level, size)``
                whose result supports ``.convert('RGB')`` (presumably an
                OpenSlide handle -- verify against the caller).
            img_transforms (callable, optional): Optional transform to be
                applied on a sample. When ``None``, samples are returned as
                untransformed RGB images.
        """
        self.wsi = wsi
        self.roi_transforms = img_transforms
        self.file_path = file_path

        # Cache the patch geometry and count, then close the file.
        with h5py.File(self.file_path, "r") as f:
            dset = f['coords']
            self.patch_level = dset.attrs['patch_level']
            self.patch_size = dset.attrs['patch_size']
            self.length = len(dset)

        self.summary()

    def __len__(self):
        """Return the number of entries in the ``'coords'`` dataset."""
        return self.length

    def summary(self):
        """Print the attributes of the ``'coords'`` dataset and the transform."""
        # Use a context manager so the HDF5 handle is released once the
        # attributes have been printed.
        with h5py.File(self.file_path, "r") as hdf5_file:
            dset = hdf5_file['coords']
            for name, value in dset.attrs.items():
                print(name, value)

        print('\nfeature extraction settings')
        print('transformations: ', self.roi_transforms)

    def _apply_transforms(self, img):
        """Return ``img`` passed through the transform, or unchanged if none is set."""
        if self.roi_transforms is None:
            return img
        return self.roi_transforms(img)

    def __getitem__(self, idx):
        """Read the patch at ``coords[idx]`` from the slide.

        Args:
            idx: Index into the ``'coords'`` dataset.

        Returns:
            dict: ``{'img': ..., 'coord': ...}`` where ``'img'`` is the
            (optionally transformed) RGB region of size
            ``(patch_size, patch_size)`` read at level ``patch_level`` and
            ``'coord'`` is ``coords[idx]`` as read by h5py.
        """
        with h5py.File(self.file_path, 'r') as hdf5_file:
            coord = hdf5_file['coords'][idx]

        region = self.wsi.read_region(
            coord, self.patch_level, (self.patch_size, self.patch_size))
        img = region.convert('RGB')

        # The transform is optional (default None); calling it
        # unconditionally would raise TypeError.
        img = self._apply_transforms(img)
        return {'img': img, 'coord': coord}
class Dataset_All_Bags(Dataset):
    """Dataset exposing the ``slide_id`` column of a CSV file, row by row."""

    def __init__(self, csv_path):
        """
        Args:
            csv_path: Path (or anything ``pandas.read_csv`` accepts) of a CSV
                file with a ``slide_id`` column. The column is only looked up
                on item access, not validated here.
        """
        self.df = pd.read_csv(csv_path)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``slide_id`` value of row ``idx``.

        Indexing is positional, so negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is out of range. Plain iteration
                (``for slide_id in dataset``) relies on this to terminate,
                because the base ``Dataset`` class defines no ``__iter__``.
            KeyError: If the CSV has no ``slide_id`` column.
        """
        # .iloc is positional; plain ``series[idx]`` is a label lookup that
        # raises KeyError for out-of-range or negative positions.
        return self.df['slide_id'].iloc[idx]