This repository has been archived by the owner on Nov 27, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset.py
154 lines (100 loc) · 4.35 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import pandas as pd
import numpy as np
import os
from torchvision.datasets import VisionDataset
from torchvision.datasets.folder import default_loader
from torchvision.datasets.utils import download_url
from torchvision.datasets.utils import extract_archive
import csv
import shutil
# If you are using the CUB dataset, use this function.
def make_cub_dataset(folder):
    """Reorganize a CUB-style dataset into ``train``/``test`` class folders.

    Reads three metadata files from *folder* (``classes.txt``,
    ``train_test_split.txt``, ``images.txt``) and copies each image from
    ``<folder>/images/<class>/<name>.jpg`` into
    ``<folder>/<split>/class_<id>/<n>.jpg``, where ``<n>`` is a sequential
    per-class counter starting at 1.

    Args:
        folder: path (relative to the current working directory) of the
            dataset root containing the CUB metadata files.

    Raises:
        OSError / KeyError / ValueError on missing files, unknown image
        ids, or malformed metadata lines.
    """
    split = {}        # image id (int) -> 'train' or 'test'
    train_count = {}  # class prefix -> next sequential train file number
    test_count = {}   # class prefix -> next sequential test file number

    # Pass 1: create one train/ and one test/ folder per class and
    # initialise the per-class counters.
    with open('{}/classes.txt'.format(folder)) as fp:
        for line in fp:
            line = line.strip()
            if not line:  # tolerate blank/trailing lines
                continue
            class_id, class_name = line.split(' ')
            # CUB class names look like '001.Black_footed_Albatross';
            # the numeric prefix is used as the class key throughout.
            folder_name = class_name.split('.')[0]
            train_count[folder_name] = 1
            test_count[folder_name] = 1
            cwd = os.getcwd()
            train_folder = os.path.join(cwd, folder, 'train', 'class_' + str(folder_name))
            test_folder = os.path.join(cwd, folder, 'test', 'class_' + str(folder_name))
            # exist_ok avoids the exists()/makedirs() race of the original.
            os.makedirs(train_folder, exist_ok=True)
            os.makedirs(test_folder, exist_ok=True)
    print('done making folders')

    # Pass 2: map every image id to its split ('1' means train in CUB).
    with open('{}/train_test_split.txt'.format(folder)) as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue
            image_id, image_split = line.split(' ')
            split[int(image_id)] = 'train' if int(image_split) else 'test'

    # Pass 3: copy every image into its split/class folder under a
    # sequential numeric name.
    with open('{}/images.txt'.format(folder)) as fp:
        for line in fp:
            line = line.strip()
            if not line:
                continue
            image_id, image_path = line.split(' ')
            # Image paths look like '001.Black_footed_Albatross/xxx.jpg';
            # the prefix before the first dot is the class key.
            img_class = image_path.split('.')[0]
            image_split = split[int(image_id)]
            full_image_path = '{}/images/{}'.format(folder, image_path)
            counts = train_count if image_split == 'train' else test_count
            index = counts[img_class]
            dest = '{}/{}/class_{}/{}.jpg'.format(folder, image_split, img_class, index)
            # shutil.copyfile replaces the original os.system('cp ...'),
            # which was non-portable, broke on paths with spaces, and
            # silently ignored copy failures.
            shutil.copyfile(full_image_path, dest)
            counts[img_class] += 1
    print('completed setting up image dataset')
def _copy_split_images(folder, split_name):
    """Copy every image listed in ``datasets/<split_name>.txt`` into
    ``<folder>/<split_name>/``, mirroring the path component after 'images/'.

    Each line of the split file is '<image_path> <image_id>'; the id is
    unused here.
    """
    with open('datasets/{}.txt'.format(split_name)) as fp:
        for line in fp:
            line = line.strip()
            if not line:  # tolerate blank/trailing lines
                continue
            image_path, img_id = line.split(' ')
            full_image_path = os.path.join(folder, image_path)
            # Keep only the part of the path after 'images/' so the
            # class subfolder structure is preserved under the split dir.
            rel_path = image_path.split('images/')[1]
            new_path = os.path.join(folder, split_name, rel_path)
            print('processing image from:{}'.format(new_path))
            shutil.copyfile(full_image_path, new_path)


def make_custom_dataset(folder):
    """Build a train/test folder layout for a custom dataset.

    Creates ``datasets/train/<class>`` and ``datasets/test/<class>`` for
    every class listed in ``datasets/classes.csv`` (one '<id>,<name>' row
    per class), then copies the images listed in ``datasets/train.txt``
    and ``datasets/test.txt`` into the matching split folders.

    Args:
        folder: dataset root used to resolve image source and
            destination paths.

    NOTE(review): the metadata paths are hard-coded to 'datasets/' while
    image copies use *folder* — these only agree when folder == 'datasets';
    preserved as-is, confirm against callers before changing.
    """
    with open('datasets/classes.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        for line_count, row in enumerate(csv_reader):
            print('working on {}th element'.format(line_count))
            # exist_ok avoids the exists()/makedirs() race of the original.
            os.makedirs('{}/{}'.format('datasets/train', row[1]), exist_ok=True)
            os.makedirs('{}/{}'.format('datasets/test', row[1]), exist_ok=True)
    # The two split loops were duplicated verbatim; factored into one helper.
    _copy_split_images(folder, 'train')
    _copy_split_images(folder, 'test')
if __name__ == '__main__':
    # Script entry point: reorganize the CUB dataset rooted at ./datasets.
    make_cub_dataset('datasets')