-
Notifications
You must be signed in to change notification settings - Fork 1
/
ssd_r101_300_voc.yml
155 lines (147 loc) · 3.23 KB
/
ssd_r101_300_voc.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Global run settings for the SSD / ResNet-101 (300x300, Pascal VOC) config.
architecture: SSD
use_gpu: true

# Iteration budget, checkpoint interval and logging cadence.
max_iters: 150001
snapshot_iter: 10000
log_smooth_window: 20
log_iter: 20

# Evaluation metric and mAP variant.
metric: VOC
map_type: 11point

# Backbone initialisation weights and the root directory for outputs.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
save_dir: output

# Checkpoint used for evaluation/inference. Points at this config's own
# output directory (ssd_r101_300_voc) instead of the ssd_vgg16_300_voc path
# it previously carried.
# NOTE(review): assumes checkpoints are written to
# <save_dir>/<config file name>/ -- confirm against the training script.
weights: output/ssd_r101_300_voc/model_final

# 20(label_class) + 1(background)
num_classes: 21
# Detector wiring: `backbone` and `multi_box_head` name the ResNet and
# MultiBoxHead stanzas defined at the top level of this file.
SSD:
  backbone: ResNet
  multi_box_head: MultiBoxHead
  # Post-processing settings for the decoded detections.
  output_decoder:
    background_label: 0
    score_threshold: 0.01
    # NMS parameters.
    nms_threshold: 0.45
    nms_eta: 1.0
    nms_top_k: 400
    keep_top_k: 200
# Backbone settings: depth 101 with variant `d`, matching the
# ResNet101_vd pretrained archive named in `pretrain_weights`.
ResNet:
  depth: 101
  variant: d
  norm_type: bn
  freeze_at: 2
  # Four entries are listed here.
  feature_maps: [2, 3, 4, 5]
# Prior-box / prediction head settings.
# aspect_ratios, min_sizes, max_sizes and steps each carry six entries.
# NOTE(review): the ResNet stanza lists four feature_maps -- confirm that the
# head really receives six input levels.
MultiBoxHead:
  base_size: 300
  # NOTE(review): min_ratio/max_ratio are given alongside explicit
  # min_sizes/max_sizes; presumably only one pair is used -- verify.
  min_ratio: 20
  max_ratio: 90
  min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
  max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
  # One list of aspect ratios per level.
  aspect_ratios:
    - [2.0]
    - [2.0, 3.0]
    - [2.0, 3.0]
    - [2.0, 3.0]
    - [2.0]
    - [2.0]
  steps: [8, 16, 32, 64, 100, 300]
  offset: 0.5
  flip: true
  min_max_aspect_ratios_order: true
  # Convolution geometry of the head.
  kernel_size: 3
  pad: 1
# Learning-rate configuration: a base rate plus two scheduler entries.
LearningRate:
  base_lr: 0.001
  schedulers:
    # Both milestones fall inside the max_iters budget of 150001.
    - !PiecewiseDecay
      milestones: [110000, 130000]
      gamma: 0.1
    # Warm-up entry covering 1000 steps, starting from factor 0.
    - !LinearWarmup
      steps: 1000
      start_factor: 0.0
# Optimizer and weight-regularisation settings.
OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005
# Training data pipeline: input definition, dataset, per-sample transforms
# and loader settings.
TrainReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields:
      - image
      - gt_bbox
      - gt_class
  dataset: !VOCDataSet
    dataset_dir: dataset/voc/pascalvoc
    anno_path: trainval.txt
    use_default_label: true
  # Transform entries are kept in their original sequence.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !RandomDistort
      brightness_lower: 0.875
      brightness_upper: 1.125
      is_order: true
    - !RandomExpand
      fill_value: [104, 117, 123]
    - !RandomCrop
      allow_no_crop: true
    - !NormalizeBox {}
    - !ResizeImage
      target_size: 300
      interp: 1
      use_cv2: false
    - !RandomFlipImage
      is_normalized: true
    - !Permute
      to_bgr: false
    # Same mean values as the RandomExpand fill_value above; std of 1 and
    # is_scale false.
    - !NormalizeImage
      is_scale: false
      mean: [104, 117, 123]
      std: [1, 1, 1]
  # Loader settings.
  batch_size: 16
  shuffle: true
  worker_num: 8
  bufsize: 16
  use_process: true
# Evaluation data pipeline. Reads test.txt from the same dataset_dir as the
# TrainReader stanza and applies no random augmentation transforms.
EvalReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields:
      - image
      - gt_bbox
      - gt_class
      - im_shape
      - im_id
      - is_difficult
  dataset: !VOCDataSet
    dataset_dir: dataset/voc/pascalvoc
    anno_path: test.txt
    use_default_label: true
  # Transform entries are kept in their original sequence.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !NormalizeBox {}
    - !ResizeImage
      target_size: 300
      interp: 1
      use_cv2: false
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [104, 117, 123]
      std: [1, 1, 1]
  # Loader settings.
  batch_size: 32
  worker_num: 8
  bufsize: 16
# Inference data pipeline: reads from an ImageFolder dataset, one image per
# batch, with no ground-truth fields in the input definition.
TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields:
      - image
      - im_id
      - im_shape
  dataset: !ImageFolder
    anno_path: test.txt
    use_default_label: true
  # Transform entries are kept in their original sequence.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !ResizeImage
      target_size: 300
      max_size: 0
      interp: 1
      use_cv2: false
    - !Permute
      to_bgr: false
    - !NormalizeImage
      is_scale: false
      mean: [104, 117, 123]
      std: [1, 1, 1]
  batch_size: 1