# ssd_r50_512_voc.yml
# Run-level settings for this SSD-512 / ResNet50 (variant d) VOC config.
# `architecture` names the SSD_512 section defined further down in this file.
architecture: SSD_512
use_gpu: true
max_iters: 360001
snapshot_iter: 10000
log_smooth_window: 20
log_iter: 20
metric: VOC
map_type: 11point
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
save_dir: output
# Fixed: this previously pointed at output/ssd_r101_512/best_model, i.e. the
# output directory of a different (ResNet101) config, while this file trains
# a depth-50 backbone.
# NOTE(review): assumes checkpoints are written to <save_dir>/<config name>/ —
# confirm against the training script before relying on this path.
weights: output/ssd_r50_512_voc/best_model
# 20 (label classes) + 1 (background)
num_classes: 21
# Detector wiring. The `backbone` and `multi_box_head` values match the names
# of the ResNet and MultiBoxHead sections in this file (presumably resolved by
# name by the config loader — confirm).
SSD_512:
  backbone: ResNet
  multi_box_head: MultiBoxHead
  # Settings for decoding / NMS of the raw predictions.
  output_decoder:
    # Class id 0 is treated as background (see num_classes: 20 + 1).
    background_label: 0
    keep_top_k: 200
    nms_eta: 1.0
    nms_threshold: 0.45
    nms_top_k: 400
    score_threshold: 0.01
# Backbone settings: depth-50 ResNet, variant "d", matching the
# ResNet50_vd pretrained weights referenced by `pretrain_weights`.
ResNet:
  norm_type: bn
  # norm_decay: 0.
  depth: 50
  feature_maps: [2, 3, 4, 5]
  freeze_at: 2
  variant: d
  # repeat_num: 2
# Prior-box / detection-head settings for a 512-pixel base size.
# Each per-level list below (aspect_ratios, min_sizes, max_sizes, steps) has
# seven entries.
# NOTE(review): ResNet.feature_maps lists only four stages; confirm the
# backbone actually supplies seven feature maps to this head.
MultiBoxHead:
  base_size: 512
  aspect_ratios:
    - [2.]
    - [2., 3.]
    - [2., 3.]
    - [2., 3.]
    - [2., 3.]
    - [2.]
    - [2.]
  min_ratio: 20
  max_ratio: 90
  # max_sizes[i] equals min_sizes[i + 1]; the last max size is 542.0.
  min_sizes: [20.0, 51.0, 133.0, 215.0, 296.0, 378.0, 460.0]
  max_sizes: [51.0, 133.0, 215.0, 296.0, 378.0, 460.0, 542.0]
  steps: [8, 16, 32, 64, 128, 256, 512]
  offset: 0.5
  flip: true
  kernel_size: 3
  pad: 1
# Learning-rate schedule: base rate plus two scheduler entries.
LearningRate:
  base_lr: 0.001
  schedulers:
    # Milestones at 240000 and 320000, both below max_iters (360001).
    - !PiecewiseDecay
      gamma: 0.1
      milestones: [240000, 320000]
    # Warmup over the first 2000 steps, starting from a factor of 0.
    - !LinearWarmup
      start_factor: 0
      steps: 2000
# Optimizer and weight regularization.
OptimizerBuilder:
  # Momentum optimizer with momentum 0.9.
  optimizer:
    momentum: 0.9
    type: Momentum
  # L2 regularization with factor 0.0005.
  regularizer:
    factor: 0.0005
    type: L2
# Training data pipeline: VOC trainval list, augmentation, 512x512 inputs.
TrainReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields:
      - 'image'
      - 'gt_bbox'
      - 'gt_class'
  dataset:
    !VOCDataSet
    dataset_dir: dataset/voc/pascalvoc
    anno_path: trainval.txt
    use_default_label: true
  # The transform list is kept in its original order; do not reorder.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
    - !RandomDistort
      brightness_lower: 0.875
      brightness_upper: 1.125
      is_order: true
    # Fill value matches the NormalizeImage mean used below.
    - !RandomExpand
      fill_value: [123, 117, 104]
    - !RandomCrop
      allow_no_crop: true
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 512
      use_cv2: false
    - !RandomFlipImage
      is_normalized: true
    - !Permute
      to_bgr: false
    # Mean subtraction only: is_scale is off and std is all ones.
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  batch_size: 8
  shuffle: true
  worker_num: 8
  bufsize: 16
  use_process: true
# Evaluation data pipeline: VOC test list, no random augmentation.
EvalReader:
  inputs_def:
    image_shape: [3, 512, 512]
    fields:
      - 'image'
      - 'gt_bbox'
      - 'gt_class'
      - 'im_shape'
      - 'im_id'
      - 'is_difficult'
  dataset:
    !VOCDataSet
    anno_path: test.txt
    dataset_dir: dataset/voc/pascalvoc
    use_default_label: true
  # The transform list is kept in its original order; do not reorder.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !NormalizeBox {}
    - !ResizeImage
      interp: 1
      target_size: 512
      use_cv2: false
    - !Permute
      to_bgr: false
    # Same mean/std as the training reader.
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  batch_size: 32
  worker_num: 8
  bufsize: 16
# Inference data pipeline: images from an ImageFolder dataset, one per batch.
TestReader:
  inputs_def:
    image_shape: [3,512,512]
    fields:
      - 'image'
      - 'im_id'
      - 'im_shape'
  dataset:
    !ImageFolder
    anno_path: test.txt
    use_default_label: true
  # The transform list is kept in its original order; do not reorder.
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !ResizeImage
      interp: 1
      max_size: 0
      target_size: 512
      use_cv2: false
    - !Permute
      to_bgr: false
    # Same mean/std as the training and evaluation readers.
    - !NormalizeImage
      is_scale: false
      mean: [123, 117, 104]
      std: [1, 1, 1]
  batch_size: 1