# @package _group_
# Run-wide settings: precision flags, logging format/cadence, TensorBoard
# output directory and the directory holding the user-defined modules.
common:
  fp16: true
  log_format: json
  log_interval: 200
  tensorboard_logdir: tb
  # Written with an explicit decimal point so that YAML 1.1 loaders (which
  # require one to recognise a float) do not read this value as a string.
  min_loss_scale: 1.0e-6
  fp16_no_flatten_grads: false
  # Interpolated from the PWD environment variable, so the resulting path
  # depends on the directory the job is launched from.
  user_dir: ${env:PWD}/examples/data2vec
# Checkpoint cadence, retention and the checkpoint to resume from.
checkpoint:
  save_interval: 1
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true
  # NOTE(review): absolute path to one specific run's checkpoint on one
  # cluster home directory. It will not exist on other machines; override it
  # (e.g. `checkpoint.restore_file=<path>` on the command line) when running
  # elsewhere.
  restore_file: /nlsasfs/home/nltm-pilot/arjung/ash_exp_acl/fairseq_new_version/new_fairseq/fairseq/multirun/2024-05-15/22-35-38/0/checkpoints/checkpoint_last.pt
# Task selection and audio sample constraints.
task:
  _name: audio_pretraining
  # NOTE(review): machine-specific absolute path; override `task.data` with
  # the local dataset directory before launching.
  data: /private/home/abaevski/data/librispeech/full
  # Sample-size bounds; presumably counted in raw waveform samples -
  # TODO confirm against the task implementation.
  max_sample_size: 320000
  min_sample_size: 32000
  normalize: true
# Data loading and batching settings.
dataset:
  num_workers: 6
  max_tokens: 1000000
  skip_invalid_size_inputs_valid_test: true
  # NOTE(review): validation is disabled below, so this interval presumably
  # has no effect in this configuration - confirm before relying on it.
  validate_interval: 5
  required_batch_size_multiple: 1
  disable_validation: true
# Distributed-training settings; the world size is set to 1 in this file.
distributed_training:
  distributed_world_size: 1
  ddp_backend: legacy_ddp
# Criterion selection plus the list of keys named under `log_keys`.
criterion:
  _name: model
  log_keys:
    - ema_decay
    - target_var
    - pred_var
    - model_norm
    - ema_norm
    - masked_pct
# Training-length limits and learning rate.
optimization:
  # Both an update limit and an epoch limit are set; presumably training
  # stops at whichever is reached first - TODO confirm against the trainer.
  max_update: 800000
  max_epoch: 600
  # Kept as a one-element list, exactly as in the original.
  lr: [0.00075]
# Optimizer selection and its hyper-parameters.
optimizer:
  _name: adam
  adam_betas: [0.9, 0.98]
  # Explicit decimal point so YAML 1.1 loaders resolve a float, not a string.
  adam_eps: 1.0e-06
  weight_decay: 0.01
# Learning-rate schedule selection and warm-up length.
lr_scheduler:
  _name: cosine
  warmup_updates: 8000
# Model definition: global settings first, then per-modality settings under
# `modalities`. Only the `audio` modality is configured in this file.
model:
  _name: data2vec_multi

  loss_beta: 0
  loss_scale: null

  depth: 8
  embed_dim: 768
  clone_batch: 1

  # EMA settings: a start decay, an end decay and the step at which the
  # anneal ends.
  ema_decay: 0.999
  ema_end_decay: 0.99999
  ema_anneal_end_step: 75000
  ema_encoder_only: false

  # Target-construction settings.
  average_top_k_layers: 8
  instance_norm_target_layer: true
  layer_norm_target_layer: false
  layer_norm_targets: false

  layerdrop: 0.05
  # Explicit decimal point so YAML 1.1 loaders resolve a float, not a string.
  norm_eps: 1.0e-5

  supported_modality: AUDIO

  modalities:
    audio:
      # Quoted so the bracketed expression stays a single string instead of
      # being parsed as a YAML flow sequence.
      feature_encoder_spec: '[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]'
      conv_pos_depth: 5
      conv_pos_width: 95
      conv_pos_groups: 16
      prenet_depth: 4
      mask_prob: 0.5
      mask_prob_adjust: 0.05
      inverse_mask: false
      mask_length: 5
      mask_noise_std: 0.01
      mask_dropout: 0
      add_masks: false
      ema_local_encoder: false
      use_alibi_encoder: true
      prenet_layerdrop: 0.05
      prenet_dropout: 0.1
      learned_alibi_scale: true
      learned_alibi_scale_per_head: true
      # NOTE(review): nesting reconstructed from a flattened copy; `decoder`
      # is assumed to belong to the `audio` modality - confirm against the
      # model's config schema.
      decoder:
        input_dropout: 0.1
        decoder_dim: 384
        decoder_groups: 16
        decoder_kernel: 7
        decoder_layers: 4