-
Notifications
You must be signed in to change notification settings - Fork 0
/
MMSESTSA85.m
226 lines (190 loc) · 8.09 KB
/
MMSESTSA85.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
function output=MMSESTSA85(signal,fs,IS)
% output=MMSESTSA85(signal,fs,IS)
% Shor time Spectral Amplitude Minimum Mean Square Error Method for
% Denoising noisy speech. based on log-Spectral MMSE Ephraim et al (1985)
% paper under the same title. signal is the input noisy speech, fs is its
% sampling frequency and IS (which is optional) is the initial silence
% estimate (The default value is 0.25 which means that it is assumed that
% the first 0.25 seconds of the signal is speech inactive period and may be
% used for initial noise parameter estimation. IS may also be used as a
% structure for compatibility with my other functions but please do not try
% to use IS it in that way as its functionality is not reliable.
% The output is the restored estimate of clean speech.
% Author: Esfandiar Zavarehei
% Created: Jan-04
% Last Modified: 24-01-05
if (nargin<3 | isstruct(IS))
IS=.25; %Initial Silence or Noise Only part in seconds
end
W=fix(.025*fs); %Window length is 25 ms
SP=.4; %Shift percentage is 40% (10ms) %Overlap-Add method works good with this value(.4)
wnd=hamming(W);
%IGNORE FROM HERE ...............................
if (nargin>=3 & isstruct(IS))%This option is for compatibility with another programme
W=IS.windowsize
SP=IS.shiftsize/W;
%nfft=IS.nfft;
wnd=IS.window;
if isfield(IS,'IS')
IS=IS.IS;
else
IS=.25;
end
end
% ......................................UP TO HERE
pre_emph=0;
signal=filter([1 -pre_emph],1,signal);
NIS=fix((IS*fs-W)/(SP*W) +1);%number of initial silence segments
y=segment(signal,W,SP,wnd); % This function chops the signal into frames
Y=fft(y);
YPhase=angle(Y(1:fix(end/2)+1,:)); %Noisy Speech Phase
Y=abs(Y(1:fix(end/2)+1,:));%Specrogram
numberOfFrames=size(Y,2);
FreqResol=size(Y,1);
N=mean(Y(:,1:NIS)')'; %initial Noise Power Spectrum mean
LambdaD=mean((Y(:,1:NIS)').^2)';%initial Noise Power Spectrum variance
alpha=.99; %used in smoothing xi (For Deciesion Directed method for estimation of A Priori SNR)
NoiseCounter=0;
NoiseLength=9;%This is a smoothing factor for the noise updating
G=ones(size(N));%Initial Gain used in calculation of the new xi
Gamma=G;
X=zeros(size(Y)); % Initialize X (memory allocation)
% h=waitbar(0,'Wait...');
for i=1:numberOfFrames
%%%%%%%%%%%%%%%%VAD and Noise Estimation START
if i<=NIS % If initial silence ignore VAD
SpeechFlag=0;
NoiseCounter=100;
else % Else Do VAD
[NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(Y(:,i),N,NoiseCounter); %Magnitude Spectrum Distance VAD
end
if SpeechFlag==0 % If not Speech Update Noise Parameters
N=(NoiseLength*N+Y(:,i))/(NoiseLength+1); %Update and smooth noise mean
LambdaD=(NoiseLength*LambdaD+(Y(:,i).^2))./(1+NoiseLength); %Update and smooth noise variance
end
%%%%%%%%%%%%%%%%%%%VAD and Noise Estimation END
gammaNew=(Y(:,i).^2)./LambdaD; %A postiriori SNR
xi=alpha*(G.^2).*Gamma+(1-alpha).*max(gammaNew-1,0); %Decision Directed Method for A Priori SNR
Gamma=gammaNew;
nu=Gamma.*xi./(1+xi); % A Function used in Calculation of Gain
G= (xi./(1+xi)).*exp(.5*expint(nu)); % Log spectral MMSE [Ephraim 1985]
X(:,i)=G.*Y(:,i); %Obtain the new Cleaned value
% waitbar(i/numberOfFrames,h,num2str(fix(100*i/numberOfFrames)));
end
% close(h);
output=OverlapAdd2(X,YPhase,W,SP*W); %Overlap-add Synthesis of speech
output=filter(1,[1 -pre_emph],output); %Undo the effect of Pre-emphasis
ol=length(output);
if ol<length(signal)
output=[output;zeros(length(signal)-ol,1)];
end
function ReconstructedSignal=OverlapAdd2(XNEW,yphase,windowLen,ShiftLen);
%Y=OverlapAdd(X,A,W,S);
%Y is the signal reconstructed signal from its spectrogram. X is a matrix
%with each column being the fft of a segment of signal. A is the phase
%angle of the spectrum which should have the same dimension as X. if it is
%not given the phase angle of X is used which in the case of real values is
%zero (assuming that its the magnitude). W is the window length of time
%domain segments if not given the length is assumed to be twice as long as
%fft window length. S is the shift length of the segmentation process ( for
%example in the case of non overlapping signals it is equal to W and in the
%case of %50 overlap is equal to W/2. if not givven W/2 is used. Y is the
%reconstructed time domain signal.
%Sep-04
%Esfandiar Zavarehei
if nargin<2
yphase=angle(XNEW);
end
if nargin<3
windowLen=size(XNEW,1)*2;
end
if nargin<4
ShiftLen=windowLen/2;
end
if fix(ShiftLen)~=ShiftLen
ShiftLen=fix(ShiftLen);
disp('The shift length have to be an integer as it is the number of samples.')
disp(['shift length is fixed to ' num2str(ShiftLen)])
end
[FreqRes FrameNum]=size(XNEW);
Spec=XNEW.*exp(j*yphase);
if mod(windowLen,2) %if FreqResol is odd
Spec=[Spec;flipud(conj(Spec(2:end,:)))];
else
Spec=[Spec;flipud(conj(Spec(2:end-1,:)))];
end
sig=zeros((FrameNum-1)*ShiftLen+windowLen,1);
weight=sig;
for i=1:FrameNum
start=(i-1)*ShiftLen+1;
spec=Spec(:,i);
sig(start:start+windowLen-1)=sig(start:start+windowLen-1)+real(ifft(spec,windowLen));
end
ReconstructedSignal=sig;
function Seg=segment(signal,W,SP,Window)
% SEGMENT chops a signal to overlapping windowed segments
% A= SEGMENT(X,W,SP,WIN) returns a matrix which its columns are segmented
% and windowed frames of the input one dimentional signal, X. W is the
% number of samples per window, default value W=256. SP is the shift
% percentage, default value SP=0.4. WIN is the window that is multiplied by
% each segment and its length should be W. the default window is hamming
% window.
% 06-Sep-04
% Esfandiar Zavarehei
if nargin<3
SP=.4;
end
if nargin<2
W=256;
end
if nargin<4
Window=hamming(W);
end
Window=Window(:); %make it a column vector
L=length(signal);
SP=fix(W.*SP);
N=fix((L-W)/SP +1); %number of segments
Index=(repmat(1:W,N,1)+repmat((0:(N-1))'*SP,1,W))';
hw=repmat(Window,1,N);
Seg=signal(Index).*hw;
function [NoiseFlag, SpeechFlag, NoiseCounter, Dist]=vad(signal,noise,NoiseCounter,NoiseMargin,Hangover)
%[NOISEFLAG, SPEECHFLAG, NOISECOUNTER, DIST]=vad(SIGNAL,NOISE,NOISECOUNTER,NOISEMARGIN,HANGOVER)
%Spectral Distance Voice Activity Detector
%SIGNAL is the the current frames magnitude spectrum which is to labeld as
%noise or speech, NOISE is noise magnitude spectrum template (estimation),
%NOISECOUNTER is the number of imediate previous noise frames, NOISEMARGIN
%(default 3)is the spectral distance threshold. HANGOVER ( default 8 )is
%the number of noise segments after which the SPEECHFLAG is reset (goes to
%zero). NOISEFLAG is set to one if the the segment is labeld as noise
%NOISECOUNTER returns the number of previous noise segments, this value is
%reset (to zero) whenever a speech segment is detected. DIST is the
%spectral distance.
%Saeed Vaseghi
%edited by Esfandiar Zavarehei
%Sep-04
if nargin<4
NoiseMargin=3;
end
if nargin<5
Hangover=8;
end
if nargin<3
NoiseCounter=0;
end
FreqResol=length(signal);
SpectralDist= 20*(log10(signal)-log10(noise));
SpectralDist(find(SpectralDist<0))=0;
Dist=mean(SpectralDist);
if (Dist < NoiseMargin)
NoiseFlag=1;
NoiseCounter=NoiseCounter+1;
else
NoiseFlag=0;
NoiseCounter=0;
end
% Detect noise only periods and attenuate the signal
if (NoiseCounter > Hangover)
SpeechFlag=0;
else
SpeechFlag=1;
end