-
Notifications
You must be signed in to change notification settings - Fork 1
/
HFCMCostW2x.m
151 lines (134 loc) · 6.34 KB
/
HFCMCostW2x.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
% function [cost,grad] = HFCMCostW2x(theta, visibleSize, hiddenSize, ...
% lambda, data, order, thetaW1, W3)
function [cost,grad] = HFCMCostW2x(theta, NumSAE, NumHFCM, SAEFeaturesTotal, hiddenSize, ...
                                   lambda, Labels, order, W3, allWFCM)
% HFCMCostW2x  Squared-error cost and its gradient with respect to the
% HFCM parameters (W2, Wx, b2 of every HFCM), with W3 held fixed.
%
% Forward model implemented below, with D = hiddenSize*NumSAE:
%   [a2, a2t] = DataforHFCM(SAEFeaturesTotal, order)
%   a3_h = sigmoid(W2_h'*a2 + b2_h + Wx_h'*a2t)        h = 1..NumHFCM
%   a4   = W3' * [a2; a3_1; ...; a3_NumHFCM]
%   cost = sum(sum((a4 - Y).^2))/length(Y) ...
%          + (lambda/2) * sum_h (sum(W2_h(:).^2) + sum(Wx_h(:).^2))
% where Y = Labels'.
%
% Inputs
%   theta            column vector of stacked parameters. It is cut into
%                    size(theta,1)/size(allWFCM,2) long pieces, one per HFCM;
%                    each piece is laid out as [W2(:); Wx(:); b2].
%   NumSAE,
%   hiddenSize       only their product D is used: W2 is D-by-D, Wx is
%                    D*(order-1)-by-D, and a2 must have D rows.
%   NumHFCM          number of HFCM pieces that are evaluated.
%   SAEFeaturesTotal data handed to DataforHFCM.
%   lambda           weight-decay coefficient (applied to W2 and Wx, not b2).
%   Labels           targets; transposed so that Y matches the size of a4.
%   order            order passed to DataforHFCM; also fixes the size of Wx.
%   W3               fixed output weights. Rows D*h+1 : D*(h+1) are the rows
%                    that multiply the output of HFCM h (rows 1:D multiply a2).
%   allWFCM          only its number of columns is read (piece count of theta).
%
% Outputs
%   cost             scalar cost (also printed with fprintf).
%   grad             column vector [W2grad(:); Wxgrad(:); b2grad(:)] for each
%                    evaluated HFCM, stacked in the same order as theta.

    D          = hiddenSize*NumSAE;   % rows of a2 / side length of W2
    numW2      = D*D;                 % number of entries in W2
    numWeights = numW2*order;         % entries in W2 and Wx together

    NumallWFCM = size(allWFCM, 2);
    UnitLength = size(theta, 1)/NumallWFCM;

    Y = Labels';
    [a2, a2t] = DataforHFCM(SAEFeaturesTotal, order); % current and lagged inputs
    numCols = size(a2, 2);

    % ---------------- Forward propagation ----------------
    W2all = cell(1, NumHFCM);
    Wxall = cell(1, NumHFCM);
    a3all = cell(1, NumHFCM);
    decay = 0;                        % weight decay accumulated over ALL HFCMs
    for index = 1 : NumHFCM
        thetaInner = theta(1 + (index-1)*UnitLength : index*UnitLength);
        W2 = reshape(thetaInner(1:numW2), D, D);
        Wx = reshape(thetaInner(numW2+1:numWeights), D*(order-1), D);
        b2 = thetaInner(numWeights+1:end);

        % output of HFCM number "index"
        a3all{index} = sigmoid(W2'*a2 + repmat(b2, 1, numCols) + Wx'*a2t);
        W2all{index} = W2;
        Wxall{index} = Wx;
        decay = decay + sum(sum(W2.^2)) + sum(sum(Wx.^2));
    end

    a4 = W3'*vertcat(a2, a3all{:});   % output of final layer

    % NOTE(review): length(Y) is the largest dimension of Y, so it equals the
    % number of columns only while Y has at least as many columns as rows.
    % Kept as in the original so the cost scaling is unchanged; the gradient
    % below uses the same normaliser.
    normaliser = length(Y);

    cost = sum(sum((a4 - Y).^2))/normaliser + (lambda/2)*decay;
    fprintf("%f\n", cost);

    % ---------------- Backward propagation ----------------
    % d(cost)/d(a4); identical for every HFCM, so computed once.
    outErr = 2*(a4 - Y)/normaliser;

    grad = [];
    for index = 1 : NumHFCM
        W3Hindex = W3(D*index + 1 : D*(index + 1), :);
        a3inner  = a3all{index};

        % Error at the pre-activation of each hidden unit: back-propagate the
        % output error through W3, then scale by that unit's own sigmoid
        % derivative (element-wise, not summed over units).
        delta = (W3Hindex*outErr) .* a3inner .* (1 - a3inner);

        W2grad = a2*delta'  + lambda*W2all{index};
        Wxgrad = a2t*delta' + lambda*Wxall{index};
        b2grad = sum(delta, 2);

        grad = [grad; W2grad(:); Wxgrad(:); b2grad(:)]; %#ok<AGROW>
    end
end
%-------------------------------------------------------------------
% Here's an implementation of the sigmoid function, which you may find useful
% in your computation of the costs and the gradients. This inputs a (row or
% column) vector (say (z1, z2, z3)) and returns (f(z1), f(z2), f(z3)).
function sigm = sigmoid(x)
% SIGMOID  Element-wise logistic function 1/(1 + e^(-x)).
%   The result has the same size as x.
    denominator = 1 + exp(-x);
    sigm = 1 ./ denominator;
end