|
|
|
@ -124,12 +124,16 @@ Theta1_grad = D_1;
|
|
|
|
|
|
|
|
|
|
% Note: Theta1 and Theta2 are matrices here; we want all their rows, but skip their
|
|
|
|
|
% first column (the bias term is not regularized).
|
|
|
|
|
regularization_term = lambda/(2*m) * ...
|
|
|
|
|
J_regularization_term = lambda/(2*m) * ...
|
|
|
|
|
(sum(sum(Theta1(:,2:end).^2)) ...
|
|
|
|
|
+ sum(sum(Theta2(:,2:end).^2)));
|
|
|
|
|
assert(size(regularization_term) == [1 1]);
|
|
|
|
|
assert(size(J_regularization_term) == [1 1]);
|
|
|
|
|
J += J_regularization_term;
|
|
|
|
|
|
|
|
|
|
J += regularization_term;
|
|
|
|
|
Theta2_grad_regularization_term = lambda/m * [zeros(size(Theta2, 1), 1) Theta2(:,2:end)];
|
|
|
|
|
Theta1_grad_regularization_term = lambda/m * [zeros(size(Theta1, 1), 1) Theta1(:,2:end)];
|
|
|
|
|
Theta2_grad += Theta2_grad_regularization_term;
|
|
|
|
|
Theta1_grad += Theta1_grad_regularization_term;
|
|
|
|
|
|
|
|
|
|
% -------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|