function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)
% NNCOSTFUNCTION Implements the neural network cost function for a two-layer
% neural network which performs classification
%   [J grad] = NNCOSTFUNCTION(nn_params, input_layer_size, hidden_layer_size, ...
%   num_labels, X, y, lambda) computes the cost and gradient of the neural
%   network. The parameters for the neural network are "unrolled" into the
%   vector nn_params and need to be converted back into the weight matrices.
%
% The returned parameter grad should be an "unrolled" vector of the
% partial derivatives of the neural network.
%
% Reshape nn_params back into the parameters Theta1 and Theta2, the weight
% matrices for our 2-layer neural network
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));
Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));
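% Theta1 is hidden_layer_size x (input_layer_size + 1) and Theta2 is
% num_labels x (hidden_layer_size + 1); column 1 of each holds the bias weights.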
% Setup some useful variables
m = size(X, 1);
% You need to return the following variables correctly
J = 0;
Theta1_grad = zeros(size(Theta1));
Theta2_grad = zeros(size(Theta2));
% ====================== YOUR CODE HERE ======================
% Instructions: You should complete the code by working through the
%               following parts.
%
% Part 1: Feedforward the neural network and return the cost in the
%         variable J. After implementing Part 1, you can verify that your
%         cost function computation is correct by verifying the cost
%         computed in ex4.m
%
X = [ones(m, 1), X];          % add a first column of ones (bias term)
A_2 = sigmoid(X * Theta1');
A_2 = [ones(m, 1), A_2];      % (bias term)
A_3 = sigmoid(A_2 * Theta2');
h_0 = A_3;
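% Resulting dimensions: X is m x (input_layer_size+1), A_2 is
% m x (hidden_layer_size+1), and h_0 is m x num_labels, so row i of h_0
% is the hypothesis for training example i.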
% disp(round(h_0));
% y holds the labels as numbers (one per training example); turn it into an
% m x num_labels matrix, each row holding the label as a one-hot vector,
% e.g. [0 1 0 0 0 ...] for 2.
y = eye(num_labels)(y, :);    % y is used as an index, it picks a row,
                              % e.g. [0 0 0 1 ... 0 0]
assert(size(y) == [m num_labels]);
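% Unregularized cross-entropy cost, summed over all m examples and all
% num_labels output units:
%   J = (1/m) * sum_i sum_k ( -y_ik * log(h_ik) - (1 - y_ik) * log(1 - h_ik) )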
J = 1 / m * sum(sum(-y .* log(h_0) - (1 - y) .* log(1 - h_0)));
assert(size(J) == [1 1]);
% Part 2: Implement the backpropagation algorithm to compute the gradients
%         Theta1_grad and Theta2_grad. You should return the partial derivatives of
%         the cost function with respect to Theta1 and Theta2 in Theta1_grad and
%         Theta2_grad, respectively. After implementing Part 2, you can check
%         that your implementation is correct by running checkNNGradients
%
% Note: The vector y passed into the function is a vector of labels
%       containing values from 1..K. You need to map this vector into a
%       binary vector of 1's and 0's to be used with the neural network
%       cost function.
%
% Hint: We recommend implementing backpropagation using a for-loop
%       over the training examples if you are implementing it for the
%       first time.
%
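% The loop below applies the backpropagation recurrence to each example t:
%   d_3 = a_3 - y(t,:)                              (output-layer error)
%   d_2 = (d_3 * Theta2) .* g'(z_2), bias dropped   (hidden-layer error)
%   D_l = D_l + d_(l+1)' * a_l                      (accumulate)
% Dividing the accumulators D_1, D_2 by m yields the unregularized gradients.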
D_1 = zeros(size(Theta1));
D_2 = zeros(size(Theta2));
for t = 1:m
  % feed forward this training sample
  % -----------------------------------------------------------------------------
  a_1 = X(t, :);
  % (X already has the 1-column)
  assert(size(a_1) == [1, input_layer_size + 1]);
  z_2 = a_1 * Theta1';
  a_2 = sigmoid(z_2);
  a_2 = [ones(size(a_2, 1), 1), a_2];   % (bias term)
  assert(size(a_2) == [1, hidden_layer_size + 1]);
  z_3 = a_2 * Theta2';
  a_3 = sigmoid(z_3);
  h_0 = a_3;
  assert(size(h_0) == [1, num_labels]);
  % back propagate / error
  % -----------------------------------------------------------------------------
  assert(size(y) == [m num_labels]);
  d_3 = a_3 - y(t, :);
  assert(size(d_3) == [1, num_labels]);
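  % Propagate the error back through Theta2 and the sigmoid derivative;
  % the bias unit has no error term, so its component is dropped right after.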
  d_2 = d_3 * Theta2 .* [1, sigmoidGradient(z_2)];
  d_2 = d_2(2:end);
  assert(size(d_2) == [1, hidden_layer_size]);
  % accumulate over all m training examples
  D_2 = D_2 + d_3' * a_2;
  D_1 = D_1 + d_2' * a_1;
end
% average
D_2 /= m;
D_1 /= m;
Theta2_grad = D_2;
Theta1_grad = D_1;
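% At this point Theta1_grad and Theta2_grad hold the unregularized gradients.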
% Part 3: Implement regularization with the cost function and gradients.
%
% Hint: You can implement this around the code for
%       backpropagation. That is, you can compute the gradients for
%       the regularization separately and then add them to Theta1_grad
%       and Theta2_grad from Part 2.
%
% Note: Theta1/2 are matrices here; we want all their rows, but skip their
% first column (not regularizing the bias term).
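% Regularization term added to the cost below: lambda/(2m) times the sum of
% all squared entries of Theta1(:, 2:end) and Theta2(:, 2:end).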
regularization_term = lambda / (2 * m) * ...
                      (sum(sum(Theta1(:, 2:end) .^ 2)) ...
                       + sum(sum(Theta2(:, 2:end) .^ 2)));
assert(size(regularization_term) == [1 1]);
J += regularization_term;
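% The Part 3 hint above also asks for regularized gradients. A minimal sketch,
% assuming the standard lambda/m * Theta term for every non-bias column
% (can be verified with checkNNGradients and a non-zero lambda):
Theta1_grad(:, 2:end) += lambda / m * Theta1(:, 2:end);
Theta2_grad(:, 2:end) += lambda / m * Theta2(:, 2:end);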
% -------------------------------------------------------------
% =========================================================================
% Unroll gradients
grad = [Theta1_grad(:) ; Theta2_grad(:)];
end