2014-11-01 14:54:22 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								function [J grad] = nnCostFunction(nn_params, ...
								                                   input_layer_size, ...
								                                   hidden_layer_size, ...
								                                   num_labels, ...
								                                   X, y, lambda)
								%NNCOSTFUNCTION Regularized cost and gradient of a two layer neural
								%network (one hidden layer) which performs classification.
								%   [J grad] = NNCOSTFUNCTION(nn_params, input_layer_size, ...
								%   hidden_layer_size, num_labels, X, y, lambda) computes the cost and
								%   gradient of the neural network.
								%
								%   Inputs:
								%     nn_params         - all weights "unrolled" into one vector: first the
								%                         elements of Theta1, then those of Theta2 (both in
								%                         column-major order, as consumed by reshape below).
								%     input_layer_size  - number of input features (columns of X).
								%     hidden_layer_size - number of hidden units (bias unit not counted).
								%     num_labels        - number of output classes K.
								%     X                 - m x input_layer_size matrix, one example per row,
								%                         without the bias column.
								%     y                 - vector of m labels with values in 1..K; it is used
								%                         as a row index into eye(K).
								%     lambda            - regularization strength.
								%
								%   Outputs:
								%     J    - scalar cost: cross-entropy averaged over the m examples plus
								%            lambda/(2*m) times the sum of squared non-bias weights.
								%     grad - "unrolled" column vector [Theta1_grad(:); Theta2_grad(:)] of the
								%            partial derivatives of J, including the regularization term.
								%
								%   Relies on sigmoid and sigmoidGradient, which are defined outside this
								%   file.

								% Reshape nn_params back into the weight matrices Theta1 and Theta2:
								%   Theta1 is hidden_layer_size x (input_layer_size + 1)
								%   Theta2 is num_labels        x (hidden_layer_size + 1)
								n_theta1 = hidden_layer_size * (input_layer_size + 1);
								Theta1 = reshape(nn_params(1:n_theta1), ...
								                 hidden_layer_size, (input_layer_size + 1));
								Theta2 = reshape(nn_params((1 + n_theta1):end), ...
								                 num_labels, (hidden_layer_size + 1));

								% Number of training examples
								m = size(X, 1);

								% ---------------------------------------------------------------------
								% Part 1: Feedforward all examples at once and compute the cost.
								% ---------------------------------------------------------------------
								A_1 = [ones(m, 1), X];             % add a first column of ones (bias term)
								Z_2 = A_1 * Theta1';               % m x hidden_layer_size
								A_2 = [ones(m, 1), sigmoid(Z_2)];  % (bias term)
								Z_3 = A_2 * Theta2';               % m x num_labels
								h_0 = sigmoid(Z_3);                % network output, one row per example

								% y holds the labels as numbers; turn it into an m x num_labels matrix
								% with each row holding the label as a one-hot vector,
								% e.g. [0 1 0 0 0 ...] for label 2. The identity matrix is stored in a
								% variable first because indexing a call result directly, as in
								% eye(K)(y,:), is Octave-only syntax.
								label_rows = eye(num_labels);
								Y = label_rows(y, :);
								assert(isequal(size(Y), [m, num_labels]));

								J = 1 / m * sum(sum(-Y .* log(h_0) - (1 - Y) .* log(1 - h_0)));
								assert(isscalar(J));

								% ---------------------------------------------------------------------
								% Part 2: Backpropagation, vectorized over all m training examples.
								% ---------------------------------------------------------------------
								% Output layer error.
								d_3 = h_0 - Y;                                          % m x num_labels
								% Hidden layer error. The first column of Theta2 belongs to the bias
								% unit, which receives no error from the layer below, so it is dropped.
								% NOTE: assumes sigmoidGradient operates element-wise on a matrix, just
								% as sigmoid is already required to do above.
								d_2 = (d_3 * Theta2(:, 2:end)) .* sigmoidGradient(Z_2); % m x hidden_layer_size
								assert(isequal(size(d_2), [m, hidden_layer_size]));

								% Each product sums the per-example outer products; dividing by m
								% averages them.
								Theta2_grad = (d_3' * A_2) / m;
								Theta1_grad = (d_2' * A_1) / m;

								% ---------------------------------------------------------------------
								% Part 3: Regularization of the cost and of the gradients.
								% ---------------------------------------------------------------------
								% Theta1/Theta2 are matrices here; we want all their rows, but skip their
								% first column (not regularizing the bias term).
								regularization_term = lambda / (2 * m) * ...
								                      (sum(sum(Theta1(:, 2:end) .^ 2)) ...
								                       + sum(sum(Theta2(:, 2:end) .^ 2)));
								assert(isscalar(regularization_term));

								J = J + regularization_term;

								% The derivative of the penalty above w.r.t. each non-bias weight is
								% lambda/m * weight. Without this the returned grad would not be the
								% gradient of the returned J whenever lambda is non-zero.
								Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + lambda / m * Theta1(:, 2:end);
								Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + lambda / m * Theta2(:, 2:end);

								% ---------------------------------------------------------------------
								% Unroll gradients
								grad = [Theta1_grad(:) ; Theta2_grad(:)];

								end