%[Err, Grad, Out] = backprop( X, Y, W, Con, Trn ) % %Compute error and gradient of feedforward network via backpropagation % % X: inputs; each column of the matrix X is an input vector % Y: outputs; each column of Y is a desired output vector % correspoding to the input vector given by the same column of X % W: synaptic weights; W(j,i) is the weight from unit i to unit j; % non-existing weights are ignored % Con: network topology; Con(j,i) = 1 iff weight from i to j exists % Trn: type of transfer function for each non-input unit: % 1- linear; 2- soft-threshold; 3- sigmoid; 4- tanh % if Trn is scalar, the same type is used for all units % % The total number of units is (W,1). % The first size(X,1) "units" are inputs with identity transfer functions. % When Trn is a vector it should not include the input units; % thus the length of Trn should be either 1 or size(W,1)-size(X,1). % % Err: squared error summed over all data points % Grad: gradient of Err with respect to W; Grad is a matrix with same % size as W and Con; non-existing elements are set to 0 % Out: the outputs of the network for the current inputs and weights % %NOTE: Simple gradient descent can be implemented as W = W - rate*Grad. % When using a more efficient second-order method one should % remove the non-existing weights and turn the weight matrix W % into a vector: w_vec = W(Con==1). % Copyright (C) Emanuel Todorov, 2004-2006 function [Err, Grad, Out] = backprop( X, Y, W, Con, Trn ) % compute sizes nInput = size(X,1); nOutput = size(Y,1); nTotal = size(W,1); nData = size(X,2); idxOutput = (nTotal-nOutput+1 : nTotal); if length(Trn)==1, Trn = Trn*ones(nTotal-nInput,1); end % remove nonexistent weights W = W .* Con; % allocate variables Out = zeros(nTotal,nData); % outputs d = zeros(nTotal,nData); % deltas z = zeros(nTotal,nData); % internal activations h1= zeros(nTotal,nData); % h'(z) % initialize forward pass Out(1:nInput,:) = X; % run forward pass for j = nInput+1:nTotal z(j,:) = W(j,:)*Out; [Out(j,:), h1(j,:)] = trnsfr( Trn(j-nInput), z(j,:) ); end % compute residuals (desired minus actual outputs) d_a = Y - Out(idxOutput,:); % initialize backward pass d(idxOutput,:) = - d_a .* h1(idxOutput,:); % run backward pass for j = nTotal-nOutput:-1:nInput+1 d(j,:) = h1(j,:) .* (W(:,j)'*d); end % compute error and gradient Err = sum(sum(d_a.^2)) / 2; Grad = (d*Out') .* Con; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % compute specified transfer function and its derivative function [o, h1] = trnsfr( Trn, z ) switch Trn, case 1, % linear o = z; h1 = ones(size(z)); case 2, % soft-threshold o = log(exp(z)+1); h1 = exp(z)./(1+exp(z)); case 3, % sigmoid o = 1./(1+exp(-z)); h1 = o - o.^2; case 4, % tanh o = (exp(z)-exp(-z)) ./ (exp(z)+exp(-z)); h1 = 1 - o.^2; otherwise, error('Unknown transfer function type'); end