function [Bhat, fOpt, hessian] = ESAGmle(Y, X, B0, parsToFit, optimAlg, optimOpts)
%ESAGmle : computes the maximum likelihood estimate of the parameter matrix
%of the ESAG distribution for iid or regression data, by numerically
%minimising minus the log likelihood.
%INPUT:
% Y : 3 x n matrix of responses, each column of which is a unit vector,
%     where n is the sample size.
%
% X : p x n matrix of the covariates, where p is the dimension of each
%     covariate vector.  [default: ones(1,n), i.e. the iid model]
%
% B0 : 5 x p parameter matrix used to initialise the optimisation.
%      Any parameters which are known (i.e. not to be optimised over) should
%      be set to their known value.  [default: ones(5,p)]
%
% parsToFit: 5 x p matrix each of whose elements is 1 or 0.  If 1 then the
%            parameter is fitted, if 0 then it is held fixed at the value
%            given in B0.  [default: ones(5,p)]
%
% optimAlg: one of 'Nelder-Mead' (uses fminsearch) and 'Newton-Raphson'
%           (uses fminunc) [default: 'Nelder-Mead']
%
% optimOpts: options object for Matlab optimisers [a default is used if not
%            specified - see code for details]
%
% Any of X, B0, parsToFit, optimAlg and optimOpts may be passed as [] to
% request its default value.
%
%OUTPUT:
% Bhat : MLE of B from the optimisation (5 x p); entries with parsToFit==0
%        are copied unchanged from B0.
%
% fOpt : minimised objective function (equal to minus the maximised log likelihood)
%
% hessian : Hessian of the objective at the solution, with respect to the
%           fitted parameters only, as returned by fminunc.  Only available
%           for optimAlg = 'Newton-Raphson'; it is [] for 'Nelder-Mead'.
%
%IID EXAMPLE:
%
% n = 25;
% Y = ESAGsim( [0 0 1 0 0], n);
% [Bhat, fOpt] = ESAGmle(Y);
% fit IAG model:
% parsToFit = [1;1;1;0;0];
% [Bhat, fOpt] = ESAGmle(Y, ones(1,n), [rand(3,1);0;0], parsToFit);
%
%REGRESSION EXAMPLE:
%
% n = 25;
% X = ESAGsim([0 0 1 0.5 0], n);
% for i=1:n
%   mu = rand(3,3)*X(:,i);
%   gamma = rand(2,3)*X(:,i);
%   Y(:,i) = ESAGsim([ mu' gamma'], 1);
% end
% pars = ones(5,3); % Optimise over all parameters.
% [Bhat, fOpt] = ESAGmle(Y, X, rand(5,3), pars, 'Newton-Raphson');

% Fill in defaults; an empty argument is treated the same as a missing one.
if nargin < 2 || isempty(X), X = ones(1,size(Y,2)); end
[p,~] = size(X);
if nargin < 3 || isempty(B0), B0 = ones(5,p); end
if nargin < 4 || isempty(parsToFit), parsToFit = ones(5,p); end
if nargin < 5 || isempty(optimAlg), optimAlg = 'Nelder-Mead'; end
if nargin < 6 || isempty(optimOpts)
   optimOpts = optimset('Display','final','MaxFunEvals',...
      100000,'MaxIter',100000,'TolX',1e-8,'TolFun',1e-8);
end

% Split B0 into the free parameters (optimised over) and the fixed ones.
a0 = B0(parsToFit==1);
b = B0(parsToFit==0);

% Objective: minus the log likelihood as a function of the free parameters.
objective = @(a) f(a, b, Y, X, parsToFit);

% fminsearch does not provide a Hessian, so make sure the third output is
% always defined; otherwise requesting it with 'Nelder-Mead' would error.
hessian = [];

switch optimAlg
   case 'Nelder-Mead'
      [ahat,fOpt] = fminsearch(objective,a0,optimOpts);
   case 'Newton-Raphson'
      % Outputs 3-5 of fminunc (exit flag, output struct, gradient) are unused.
      [ahat,fOpt,~,~,~,hessian] = fminunc(objective,a0,optimOpts);
   otherwise
      error('ESAGmle:unknownOptimAlg', ...
         'Unknown optimAlg ''%s''; expected ''Nelder-Mead'' or ''Newton-Raphson''.', ...
         optimAlg);
end

% Reassemble the full 5 x p parameter matrix from fitted and fixed entries.
Bhat = NaN(5,p);
Bhat(parsToFit==1) = ahat;
Bhat(parsToFit==0) = b;

function out = f(a, b, Y, X, parsToFit)
%f : objective function handed to the optimiser; returns minus the ESAG
%regression log likelihood.
% a : values for the entries of B where parsToFit==1 (being optimised)
% b : values for the entries of B where parsToFit==0 (held fixed)
% Y, X : responses and covariates, passed straight through to ESAGregloglik
% parsToFit : 5 x p matrix of 1s and 0s selecting which entries of B are free

nCov = size(X,1);
isFree = (parsToFit==1);
isFixed = (parsToFit==0);

% Rebuild the full 5 x p parameter matrix from its fixed and free parts.
B = NaN(5,nCov);
B(isFixed) = b;
B(isFree) = a;

out = -ESAGregloglik(Y,X,B);

function loglik = ESAGregloglik(Y,X,B)
%ESAGregloglik : log likelihood of the regression model, accumulated as the
%sum over columns of Y of log(ESAGdensity(.)).
% Y : responses, one observation per column
% X : covariates, one column per observation
% B : parameter matrix; rows 1:3 give mu and rows 4:5 give gamma once
%     multiplied by a covariate column

[~,nObs] = size(Y);

muCoef = B(1:3,:);   % rows mapping a covariate column to mu
gamCoef = B(4:5,:);  % rows mapping a covariate column to gamma

loglik = 0;
for k = 1:nObs
   xk = X(:,k);
   % Parameter row vector for observation k: [mu' gamma'].
   theta = [(muCoef*xk)', (gamCoef*xk)'];
   loglik = loglik + log(ESAGdensity(Y(:,k),theta));
end