 function p = grad_softmax(x, gamma, dir)
%function p = grad_softmax(x, gamma, dir)
%
% This procedure computes the gradient of the function:
%
%                   f(x) = gamma * log(sum(exp(x)))
%
% that is, p = gamma .* exp(x) ./ sum(exp(x)) (a scaled softmax).
%
% When the input 'x' is an array, the computation can vary as follows:
%  - dir = 0 --> 'x' is processed as a single vector [DEFAULT]
%  - dir > 0 --> 'x' is processed block-wise along the specified direction
%
%  INPUTS
% ========
%  x     - ND array
%  gamma - positive, scalar or ND array compatible with the blocks of 'x'
%          (i.e. same size as 'x' except for a singleton along 'dir')
%  dir   - integer, direction of block-wise processing
%
%  OUTPUT
% ========
%  p     - array of the same size as 'x' holding the gradient
%
%  An error is raised when 'gamma' has a non-positive entry, or when it is
%  neither a scalar nor an array matching the block layout of 'x'.


% default inputs: a missing or zero 'dir' means "treat x as one vector"
if nargin < 3 || (~isempty(dir) && dir == 0)
    dir = [];
end

% check input
% expected size of a non-scalar gamma: size of x with dimension 'dir' collapsed
blk_sz = size(x); blk_sz(dir) = 1;
gam_sz = size(gamma);
% pad with trailing singletons so that size vectors of different lengths
% (e.g. [3 4] vs [3 4 1]) can be compared without a dimension-mismatch error
nd = max(numel(blk_sz), numel(gam_sz));
blk_sz(end+1:nd) = 1;
gam_sz(end+1:nd) = 1;
if any( gamma(:) <= 0 ) || ( ~isscalar(gamma) && (isempty(dir) || ~isequal(gam_sz, blk_sz)) )
    error('''gamma'' must be positive and either scalar or compatible with the blocks of ''x''')
end
%------%

% linearize
sz = size(x);
if isempty(dir)
    x   = x(:);
    dir = 1;
end

% compute the gradient
% softmax is invariant to a constant shift within each block, so subtract
% the block maximum to keep exp() from overflowing for large inputs
m = max(x, [], dir);
m(~isfinite(m)) = 0;    % no shift for blocks whose maximum is +/-Inf or NaN
t = exp(bsxfun(@minus, x, m));
p = bsxfun(@rdivide, t, sum(t,dir));
p = bsxfun(@times, p, gamma);

% revert back
p = reshape(p, sz);