// Copyright (C) 2004 Michael Creel
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// bfgsmin
//
// bfgs minimization of function of form
//
// [value, return_2,..., return_m] = f(arg_1, arg_2,..., arg_n)
//
// By default, minimization is w.r.t. arg_1, but this can be overridden
#include <oct.h>
#include <octave/parse.h>
#include <octave/Cell.h>
#include <octave/lo-mappers.h>
#include <climits>
// define argument checks
static bool
any_bad_argument(const octave_value_list& args)
{
if (!args(0).is_string())
{
error("bfgsmin: first argument must be string holding objective function name");
return true;
}
if (!args(1).is_cell())
{
error("bfgsmin: second argument must cell array of function arguments");
return true;
}
if (args.length() == 3)
{
if (!args(2).is_cell() || (!(args(2).length() == 4)))
{
error("bfgsmin: 3rd argument, if supplied, must a 4-element cell array of control options");
return true;
}
}
if (args.length() == 4)
{
if (!args(3).is_cell() || (!(args(3).length() == 3)))
{
error("bfgsmin: 4th argument, if supplied, must a 3-element cell array of tolerances");
return true;
}
}
return false;
}
DEFUN_DLD(bfgsmin, args, ,
"bfgsmin: bfgs minimization of a function.\n\
\n\
[x, obj, convergence] = bfgsmin(\"f\", {args}, {control}, {tols})\n\
\n\
Arguments:\n\
* \"f\": function name (string)\n\
* {args}: a cell array that holds all arguments of the function,\n\
* {control}: an optional cell array of 4 elements\n\
* elem 1: maximum iterations (-1 = infinity (default))\n\
* elem 2: verbosity\n\
- 0 = no screen output (default)\n\
- 1 = summary every iteration\n\
- 2 = only last iteration summary\n\
* elem 3: convergence criterion\n\
- 1 = strict (function, gradient and param change) (default)\n\
- 2 = weak - only function convergence required\n\
* elem 4: arg with respect to which minimization is done\n\
(default = first)\n\
* {tols}: an optional cell array of 3 elements\n\
* elem 1: function change tolerance, default 1e-12\n\
* elem 2: parameter change tolerance, default 1e-6\n\
* elem 3: gradient tolerance, default 1e-4\n\
\n\
Returns:\n\
* x: the minimizer\n\
* obj: the value of f() at x\n\
* convergence: 1 if normal conv, other values if not\n\
\n\
Example:\n\
function a = f(x,y)\n\
a = x'*x + y'*y;\n\
endfunction\n\
\n\
In this example, x is optimized since it's the first\n\
element of the cell array, y is a fixed constant = 1\n\
\n\
bfgsmin(\"f\", {ones(2,1), 1}, [10,2,1,1])\n\
\n\
bfgsmin final results: Iteration 1\n\
Stepsize 0.0000000\n\
Objective function value 1.0000000000\n\
Function conv 1 Param conv 1 Gradient conv 1\n\
params gradient change\n\
0.0000 0.0000 0.0000\n\
0.0000 0.0000 0.0000\n\
\n\
ans =\n\
\n\
6.1497e-12\n\
6.1497e-12\n\
")
{
int nargin = args.length();
if ((nargin < 2) || (nargin > 4))
{
error("bfgsmin: you must supply 2, 3 or 4 arguments");
return octave_value_list();
}
// check the arguments
if (any_bad_argument(args)) return octave_value_list();
std::string f (args(0).string_value());
Cell f_args (args(1).cell_value());
octave_value_list step_args(4,1); // for stepsize: {f, f_args, direction, minarg}
octave_value_list g_args(3,1); // for numgradient {f, f_args, minarg}
octave_value_list c_args(2,1); // for celleval {f, f_args}
octave_value_list f_return; // holder for feval returns
int max_iters, verbosity, criterion, minarg;
int convergence, iter;
int gradient_failed, i, j, k, conv_fun, conv_param, conv_grad;
int have_gradient = 0;
double func_tol, param_tol, gradient_tol, stepsize, obj_value;
double obj_in, last_obj_value, denominator, test;
Matrix H, tempmatrix, H1, H2;
ColumnVector thetain, d, g, g_new, p, q;
// use provided controls, if applicable
if (args.length() >= 3)
{
Cell control (args(2).cell_value());
if (xisinf (control(0).double_value()))
max_iters = -1;
else
max_iters = control(0).int_value();
if (max_iters == -1) max_iters = INT_MAX;
verbosity = control(1).int_value();
criterion = control(2).int_value();
minarg = control(3).int_value();
}
else
{
// Default values for controls
max_iters = INT_MAX; // no limit on iterations
verbosity = 0; // by default don't report results to screen
criterion = 1; // strong convergence required
minarg = 1; // by default, first arg is one over which we minimize
}
// use provided tolerances, if applicable
if (args.length() == 4)
{
Cell tols (args(3).cell_value());
func_tol = tols(0).double_value();
param_tol = tols(1).double_value();
gradient_tol = tols(2).double_value();
}
else
{
// Default values for tolerances
func_tol = 1e-12;
param_tol = 1e-6;
gradient_tol = 1e-4;
}
step_args(0) = f;
step_args(1) = f_args;
step_args(3) = minarg;
g_args(0) = f;
g_args(1) = f_args;
g_args(2) = minarg;
c_args(0) = f;
c_args(1) = f_args;
ColumnVector theta = f_args(minarg - 1).column_vector_value();
k = theta.rows();
// initialize things
convergence = -1; // if this doesn't change, it means that maxiters were exceeded
thetain = theta;
H = identity_matrix(k,k);
// Initial obj_value
f_return = feval("celleval", c_args);
obj_in = f_return(0).double_value();
last_obj_value = obj_in;
// maybe we have analytic gradient?
// if the function returns more than one item, and the second
// is a kx1 vector, it is assumed to be the gradient
if (f_return.length() > 1)
{
if (f_return(1).is_real_matrix())
{
if ((f_return(1).rows() == k) & (f_return(1).columns() == 1))
{
g = f_return(1).column_vector_value();
have_gradient = 1; // future reference
}
}
else have_gradient = 0;
}
if (!have_gradient) // use numeric gradient
{
f_return = feval("numgradient", g_args);
tempmatrix = f_return(0).matrix_value();
g = tempmatrix.row(0).transpose();
}
// check that gradient is ok
gradient_failed = 0; // test = 1 means gradient failed
for (i=0; i 1.0)
{
conv_fun = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol;
}
else
{
conv_fun = fabs(obj_value - last_obj_value) < func_tol;
}
// parameter change convergence
test = sqrt(theta.transpose() * theta);
if (test > 1)
{
conv_param = sqrt(p.transpose() * p) / test < param_tol ;
}
else
{
conv_param = sqrt(p.transpose() * p) < param_tol;
}
// gradient convergence
conv_grad = sqrt(g.transpose() * g) < gradient_tol;
// Want intermediate results?
if (verbosity == 1)
{
printf("\nBFGSMIN intermediate results: Iteration %d\n",iter);
printf("Stepsize %8.7f\n", stepsize);
if (have_gradient) printf("Using analytic gradient\n");
else printf("Using numeric gradient\n");
printf("Objective function value %16.10f\n", last_obj_value);
printf("Function conv %d Param conv %d Gradient conv %d\n", conv_fun, conv_param, conv_grad);
printf(" params gradient change\n");
for (j = 0; j