// Copyright (C) 2004 Michael Creel // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // bfgsmin // // bfgs minimization of function of form // // [value, return_2,..., return_m] = f(arg_1, arg_2,..., arg_n) // // By default, minimization is w.r.t. arg_1, but this can be overridden #include #include #include #include #include // define argument checks static bool any_bad_argument(const octave_value_list& args) { if (!args(0).is_string()) { error("bfgsmin: first argument must be string holding objective function name"); return true; } if (!args(1).is_cell()) { error("bfgsmin: second argument must cell array of function arguments"); return true; } if (args.length() == 3) { if (!args(2).is_cell() || (!(args(2).length() == 4))) { error("bfgsmin: 3rd argument, if supplied, must a 4-element cell array of control options"); return true; } } if (args.length() == 4) { if (!args(3).is_cell() || (!(args(3).length() == 3))) { error("bfgsmin: 4th argument, if supplied, must a 3-element cell array of tolerances"); return true; } } return false; } DEFUN_DLD(bfgsmin, args, , "bfgsmin: bfgs minimization of a function.\n\ \n\ [x, obj, convergence] = bfgsmin(\"f\", {args}, {control}, {tols})\n\ \n\ Arguments:\n\ * \"f\": function name (string)\n\ * {args}: a cell array that holds all arguments of the function,\n\ * {control}: an optional cell array of 4 elements\n\ * elem 1: maximum iterations (-1 = infinity (default))\n\ * elem 2: verbosity\n\ - 0 = no screen output (default)\n\ - 1 = summary every iteration\n\ - 2 = only last iteration summary\n\ * elem 3: convergence criterion\n\ - 1 = strict (function, gradient and param change) (default)\n\ - 2 = weak - only function convergence required\n\ * elem 4: arg with respect to which minimization is done\n\ (default = first)\n\ * {tols}: an optional cell array of 3 elements\n\ * elem 1: function change tolerance, default 1e-12\n\ * elem 2: parameter change tolerance, default 1e-6\n\ * elem 3: gradient tolerance, default 1e-4\n\ \n\ Returns:\n\ * x: the minimizer\n\ * obj: the value of f() at x\n\ * convergence: 1 if normal conv, other values if not\n\ \n\ Example:\n\ function a = f(x,y)\n\ a = x'*x + y'*y;\n\ endfunction\n\ \n\ In this example, x is optimized since it's the first\n\ element of the cell array, y is a fixed constant = 1\n\ \n\ bfgsmin(\"f\", {ones(2,1), 1}, [10,2,1,1])\n\ \n\ bfgsmin final results: Iteration 1\n\ Stepsize 0.0000000\n\ Objective function value 1.0000000000\n\ Function conv 1 Param conv 1 Gradient conv 1\n\ params gradient change\n\ 0.0000 0.0000 0.0000\n\ 0.0000 0.0000 0.0000\n\ \n\ ans =\n\ \n\ 6.1497e-12\n\ 6.1497e-12\n\ ") { int nargin = args.length(); if ((nargin < 2) || (nargin > 4)) { error("bfgsmin: you must supply 2, 3 or 4 arguments"); return octave_value_list(); } // check the arguments if (any_bad_argument(args)) return octave_value_list(); std::string f (args(0).string_value()); Cell f_args (args(1).cell_value()); octave_value_list step_args(4,1); // for stepsize: {f, f_args, direction, minarg} octave_value_list g_args(3,1); // for numgradient {f, f_args, minarg} octave_value_list c_args(2,1); // for celleval {f, f_args} octave_value_list f_return; // holder for feval returns int max_iters, verbosity, criterion, minarg; int convergence, iter; int gradient_failed, i, j, k, conv_fun, conv_param, conv_grad; int have_gradient = 0; double func_tol, param_tol, gradient_tol, stepsize, obj_value; double obj_in, last_obj_value, denominator, test; Matrix H, tempmatrix, H1, H2; ColumnVector thetain, d, g, g_new, p, q; // use provided controls, if applicable if (args.length() >= 3) { Cell control (args(2).cell_value()); if (xisinf (control(0).double_value())) max_iters = -1; else max_iters = control(0).int_value(); if (max_iters == -1) max_iters = INT_MAX; verbosity = control(1).int_value(); criterion = control(2).int_value(); minarg = control(3).int_value(); } else { // Default values for controls max_iters = INT_MAX; // no limit on iterations verbosity = 0; // by default don't report results to screen criterion = 1; // strong convergence required minarg = 1; // by default, first arg is one over which we minimize } // use provided tolerances, if applicable if (args.length() == 4) { Cell tols (args(3).cell_value()); func_tol = tols(0).double_value(); param_tol = tols(1).double_value(); gradient_tol = tols(2).double_value(); } else { // Default values for tolerances func_tol = 1e-12; param_tol = 1e-6; gradient_tol = 1e-4; } step_args(0) = f; step_args(1) = f_args; step_args(3) = minarg; g_args(0) = f; g_args(1) = f_args; g_args(2) = minarg; c_args(0) = f; c_args(1) = f_args; ColumnVector theta = f_args(minarg - 1).column_vector_value(); k = theta.rows(); // initialize things convergence = -1; // if this doesn't change, it means that maxiters were exceeded thetain = theta; H = identity_matrix(k,k); // Initial obj_value f_return = feval("celleval", c_args); obj_in = f_return(0).double_value(); last_obj_value = obj_in; // maybe we have analytic gradient? // if the function returns more than one item, and the second // is a kx1 vector, it is assumed to be the gradient if (f_return.length() > 1) { if (f_return(1).is_real_matrix()) { if ((f_return(1).rows() == k) & (f_return(1).columns() == 1)) { g = f_return(1).column_vector_value(); have_gradient = 1; // future reference } } else have_gradient = 0; } if (!have_gradient) // use numeric gradient { f_return = feval("numgradient", g_args); tempmatrix = f_return(0).matrix_value(); g = tempmatrix.row(0).transpose(); } // check that gradient is ok gradient_failed = 0; // test = 1 means gradient failed for (i=0; i 1.0) { conv_fun = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol; } else { conv_fun = fabs(obj_value - last_obj_value) < func_tol; } // parameter change convergence test = sqrt(theta.transpose() * theta); if (test > 1) { conv_param = sqrt(p.transpose() * p) / test < param_tol ; } else { conv_param = sqrt(p.transpose() * p) < param_tol; } // gradient convergence conv_grad = sqrt(g.transpose() * g) < gradient_tol; // Want intermediate results? if (verbosity == 1) { printf("\nBFGSMIN intermediate results: Iteration %d\n",iter); printf("Stepsize %8.7f\n", stepsize); if (have_gradient) printf("Using analytic gradient\n"); else printf("Using numeric gradient\n"); printf("Objective function value %16.10f\n", last_obj_value); printf("Function conv %d Param conv %d Gradient conv %d\n", conv_fun, conv_param, conv_grad); printf(" params gradient change\n"); for (j = 0; j