updates to roots finding and optimization

arotem3 · Jan 10, 2019 · 476a0f0 · 476a0f0
1 parent ea5f76c
commit 476a0f0
Show file tree

Hide file tree

Showing 23 changed files with 497 additions and 258 deletions.
diff --git a/ODEs/ODE.hpp b/ODEs/ODE.hpp
@@ -2,6 +2,20 @@
 
 #include "../numerics.hpp"
 
+/* Copyright 2019 Amit Rotem
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
 namespace ODE {
     // --- ode solver constants --- //
         const double rk45_kmin = 1.0e-4;
@@ -194,8 +208,8 @@ namespace ODE {
         arma::vec am2(std::function<double(double,double)>, arma::vec&, double, ivp_options&);
         arma::vec am2(std::function<double(double,double)>, arma::vec&, double);
 
-        numerics::CubicInterp IVP_solve(const odefun&, arma::vec&, arma::mat&, ivp_options&, ode_solver solver = RK45);
-        numerics::CubicInterp IVP_solve(const odefun&, arma::vec&, arma::mat&, ode_solver solver = RK45);
+        dsolnc IVP_solve(const odefun&, arma::vec&, arma::mat&, ivp_options&, ode_solver solver = RK45);
+        dsolnc IVP_solve(const odefun&, arma::vec&, arma::mat&, ode_solver solver = RK45);
     // --- BVPs ------------------- //
         class linear_BVP {
             //--- u''(x) = a(x) + b(x)*u(x) + c(x)*u'(x) ---//

diff --git a/ODEs/nonlin_bvp.cpp b/ODEs/nonlin_bvp.cpp
@@ -15,7 +15,7 @@ ODE::dsolnp ODE::bvp(const odefun& f, const bcfun& bc, const soln_init& guess, b
     arma::vec x;
     cheb(D, x, bc.xL, bc.xR, m-1);
 
-    auto ff = [&](const arma::vec& u) -> arma::vec {
+    numerics::vector_func ff = [&](const arma::vec& u) -> arma::vec {
         arma::mat U = arma::reshape(u,m,n);
         arma::rowvec BC = bc.func(u.row(0), u.row(m-1));
         arma::mat A = D;
@@ -30,7 +30,7 @@ ODE::dsolnp ODE::bvp(const odefun& f, const bcfun& bc, const soln_init& guess, b
         return z;
     }; // wrapper for root finding function
 
-    std::function<arma::mat(const arma::vec&)> J = [&](const arma::vec& u) -> arma::mat {
+    numerics::vec_mat_func J = [&](const arma::vec& u) -> arma::mat {
         arma::mat U = arma::reshape(u,m,n);
         arma::mat DF = arma::zeros(m*n,m*n);
         if (opts.jacobian_func != nullptr) { // Jacobian provided
@@ -82,8 +82,8 @@ ODE::dsolnp ODE::bvp(const odefun& f, const bcfun& bc, const soln_init& guess, b
         opts.lsqropts.jacobian_func = &J;
         numerics::lmlsqr(ff, U0, opts.lsqropts);
     } else { // use Broyden solver
-        arma::mat J0 = J(U0);
-        opts.nlnopts.init_jacobian = &J0;
+        arma::mat JJ = J(U0);
+        opts.nlnopts.init_jacobian = &JJ;
         numerics::broyd(ff, U0, opts.nlnopts);
     }
 

diff --git a/bfgs.cpp b/bfgs.cpp
@@ -1,23 +1,5 @@
 #include "numerics.hpp"
 
-//--- rough approximator step size for quasi-newton methods based on strong wolfe conditions ---//
-double numerics::wolfe_step(const vec_dfunc& obj_func, const vector_func& f, const arma::vec& x, const arma::vec& p, double c1, double c2, double b) {
-    double alpha = 1;
-    int k = 0;
-    while (true) {
-        if (k > 100) break;
-        double pfa = arma::dot(p, f(x + alpha*x));
-        bool cond1 = obj_func(x + alpha*p) <= obj_func(x) + c1*alpha*pfa;
-        bool cond2 = std::abs(arma::dot(p, f(x + alpha*p))) <= c2*std::abs(arma::dot(p, f(x)));
-        if (cond1 && cond2) break;
-        else if (obj_func(x + b*alpha*p) < obj_func(x + alpha*p)) alpha *= b;
-        else alpha /= b;
-        k++;
-    }
-    return alpha;
-}
-
-
 //--- Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm for local optimization ---//
 //----- f  : vec = f(x) system of equations --------------------------------------//
 //----- x : initial guess close to a local minimum, root will be stored here -----//

diff --git a/broyd.cpp b/broyd.cpp
@@ -13,6 +13,9 @@ void numerics::broyd(const vector_func& f, arma::vec& x, nonlin_opts& opts) {
     else if (opts.init_jacobian != nullptr) {
         Jinv = *opts.init_jacobian;
         Jinv = arma::pinv(Jinv);
+    } else if (opts.jacobian_func != nullptr) {
+        Jinv = opts.jacobian_func->operator()(x);
+        Jinv = arma::pinv(Jinv);
     } else { // FD approximation always used to initialize.
         approx_jacobian(f,Jinv,x,1e-4);
         Jinv = arma::pinv(Jinv);

diff --git a/cyc_queue.cpp b/cyc_queue.cpp
@@ -11,7 +11,7 @@ numerics::cyc_queue::cyc_queue(size_t num_rows, size_t max_size) {
 }
 
 //--- add an element to the queue, removing first in element if the queue is full ---//
-void numerics::cyc_queue::push(arma::vec& x) {
+void numerics::cyc_queue::push(const arma::vec& x) {
     if (size < max_elem) { // queue not full
         A.col(size) = x;
         size++;
@@ -26,13 +26,16 @@ arma::vec numerics::cyc_queue::operator()(size_t i) {
     if (i >= size) {
         std::cerr << "cyc_queue::element access out of bounds." << std::endl;
         return {0};
-    }
-    else {
+    } else {
         int ind = mod(i + head, size);
         return A.col(ind);
     }
 }
 
+arma::vec numerics::cyc_queue::end() {
+    return A.col(size-1);
+}
+
 //--- return current length of cyclic queue (relevent especially when not full) ---//
 int numerics::cyc_queue::length() {
     return size;
@@ -41,4 +44,16 @@ int numerics::cyc_queue::length() {
 //--- return num_rows of the elements (each a vector) of the queue ---//
 int numerics::cyc_queue::col_size() {
     return A.n_rows;
+}
+
+void numerics::cyc_queue::clear() {
+    A.fill(0);
+}
+
+arma::mat numerics::cyc_queue::data() {
+    arma::mat D  = arma::zeros(A.n_rows, size);
+    for (unsigned int i(0); i < size; ++i) {
+        D.col(i) = operator()(i);
+    }
+    return D;
 }
diff --git a/examples/bvp_ex.cpp b/examples/bvp_ex.cpp
@@ -1,8 +1,6 @@
 #include "../ODEs/ODE.hpp"
 #include "gnuplot_i.hpp"
 
-// g++ -g -o bvp_ex polyInterp.cpp newton.cpp broyd.cpp lmlsqr.cpp finite_dif.cpp ODEs/cheb.cpp ODEs/linear_bvp.cpp ODEs/nonlin_bvp.cpp examples/bvp_ex.cpp examples/wait.cpp -larmadillo -lsuperlu
-
 double afunc(double x) {
     return std::sin(x);
 }
@@ -28,9 +26,9 @@ int main() {
     arma::vec x;
     arma::mat U;
 
-    // problem.solve(x,U,40); // 4th order FD approximation
+    problem.solve(x,U,40); // 4th order FD approximation
     // problem.solve(x,U,40, SECOND_ORDER); // 2nd order FD approximation
-    problem.solve(x,U,20, CHEBYSHEV); // spectral order approximation
+    // problem.solve(x,U,20, CHEBYSHEV); // spectral order approximation
     // dsolnp y = problem.solve(20); x = arma::linspace(0,2*M_PI); U = y.soln(x); // spectral order approximation outputing cheb polynomial
 
     arma::mat u = 0.25 * (arma::exp(-2*M_PI - x) % (-1 + 4*std::exp(2*M_PI)+arma::exp(2*x)) - 2*arma::sin(x));
@@ -73,7 +71,7 @@ int main() {
     bc.xR = 2*M_PI;
     bc.func = [](const arma::rowvec& uL, const arma::rowvec& uR) -> arma::rowvec {
         arma::rowvec v(2,arma::fill::zeros);
-        v(0) = uL(0) - 1;
+        v(0) = uL(0) - 1; // solution fixed as 1 at end points
         v(1) = uR(0) - 1;
         return v;
     };
@@ -87,16 +85,15 @@ int main() {
 
     bvp_opts opts;
     opts.num_points = 100;
-    opts.jacobian_func = &J; // providing a jacobian function improves runtime significantly
-    // opts.solver = numerics::LMLSQR;
+    // opts.jacobian_func = &J; // providing a jacobian function improves runtime significantly
 
     dsolnp soln = bvp(f, bc, guess, opts);
     x1 = arma::conv_to<stdv>::from(soln.independent_var_values);
     U1 = arma::conv_to<stdv>::from(soln.solution_values.col(0));
     stdv U2 = arma::conv_to<stdv>::from(soln.solution_values.col(1));
 
     graph.reset_plot();
-    graph.set_style("points");
+    graph.set_style("lines");
     graph.plot_xy(x1,U1,"u(x) -- guess of cos(x)");
     graph.plot_xy(x1,U2,"v(x) -- guess of sin(x)");
 

diff --git a/examples/make_bvp_ex b/examples/make_bvp_ex
@@ -0,0 +1,26 @@
+CXX = g++
+CXXFLAGS = -g -Wall
+LIBS = -larmadillo -lsuperlu
+DEPS = ODEs/ODE.hpp
+TARGET = bvp_ex
+
+all: bvp_ex
+
+bvp_ex: bvp_ex.o polyInterp.o wolfe_step.o newton.o broyd.o lmlsqr.o finite_dif.o cheb.o linear_bvp.o nonlin_bvp.o wait.o
+	$(CXX) $(CXXFLAGS) -o $(TARGET) *.o $(LIBS)
+
+%.o: %.cpp $(DEPS)
+cheb.o: ODEs/cheb.cpp $(DEPS)
+	$(CXX) -g -Wall -c ODEs/cheb.cpp
+linear_bvp.o: ODEs/linear_bvp.cpp $(DEPS)
+	$(CXX) -g -Wall -c ODEs/linear_bvp.cpp
+nonlin_bvp.o: ODEs/nonlin_bvp.cpp $(DEPS)
+	$(CXX) -g -Wall -c ODEs/nonlin_bvp.cpp
+bvp_ex.o: examples/bvp_ex.cpp $(DEPS)
+	$(CXX) -g -Wall -c examples/bvp_ex.cpp
+wait.o: examples/wait.cpp
+	$(CXX) -g -Wall -c examples/wait.cpp
+
+clean:
+	rm *.o
+	rm bvp_ex
diff --git a/examples/make_minimize_ex b/examples/make_minimize_ex
@@ -6,15 +6,13 @@ TARGET = minimize_ex
 
 all: minimize_ex
 
-minimize_ex: minimize_ex.o minimize_unc.o newton.o momentum.o sgd.o bfgs.o broyd.o lmlsqr.o nlcgd.o finite_dif.o cyc_queue.o wait.o
+minimize_ex: minimize_ex.o minimize_unc.o newton.o momentum.o sgd.o bfgs.o broyd.o lmlsqr.o nlcgd.o finite_dif.o cyc_queue.o line_min.o roots.o wolfe_step.o
 	$(CXX) $(CXXFLAGS) -o $(TARGET) *.o $(LIBS)
 
 %.o: %.cpp $(DEPS)
 
 minimize_ex.o: examples/minimize_ex.cpp $(DEPS)
 	$(CXX) -g -c examples/minimize_ex.cpp
-wait.o: examples/wait.cpp
-	$(CXX) -g -c examples/wait.cpp
 
 clean:
 	rm *.o
diff --git a/examples/minimize_ex.cpp b/examples/minimize_ex.cpp
@@ -7,49 +7,69 @@ typedef std::vector<double> vvd;
 void wait_for_key();
 
 int main() {
+    int n = 10; // n=2 for rosenbrock, anything for the others
     arma::arma_rng::set_seed_random();
-    arma::vec x = 5 - 10*arma::randu(100);
-    arma::vec w = arma::linspace(-5,5,x.n_elem);
+
+    arma::vec x;
+    x = 5 - 10*arma::randu(n); // styb-tang or sphere
+    // x = {4*arma::randu()-2, 4*arma::randu() - 1}; // rosenbrock
+    if (n <= 5) std::cout << "initial point: " << x.t() << std::endl;
+
+    arma::vec w = arma::linspace(-5,5,n); // for sphere
+
+    arma::vec tru_min;
+    tru_min = arma::ones(n);
+    tru_min *= -2.9035; // for rosenbrock
 
     // sphere function
     // styblinski tang
+    // rosenbrock
     vec_dfunc g = [&w](const arma::vec& x) -> double {
         // return arma::norm(x - w);
         return arma::sum(x%x%x%x - 16*x%x + 5*x)/2;
+        // return std::pow(1-x(0),2) + 100*std::pow(x(1) - x(0)*x(0),2);
     };
     vector_func dg = [&w](const arma::vec& x) -> arma::vec {
         // return 2*(x-w);
         return (2*x%x%x - 16*x + 2.5);
+        // return {-2*(1-x(0)) - 400*x(0)*(x(1)-x(0)*x(0)),
+                // 200*(x(1)-x(0)*x(0))};
     };
     sp_vector_func dgi = [&w](const arma::vec& x,int i) -> double {
         // return 2*(x(i) - w(i));
         return (2*x(i)*x(i)*x(i) - 16*x(i) + 2.5);
+        // none for rosenbrock
     };
     vec_mat_func J = [](const arma::vec& x) -> arma::mat {
         // return 2*arma::eye(x.n_elem, x.n_elem);
         return arma::diagmat(6*x%x - 16);
+        // return {{1200*x(0) - 400*x(1) + 2, -400*x(0)},
+                // {-400*x(0), 200}};
     };
 
     optim_opts opts;
     // opts.use_bfgs(&dg); std::cout << "using BFGS..." << std::endl;
     // opts.use_momentum(&dg); std::cout << "using momentum gradient descent..." << std::endl;
-    // opts.use_sgd(&dgi); opts.stochastic_batch_size = 50; std::cout << "using batch stochastic gradient descent..." << std::endl;
-    // opts.use_lmlsqr(&dg); std::cout << "using Levenberg-Marquardt least squares..." << std::endl;
+    // opts.use_sgd(&dgi); opts.stochastic_batch_size = 5; std::cout << "using batch stochastic gradient descent..." << std::endl;
+    // opts.use_lmlsqr(&dg); opts.hessian_func = &J; std::cout << "using Levenberg-Marquardt least squares..." << std::endl;
     // opts.use_lbfgs(&dg); opts.num_iters_to_remember = 5; std::cout << "using limited memory BFGS..." << std::endl;
-    opts.use_lncgd(&dg); opts.hessian_func = &J; opts.tolerance = 1e-10; std::cout << "using conjugate gradient method..." << std::endl;
+    // opts.use_nlcgd(&dg); std::cout << "using conjugate gradient method..." << std::endl;
     // opts.use_newton(&dg,&J); std::cout << "using Newton's method..." << std::endl;
-    opts.use_FD_hessian = true;
+    opts.tolerance = 1e-5;
+    opts.max_iter = 1000;
 
     clock_t tt = clock();
-    minimize_unc(g, x, opts);
+    double gx = numerics::minimize_unc(g, x, opts);
     tt = clock() - tt;
 
-    arma::vec centers = {-2.903,0,2.903};
+    arma::vec centers;
+    centers = {-2.903,0.1567,2.746};  // styb-tang
+    // centers = {-2,-1,0,1,2}; // rosenbrock and sphere
     arma::uvec bins = arma::hist(x,centers);
     int i = arma::index_max(bins);
 
-    std::cout << "\noptimization results:\t\t" << g(x) << std::endl
-              << "true min:\t\t\t" << g(-2.903*arma::ones(x.n_elem)) << std::endl
+    std::cout << "\noptimization results:\t\t" << gx << std::endl
+              << "true min:\t\t\t" << g(tru_min) << std::endl
               << "mode(x):\t\t\t " << centers(i) << std::endl
               << "minimize_unc() took " << (float)tt/CLOCKS_PER_SEC << " seconds" << std::endl
               << "num iterations needed: " << opts.num_iters_returned << std::endl;

diff --git a/examples/newton_ex.cpp b/examples/newton_ex.cpp
@@ -1,12 +1,14 @@
 #include "../numerics.hpp"
 #include <iomanip>
 
-// g++ -Wall -g -o newton_ex examples/newton_ex.cpp newton.cpp broyd.cpp lmlsqr.cpp finite_dif.cpp -larmadillo
+// g++ -Wall -g -o newton_ex examples/newton_ex.cpp newton.cpp broyd.cpp lmlsqr.cpp finite_dif.cpp cyc_queue.cpp wolfe_step.cpp -larmadillo
 
 using namespace numerics;
 
 int main() {
-    auto f = [](const arma::vec& x) -> arma::vec { // function
+    arma::arma_rng::set_seed_random();
+
+    vector_func f = [](const arma::vec& x) -> arma::vec { // function
         arma::vec y = {0,0,0};
         y(0) = x(0)*x(1) - 1;
         y(1) = x(0) + x(1)*x(2);
@@ -26,11 +28,11 @@ int main() {
 
     arma::vec root = {-1.25992, -0.793701, -1.5874};
 
-    std::cout << "In this file you can test the nonlinear solvers: Newton's method, BFGS, Broyden's method, and Levenberg-Marquardt." << std::endl;
+    std::cout << "In this file you can test the nonlinear solvers: Newton's method, Broyden's method, and Levenberg-Marquardt." << std::endl;
     std::cout << "we will try to find roots of f(x,y,z) = [xy - 1, x + yz, 2y - z] with initial guess [-1,-1,-1]." << std::endl;
     std::cout << "for newton we will need the jacobian: " << std::endl << "[y   x   0]\n[1   z   y]\n[0   2  -1]" << std::endl;
     std::cout << "we also know there is only one root at: [-1.25992, -0.793701, -1.5874]" << std::endl;
-    arma::vec x0 = {-1,-1,-1};
+    arma::vec x0 = 4*arma::randu(3) - 2;
 
     nonlin_opts opts;
     opts.use_FD_jacobian = true;