Find a model including one parameter (using a purely evolutionary approach)

In this second tutorial we show how to find a model for our input data when we also want to learn some constants.

Constants can, in general, be learned via two main techniques:
  1. evolutionary (common and standard practice in GP)

  2. memetic (original with dCGP)

The difference is that the evolutionary approach cannot find the correct and exact values for constants, only approximations. In this tutorial we follow the evolutionary approach 1. In the next tutorial we will follow a memetic approach 2. We use the problem P1 from the dcgp::gym, that is x^5 - pi*x^3 + x

Code:

 1#include <boost/algorithm/string.hpp>
 2#include <pagmo/algorithm.hpp>
 3#include <pagmo/io.hpp>
 4#include <pagmo/population.hpp>
 5#include <pagmo/problem.hpp>
 6#include <vector>
 7
 8#include <dcgp/algorithms/es4cgp.hpp>
 9#include <dcgp/gym.hpp>
10#include <dcgp/kernel_set.hpp>
11#include <dcgp/problems/symbolic_regression.hpp>
12#include <dcgp/symengine.hpp>
13
14using namespace dcgp;
15using namespace boost::algorithm;
16
17int main()
18{
19    // We load the data (using problem P1 from the gym)
20    std::vector<std::vector<double>> X, Y;
21    gym::generate_P1(X, Y);
22
23    // We instantiate a symbolic regression problem with one ephemeral constants, and multiobjectove.
24    auto n_eph = 1u;
25    symbolic_regression udp(X, Y, 1u, 20u, 21u, 2u, kernel_set<double>({"sum", "diff", "mul", "pdiv"})(), n_eph, false,
26                            0u, "MSE");
27
28    // We init a population with four individuals
29    pagmo::population pop{udp, 4u};
30
31    // We use an ES startegy also to learn constants
32    dcgp::es4cgp uda1(100000u, 4u, 1e-8, true);
33    pagmo::algorithm algo{uda1};
34    algo.set_verbosity(500u);
35
36    auto t1 = std::chrono::high_resolution_clock::now();
37    pop = algo.evolve(pop);
38    auto t2 = std::chrono::high_resolution_clock::now();
39    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
40    std::cout << duration;
41
42    // We print on screen the best found
43    auto idx = pop.best_idx();
44    auto prettier = udp.prettier(pop.get_x()[idx]);
45    trim_left_if(prettier, is_any_of("["));
46    trim_right_if(prettier, is_any_of("]"));
47
48    pagmo::print("\nBest fitness: ", pop.get_f()[idx], "\n");
49    pagmo::print("Chromosome: ", pop.get_x()[idx], "\n");
50    pagmo::print("Pretty Formula: ", udp.pretty(pop.get_x()[idx]), "\n");
51    pagmo::print("Prettier Formula: ", prettier, "\n");
52    pagmo::print("Expanded Formula: ", SymEngine::expand(SymEngine::Expression(prettier)), "\n");
53    return false;
54}

Output:

Note: the actual output will be different on your computers as its non deterministic.

 Gen:        Fevals:          Best:   Constants:      Formula:
    0              0        3803.05   [1.11378]       [x0 + c1**2] ...
  500           2000         8.2487   [0.959988]      [(-c1 + x0)*x0**4 - x0**2] ...
 1000           4000        2.70799   [1.14114]       [-x0**2*c1 + (-c1 + x0)*x0**4*c1] ...
 1500           6000        2.70797   [1.14053]       [-x0**2*c1 + (-c1 + x0)*x0**4*c1] ...
 2000           8000        2.70796   [1.14064]       [-x0**2*c1 + (-c1 + x0)*x0**4*c1] ...
 2500          10000      0.0901456   [1.0711]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 3000          12000      0.0597183   [1.07233]       [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 3500          14000      0.0597183   [1.07233]       [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 4000          16000      0.0597183   [1.07233]       [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 4500          18000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 5000          20000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 5500          22000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 6000          24000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 6500          26000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 7000          28000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 7500          30000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 8000          32000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 8500          34000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 9000          36000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
 9500          38000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
10000          40000      0.0596577   [1.0723]        [-x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3] ...
Exit condition -- generations = 10000

Best fitness: [0.0596577]
Chromosome: [1.0723, 1, 0, 0, 2, ... ]
Pretty Formula: [((((x0*c1)*((x0*c1)*x0))*((x0*c1)*(x0-c1)))-((x0*c1)*((x0*c1)*x0)))]
Prettier Formula: -x0**3*c1**2 + (-c1 + x0)*x0**4*c1**3
Expanded Formula: -x0**3*c1**2 - x0**4*c1**4 + x0**5*c1**3