-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBNN_adept.cpp
More file actions
185 lines (146 loc) · 6.18 KB
/
BNN_adept.cpp
File metadata and controls
185 lines (146 loc) · 6.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/* test_adept.cpp - Demonstration of basic features of Adept
Copyright (C) 2012-2014 The University of Reading
Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without any warranty.
*/
#include <iostream>
#include "adept.h"
// Provide function prototype for "algorithm"; see algorithm.cpp for
// the contents of the function
#include "algorithm.h"
int
main(int argc, char** argv)
{
  using adept::adouble;

  // Command-line arguments are not used by this demonstration.
  (void)argc;
  (void)argv;

  // An Adept stack must be constructed BEFORE the first adouble object:
  // it records every derivative statement executed on adouble variables
  // so they can later be replayed in reverse.
  adept::Stack s;

  // Independent variables: np network parameters, all initialised to 0.2.
  // np is the single source of truth for the parameter count; it is used
  // for the arrays, the loops, the independent-variable declaration and
  // the Jacobian storage below (previously 17 was hard-coded in places,
  // which would silently break if np were changed).
  const int np = 17;
  adouble q[np];
  for (int i = 0; i < np; ++i) {
    q[i] = 0.2;
  }

  // REVERSE-MODE AUTOMATIC DIFFERENTIATION
  std::cout << "*** Computing adjoint using automatic differentiation ***\n\n";

  // Start a new recording of derivative statements; this must be done
  // after the independent variables q[] have been given their initial
  // values, so that the initialisation itself is not part of the tape.
  s.new_recording();

  // Forward pass: evaluate the network at q (prototype in algorithm.h).
  adouble f = neural_net(q);
  std::cout << "f = neural_net(q) = " << f.value() << std::endl;

  // Diagnostic information about the recorded tape before the adjoint
  // sweep has been performed.
  std::cout << "Stack status after algorithm run but adjoint not yet computed:\n"
            << s;
  std::cout << "Memory usage: " << s.memory() << " bytes\n\n";

  // Seed the adjoint of the single dependent variable with 1.0 so that
  // after the reverse sweep the adjoints of q[i] equal df/dq[i].
  f.set_gradient(1.0);
  std::cout << "\n";

  // Run the adjoint algorithm (reverse-mode differentiation).
  s.reverse();

  std::cout << "Final list of gradients:\n";
  s.print_gradients();
  std::cout << "\n";

  // Extract the adjoints (gradients) of the independent variables.
  double q_ad[np] = {0};
  for (int i = 0; i < np; ++i) {
    q[i].get_gradient(q_ad[i]);
  }

  // JACOBIAN COMPUTATION
  // Reuse the same recording to compute the 1 x np Jacobian matrix;
  // for a scalar output it should match the adjoints extracted above.
  std::cout << "*** Computing Jacobian matrix ***\n\n";
  s.independent(q, np); // declare the np independents (no magic 17)
  s.dependent(f);       // declare the single dependent
  double jac[np];       // storage for the Jacobian, one entry per q[i]
  s.jacobian(jac);      // compute the Jacobian

  // PRINT OUT RESULTS
  std::cout << "Stack status after adjoint and Jacobian computed:\n"
            << s;
  std::cout << "Memory usage: " << s.memory() << " bytes\n\n";
  std::cout << "Result of forward algorithm:\n";
  std::cout << "  f = " << f.value() << "\n";
  for (int i = 0; i < np; ++i) {
    std::cout << "q[" << i << "] = " << q[i]
              << " df_dq[" << i << "][jacobian] = " << jac[i]
              << "\t adjoint = " << q_ad[i] << "\n";
  }
  // NOTE(review): the original closing message claimed numerical
  // gradients were printed for comparison, but the numerical-adjoint
  // section was entirely commented out, so that output was misleading
  // and has been removed.
  return 0;
}