Commits

Wataru Kawasaki authored 3f21e37353e Merge
Merge branch 'master' into CAS-13667

Conflicts: casatasks/tests/tasks/test_task_sdbaseline.py
No tags

casa5/code/synthesis/MeasurementEquations/objfunc_cpu_bk.h

Added
1 +#ifndef SYNTHESIS_OBJFUNCCPU_BK_H
2 +#define SYNTHESIS_OBJFUNCCPU_BK_H
3 +
4 +#include <ms/MeasurementSets/MeasurementSet.h>
5 +#include <casa/Arrays/Matrix.h>
6 +#include <casa/Arrays/IPosition.h>
7 +#include <images/Images/ImageInterface.h>
8 +#include <images/Images/PagedImage.h>
9 +#include <images/Images/TempImage.h>
10 +
11 +#include <scimath/Mathematics/FFTServer.h>
12 +#include <scimath/Functionals/Gaussian2D.h>
13 +
14 +#include <cuda_runtime.h>
15 +#include "culbfgsb/culbfgsb.h"
16 +
17 +namespace casa { //# NAMESPACE CASA - BEGIN
18 +
19 +
20 +//template <typename real>
21 +void dsscfg_cpu(const casacore::Matrix<casacore::Float>& itsMatDirty,
22 + const casacore::Matrix<casacore::Complex>& itsPsfFT,
23 + const std::vector<casacore::IPosition>& center,
24 + int const& nx, int const& ny, double* x, double& f,
25 + double* fgrad, double** assist_buffer, int task) {
26 + double hx = 1.0 / static_cast<double>(nx + 1);
27 + double hy = 1.0 / static_cast<double>(ny + 1);
28 + double area = 0.5 * hx * hy;
29 + //
30 + // Compute the standard starting point if task = 'XS'.
31 + //
32 + if (task == 'XS') {
33 + double temp1 = 0.5;
34 + for (int j = 0; j < ny; ++j) {
35 + for (int i = 0; i < nx; ++i) {
36 + double temp = static_cast<double>(std::min(j, ny - j)) * hy;
37 + int k = nx * j + i;
38 + x[k] =
39 + temp1 *
40 + sqrt(std::min(static_cast<double>(std::min(i, nx - i)) * hx, temp));
41 + }
42 + }
43 + *assist_buffer = new double[nx * ny * 4];
44 + std::cout << "itsMatDirty(100,100]) " << itsMatDirty(100,100) << std::endl;
45 + return;
46 + }
47 + //
48 + bool feval = task == 'F' || task == 'FG';
49 + bool geval = task == 'G' || task == 'FG';
50 + //
51 + // Compute the function if task = 'F', the gradient if task = 'G', or
52 + // both if task = 'FG'.
53 + //
54 + double fquad = 0.0;
55 + double fexp = 0.0;
56 +
57 + double* fgrad_r = NULL;
58 + double* fgrad_t = NULL;
59 + double* fgrad_l = NULL;
60 + double* fgrad_b = NULL;
61 + if (geval) {
62 + fgrad_r = *assist_buffer;
63 + fgrad_t = fgrad_r + nx * ny;
64 + fgrad_l = fgrad_t + nx * ny;
65 + fgrad_b = fgrad_l + nx * ny;
66 +
67 + memset(fgrad, 0, nx * ny * sizeof(double));
68 + memset(fgrad_r, 0, nx * ny * sizeof(double));
69 + memset(fgrad_t, 0, nx * ny * sizeof(double));
70 + memset(fgrad_l, 0, nx * ny * sizeof(double));
71 + memset(fgrad_b, 0, nx * ny * sizeof(double));
72 + }
73 + //
74 + // Computation of the function and the gradient over the lower
75 + // triangular elements. The trapezoidal rule is used to estimate
76 + // the integral of the exponential term.
77 + //
78 + for (int j = 0; j < ny; ++j) {
79 + for (int i = 0; i < nx; ++i) {
80 + int k = nx * j + i;
81 + double v = 0.0;
82 + double vr = 0.0;
83 + double vt = 0.0;
84 + v = x[k];
85 + if (i != nx - 1) {
86 + vr = x[k + 1];
87 + }
88 + if (j != ny - 1) {
89 + vt = x[k + nx];
90 + }
91 + double dvdx = (vr - v) / hx;
92 + double dvdy = (vt - v) / hy;
93 + double expv = exp(v);
94 + double expvr = exp(vr);
95 + double expvt = exp(vt);
96 + if (feval) {
97 + fquad += dvdx * dvdx + dvdy * dvdy;
98 + fexp = fexp - 1.0 * (expv + expvr + expvt) / 3.0;
99 + }
100 + if (geval) {
101 + fgrad[k] += -dvdx / hx - dvdy / hy - 1.0 * expv / 3.0;
102 + if (i != nx - 1) {
103 + fgrad_r[k + 1] = dvdx / hx - 1.0 * expvr / 3.0;
104 + }
105 + if (j != ny - 1) {
106 + fgrad_t[k + nx] = dvdy / hy - 1.0 * expvt / 3.0;
107 + }
108 + }
109 + //
110 + // Computation of the function and the gradient over the upper
111 + // triangular elements. The trapezoidal rule is used to estimate
112 + // the integral of the exponential term.
113 + //
114 + double vb = 0.0;
115 + double vl = 0.0;
116 +
117 + if (j != 0) {
118 + vb = x[k - nx];
119 + }
120 + if (i != 0) {
121 + vl = x[k - 1];
122 + }
123 +
124 + dvdx = (v - vl) / hx;
125 + dvdy = (v - vb) / hy;
126 + double expvb = exp(vb);
127 + double expvl = exp(vl);
128 + expv = exp(v);
129 + if (feval) {
130 + fquad += dvdx * dvdx + dvdy * dvdy;
131 + fexp = fexp - 1.0 * (expvb + expvl + expv) / 3.0;
132 + }
133 + if (geval) {
134 + if (j != 0) {
135 + fgrad_b[k - nx] = -dvdy / hy - 1.0 * expvb / 3.0;
136 + }
137 + if (i != 0) {
138 + fgrad_l[k - 1] = -dvdx / hx - 1.0 * expvl / 3.0;
139 + }
140 + fgrad[k] += dvdx / hx + dvdy / hy - 1.0 * expv / 3.0;
141 + }
142 + }
143 + }
144 +
145 + //
146 + // Scale the result.
147 + //
148 + if (feval) {
149 + f = 0.0;
150 + f = area * (.5 * fquad + fexp);
151 + std::cout << "dsscfg_cpu after f " << f << std::endl;
152 + std::cout << "center " << center[0] << std::endl;
153 + }
154 + if (geval) {
155 + for (int k = 0; k < nx * ny; ++k) {
156 + fgrad[k] =
157 + (fgrad[k] + fgrad_b[k] + fgrad_l[k] + fgrad_r[k] + fgrad_t[k]) * area;
158 + }
159 + }
160 + //
161 +}
162 +
163 +
164 +
165 +// test CPU mode
166 +//template <typename real>
167 +double test_dsscfg_cpu(const casacore::Matrix<casacore::Float>& itsMatDirty,
168 + const casacore::Matrix<casacore::Complex>& itsPsfFT,
169 + const std::vector<casacore::IPosition>& center) {
170 + const int g_nx = 4;
171 + const int g_ny = 4;
172 +
173 + // initialize LBFGSB option
174 + LBFGSB_CUDA_OPTION<double> lbfgsb_options;
175 +
176 + lbfgsbcuda::lbfgsbdefaultoption<double>(lbfgsb_options);
177 + lbfgsb_options.mode = LCM_NO_ACCELERATION;
178 + lbfgsb_options.eps_f = static_cast<double>(1e-3);
179 + lbfgsb_options.eps_g = static_cast<double>(1e-3);
180 + lbfgsb_options.eps_x = static_cast<double>(1e-3);
181 + lbfgsb_options.max_iteration = 10;
182 +
183 + // initialize LBFGSB state
184 + LBFGSB_CUDA_STATE<double> state;
185 + memset(&state, 0, sizeof(state));
186 + double* assist_buffer_cpu = nullptr;
187 +
188 + double minimal_f = std::numeric_limits<double>::max();
189 + // setup callback function that evaluate function value and its gradient
190 + state.m_funcgrad_callback = [&itsMatDirty,&itsPsfFT, &center, &assist_buffer_cpu, &minimal_f](
191 + double* x, double& f, double* g,
192 + const cudaStream_t& stream,
193 + const LBFGSB_CUDA_SUMMARY<double>& summary) {
194 + std::cout << "I'm called2" << std::endl;
195 + dsscfg_cpu(itsMatDirty, itsPsfFT, center, g_nx, g_ny, x, f, g, &assist_buffer_cpu, 'FG');
196 + if (summary.num_iteration % 100 == 0) {
197 + std::cout << "CPU iteration " << summary.num_iteration << " F: " << f
198 + << " x[0] " << x[0] << std::endl;
199 + }
200 +
201 + minimal_f = fmin(minimal_f, f);
202 + return 0;
203 + };
204 +
205 + // initialize CPU buffers
206 + int N_elements = g_nx * g_ny;
207 +
208 + double* x = new double[N_elements];
209 + double* g = new double[N_elements];
210 +
211 + double* xl = new double[N_elements];
212 + double* xu = new double[N_elements];
213 +
214 + // in this example, we don't have boundaries
215 + memset(xl, 0, N_elements * sizeof(xl[0]));
216 + memset(xu, 0, N_elements * sizeof(xu[0]));
217 +
218 + // initialize starting point
219 + //double f_init = std::numeric_limits<double>::max();
220 + //dsscfg_cpu(itsMatDirty, itsPsfFT, center, g_nx, g_ny, x, f_init, nullptr, &assist_buffer_cpu, 'XS');
221 + double hx = 1.0 / static_cast<double>(g_nx + 1);
222 + double hy = 1.0 / static_cast<double>(g_ny + 1);
223 + //
224 + // Compute the standard starting point if task = 'XS'.
225 + //
226 +
227 + double temp1 = 0.5;
228 + for (int j = 0; j < g_ny; ++j) {
229 + for (int i = 0; i < g_nx; ++i) {
230 + double temp = static_cast<double>(std::min(j, g_ny - j)) * hy;
231 + int k = g_nx * j + i;
232 + x[k] =
233 + temp1 *
234 + sqrt(std::min(static_cast<double>(std::min(i, g_nx - i)) * hx, temp));
235 + }
236 + }
237 + assist_buffer_cpu = new double[g_nx * g_ny * 4];
238 + std::cout << "itsMatDirty(100,100]) " << itsMatDirty(100,100) << std::endl;
239 + // initialize number of bounds (0 for this example)
240 + int* nbd = new int[N_elements];
241 + memset(nbd, 0, N_elements * sizeof(nbd[0]));
242 +
243 + LBFGSB_CUDA_SUMMARY<double> summary;
244 + memset(&summary, 0, sizeof(summary));
245 +
246 + // call optimization
247 + lbfgsbcuda::lbfgsbminimize<double>(N_elements, state, lbfgsb_options, x, nbd,
248 + xl, xu, summary);
249 +
250 + // release allocated memory
251 + delete[] x;
252 + delete[] g;
253 + delete[] xl;
254 + delete[] xu;
255 + delete[] nbd;
256 + delete[] assist_buffer_cpu;
257 +
258 + return minimal_f;
259 +}
260 +
261 +/*#define INST_HELPER(real) \
262 + template void dsscfg_cpu<real>(const casacore::Matrix<casacore::Float>& itsMatDirty, \
263 + int const& nx, int const& ny, real* x, \
264 + real& f, real* fgrad, real** assist_buffer, \
265 + int task);
266 +
267 +INST_HELPER(float);
268 +INST_HELPER(double);*/
269 +
270 +} // end namespace casa
271 +
272 +#endif // SYNTHESIS_OBJFUNCCPU_H

Everything looks good. We'll let you know here if there's anything you should know about.

Add shortcut