Commits
Wataru Kawasaki authored 3f21e37353e Merge
1 + | |
2 + | |
3 + | |
4 + | |
5 + | |
6 + | |
7 + | |
8 + | |
9 + | |
10 + | |
11 + | |
12 + | |
13 + | |
14 + | |
15 + | |
16 + | |
17 + | namespace casa { //# NAMESPACE CASA - BEGIN |
18 + | |
19 + | |
20 + | //template <typename real> |
21 + | void dsscfg_cpu(const casacore::Matrix<casacore::Float>& itsMatDirty, |
22 + | const casacore::Matrix<casacore::Complex>& itsPsfFT, |
23 + | const std::vector<casacore::IPosition>& center, |
24 + | int const& nx, int const& ny, double* x, double& f, |
25 + | double* fgrad, double** assist_buffer, int task) { |
26 + | double hx = 1.0 / static_cast<double>(nx + 1); |
27 + | double hy = 1.0 / static_cast<double>(ny + 1); |
28 + | double area = 0.5 * hx * hy; |
29 + | // |
30 + | // Compute the standard starting point if task = 'XS'. |
31 + | // |
32 + | if (task == 'XS') { |
33 + | double temp1 = 0.5; |
34 + | for (int j = 0; j < ny; ++j) { |
35 + | for (int i = 0; i < nx; ++i) { |
36 + | double temp = static_cast<double>(std::min(j, ny - j)) * hy; |
37 + | int k = nx * j + i; |
38 + | x[k] = |
39 + | temp1 * |
40 + | sqrt(std::min(static_cast<double>(std::min(i, nx - i)) * hx, temp)); |
41 + | } |
42 + | } |
43 + | *assist_buffer = new double[nx * ny * 4]; |
44 + | std::cout << "itsMatDirty(100,100]) " << itsMatDirty(100,100) << std::endl; |
45 + | return; |
46 + | } |
47 + | // |
48 + | bool feval = task == 'F' || task == 'FG'; |
49 + | bool geval = task == 'G' || task == 'FG'; |
50 + | // |
51 + | // Compute the function if task = 'F', the gradient if task = 'G', or |
52 + | // both if task = 'FG'. |
53 + | // |
54 + | double fquad = 0.0; |
55 + | double fexp = 0.0; |
56 + | |
57 + | double* fgrad_r = NULL; |
58 + | double* fgrad_t = NULL; |
59 + | double* fgrad_l = NULL; |
60 + | double* fgrad_b = NULL; |
61 + | if (geval) { |
62 + | fgrad_r = *assist_buffer; |
63 + | fgrad_t = fgrad_r + nx * ny; |
64 + | fgrad_l = fgrad_t + nx * ny; |
65 + | fgrad_b = fgrad_l + nx * ny; |
66 + | |
67 + | memset(fgrad, 0, nx * ny * sizeof(double)); |
68 + | memset(fgrad_r, 0, nx * ny * sizeof(double)); |
69 + | memset(fgrad_t, 0, nx * ny * sizeof(double)); |
70 + | memset(fgrad_l, 0, nx * ny * sizeof(double)); |
71 + | memset(fgrad_b, 0, nx * ny * sizeof(double)); |
72 + | } |
73 + | // |
74 + | // Computation of the function and the gradient over the lower |
75 + | // triangular elements. The trapezoidal rule is used to estimate |
76 + | // the integral of the exponential term. |
77 + | // |
78 + | for (int j = 0; j < ny; ++j) { |
79 + | for (int i = 0; i < nx; ++i) { |
80 + | int k = nx * j + i; |
81 + | double v = 0.0; |
82 + | double vr = 0.0; |
83 + | double vt = 0.0; |
84 + | v = x[k]; |
85 + | if (i != nx - 1) { |
86 + | vr = x[k + 1]; |
87 + | } |
88 + | if (j != ny - 1) { |
89 + | vt = x[k + nx]; |
90 + | } |
91 + | double dvdx = (vr - v) / hx; |
92 + | double dvdy = (vt - v) / hy; |
93 + | double expv = exp(v); |
94 + | double expvr = exp(vr); |
95 + | double expvt = exp(vt); |
96 + | if (feval) { |
97 + | fquad += dvdx * dvdx + dvdy * dvdy; |
98 + | fexp = fexp - 1.0 * (expv + expvr + expvt) / 3.0; |
99 + | } |
100 + | if (geval) { |
101 + | fgrad[k] += -dvdx / hx - dvdy / hy - 1.0 * expv / 3.0; |
102 + | if (i != nx - 1) { |
103 + | fgrad_r[k + 1] = dvdx / hx - 1.0 * expvr / 3.0; |
104 + | } |
105 + | if (j != ny - 1) { |
106 + | fgrad_t[k + nx] = dvdy / hy - 1.0 * expvt / 3.0; |
107 + | } |
108 + | } |
109 + | // |
110 + | // Computation of the function and the gradient over the upper |
111 + | // triangular elements. The trapezoidal rule is used to estimate |
112 + | // the integral of the exponential term. |
113 + | // |
114 + | double vb = 0.0; |
115 + | double vl = 0.0; |
116 + | |
117 + | if (j != 0) { |
118 + | vb = x[k - nx]; |
119 + | } |
120 + | if (i != 0) { |
121 + | vl = x[k - 1]; |
122 + | } |
123 + | |
124 + | dvdx = (v - vl) / hx; |
125 + | dvdy = (v - vb) / hy; |
126 + | double expvb = exp(vb); |
127 + | double expvl = exp(vl); |
128 + | expv = exp(v); |
129 + | if (feval) { |
130 + | fquad += dvdx * dvdx + dvdy * dvdy; |
131 + | fexp = fexp - 1.0 * (expvb + expvl + expv) / 3.0; |
132 + | } |
133 + | if (geval) { |
134 + | if (j != 0) { |
135 + | fgrad_b[k - nx] = -dvdy / hy - 1.0 * expvb / 3.0; |
136 + | } |
137 + | if (i != 0) { |
138 + | fgrad_l[k - 1] = -dvdx / hx - 1.0 * expvl / 3.0; |
139 + | } |
140 + | fgrad[k] += dvdx / hx + dvdy / hy - 1.0 * expv / 3.0; |
141 + | } |
142 + | } |
143 + | } |
144 + | |
145 + | // |
146 + | // Scale the result. |
147 + | // |
148 + | if (feval) { |
149 + | f = 0.0; |
150 + | f = area * (.5 * fquad + fexp); |
151 + | std::cout << "dsscfg_cpu after f " << f << std::endl; |
152 + | std::cout << "center " << center[0] << std::endl; |
153 + | } |
154 + | if (geval) { |
155 + | for (int k = 0; k < nx * ny; ++k) { |
156 + | fgrad[k] = |
157 + | (fgrad[k] + fgrad_b[k] + fgrad_l[k] + fgrad_r[k] + fgrad_t[k]) * area; |
158 + | } |
159 + | } |
160 + | // |
161 + | } |
162 + | |
163 + | |
164 + | |
165 + | // test CPU mode |
166 + | //template <typename real> |
167 + | double test_dsscfg_cpu(const casacore::Matrix<casacore::Float>& itsMatDirty, |
168 + | const casacore::Matrix<casacore::Complex>& itsPsfFT, |
169 + | const std::vector<casacore::IPosition>& center) { |
170 + | const int g_nx = 4; |
171 + | const int g_ny = 4; |
172 + | |
173 + | // initialize LBFGSB option |
174 + | LBFGSB_CUDA_OPTION<double> lbfgsb_options; |
175 + | |
176 + | lbfgsbcuda::lbfgsbdefaultoption<double>(lbfgsb_options); |
177 + | lbfgsb_options.mode = LCM_NO_ACCELERATION; |
178 + | lbfgsb_options.eps_f = static_cast<double>(1e-3); |
179 + | lbfgsb_options.eps_g = static_cast<double>(1e-3); |
180 + | lbfgsb_options.eps_x = static_cast<double>(1e-3); |
181 + | lbfgsb_options.max_iteration = 10; |
182 + | |
183 + | // initialize LBFGSB state |
184 + | LBFGSB_CUDA_STATE<double> state; |
185 + | memset(&state, 0, sizeof(state)); |
186 + | double* assist_buffer_cpu = nullptr; |
187 + | |
188 + | double minimal_f = std::numeric_limits<double>::max(); |
189 + | // setup callback function that evaluate function value and its gradient |
190 + | state.m_funcgrad_callback = [&itsMatDirty,&itsPsfFT, ¢er, &assist_buffer_cpu, &minimal_f]( |
191 + | double* x, double& f, double* g, |
192 + | const cudaStream_t& stream, |
193 + | const LBFGSB_CUDA_SUMMARY<double>& summary) { |
194 + | std::cout << "I'm called2" << std::endl; |
195 + | dsscfg_cpu(itsMatDirty, itsPsfFT, center, g_nx, g_ny, x, f, g, &assist_buffer_cpu, 'FG'); |
196 + | if (summary.num_iteration % 100 == 0) { |
197 + | std::cout << "CPU iteration " << summary.num_iteration << " F: " << f |
198 + | << " x[0] " << x[0] << std::endl; |
199 + | } |
200 + | |
201 + | minimal_f = fmin(minimal_f, f); |
202 + | return 0; |
203 + | }; |
204 + | |
205 + | // initialize CPU buffers |
206 + | int N_elements = g_nx * g_ny; |
207 + | |
208 + | double* x = new double[N_elements]; |
209 + | double* g = new double[N_elements]; |
210 + | |
211 + | double* xl = new double[N_elements]; |
212 + | double* xu = new double[N_elements]; |
213 + | |
214 + | // in this example, we don't have boundaries |
215 + | memset(xl, 0, N_elements * sizeof(xl[0])); |
216 + | memset(xu, 0, N_elements * sizeof(xu[0])); |
217 + | |
218 + | // initialize starting point |
219 + | //double f_init = std::numeric_limits<double>::max(); |
220 + | //dsscfg_cpu(itsMatDirty, itsPsfFT, center, g_nx, g_ny, x, f_init, nullptr, &assist_buffer_cpu, 'XS'); |
221 + | double hx = 1.0 / static_cast<double>(g_nx + 1); |
222 + | double hy = 1.0 / static_cast<double>(g_ny + 1); |
223 + | // |
224 + | // Compute the standard starting point if task = 'XS'. |
225 + | // |
226 + | |
227 + | double temp1 = 0.5; |
228 + | for (int j = 0; j < g_ny; ++j) { |
229 + | for (int i = 0; i < g_nx; ++i) { |
230 + | double temp = static_cast<double>(std::min(j, g_ny - j)) * hy; |
231 + | int k = g_nx * j + i; |
232 + | x[k] = |
233 + | temp1 * |
234 + | sqrt(std::min(static_cast<double>(std::min(i, g_nx - i)) * hx, temp)); |
235 + | } |
236 + | } |
237 + | assist_buffer_cpu = new double[g_nx * g_ny * 4]; |
238 + | std::cout << "itsMatDirty(100,100]) " << itsMatDirty(100,100) << std::endl; |
239 + | // initialize number of bounds (0 for this example) |
240 + | int* nbd = new int[N_elements]; |
241 + | memset(nbd, 0, N_elements * sizeof(nbd[0])); |
242 + | |
243 + | LBFGSB_CUDA_SUMMARY<double> summary; |
244 + | memset(&summary, 0, sizeof(summary)); |
245 + | |
246 + | // call optimization |
247 + | lbfgsbcuda::lbfgsbminimize<double>(N_elements, state, lbfgsb_options, x, nbd, |
248 + | xl, xu, summary); |
249 + | |
250 + | // release allocated memory |
251 + | delete[] x; |
252 + | delete[] g; |
253 + | delete[] xl; |
254 + | delete[] xu; |
255 + | delete[] nbd; |
256 + | delete[] assist_buffer_cpu; |
257 + | |
258 + | return minimal_f; |
259 + | } |
260 + | |
261 + | /*#define INST_HELPER(real) \ |
262 + | template void dsscfg_cpu<real>(const casacore::Matrix<casacore::Float>& itsMatDirty, \ |
263 + | int const& nx, int const& ny, real* x, \ |
264 + | real& f, real* fgrad, real** assist_buffer, \ |
265 + | int task); |
266 + | |
267 + | INST_HELPER(float); |
268 + | INST_HELPER(double);*/ |
269 + | |
270 + | } // end namespace casa |
271 + | |
272 + | // SYNTHESIS_OBJFUNCCPU_H |