// -*- C++ -*- //# AWVisResampler.cc: Implementation of the AWVisResampler class //# Copyright (C) 1997,1998,1999,2000,2001,2002,2003 //# Associated Universities, Inc. Washington DC, USA. //# //# This library is free software; you can redistribute it and/or modify it //# under the terms of the GNU Library General Public License as published by //# the Free Software Foundation; either version 2 of the License, or (at your //# option) any later version. //# //# This library is distributed in the hope that it will be useful, but WITHOUT //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public //# License for more details. //# //# You should have received a copy of the GNU Library General Public License //# along with this library; if not, write to the Free Software Foundation, //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. //# //# Correspondence concerning AIPS++ should be addressed as follows: //# Internet email: aips2-request@nrao.edu. //# Postal address: AIPS++ Project Office //# National Radio Astronomy Observatory //# 520 Edgemont Road //# Charlottesville, VA 22903-2475 USA //# //# $Id$ #ifdef USE_HPG #include <synthesis/TransformMachines/SynthesisError.h> #include <synthesis/TransformMachines2/AWVisResamplerHPG.h> #include <synthesis/TransformMachines2/AWConvFuncHolder.h> #include <synthesis/TransformMachines2/Utils.h> #include <synthesis/TransformMachines/SynthesisMath.h> #include <casacore/coordinates/Coordinates/SpectralCoordinate.h> #include <casacore/coordinates/Coordinates/CoordinateSystem.h> #include <casacore/casa/OS/Timer.h> #include <fstream> #include <iostream> #include <typeinfo> #include <iomanip> #include <map> //#include <synthesis/TransformMachines2/FortranizedLoops.h> //#include <synthesis/TransformMachines2/hpg.hpp> //#include <hpg/hpg.hpp> //#include <hpg/hpg_indexing.hpp> #include <type_traits> #include <synthesis/TransformMachines2/HPGVisBuffer.cc> #ifdef _OPENMP #include <omp.h> #endif //#include <casa/BasicMath/Functors.h> using namespace casacore; using namespace hpg; namespace casa{ using namespace refim; // // Global functions // // --------------- template <unsigned N> std::vector<hpg::VisData<N>> makeHPGVisBuffer2(casacore::Matrix<double>& sumwt, const casa::refim::VBStore& casaVBS, AWConvFuncHolder& awh, const unsigned nGridPol, const unsigned nGridChan, unsigned startRow, unsigned endRow, const unsigned startChan, const unsigned endChan, const casacore::Vector<casacore::Int>& chanMap, const casacore::Vector<casacore::Int>& polMap, const casacore::Vector<double>& dphase, const hpg::CFSimpleIndexer& cfsi ) { unsigned nVisChan=endChan - startChan + 1; unsigned nVisRow=endRow - startRow + 1; const casa::VisBuffer2& vb = *(casaVBS.vb_p); IPosition dataShape = vb.visCube().shape(); unsigned nDataPol=dataShape(0); vector<int> unique_pol = polMap.tovector(); std::sort(unique_pol.begin(), unique_pol.end()); auto last = std::unique(unique_pol.begin(), unique_pol.end()); unique_pol.erase(last, unique_pol.end()); uint nVisPol = unique_pol.size(); std::vector<hpg::VisData<N>> hpgVB=create_blank_vis_data_vector<N>(nVisPol,nVisChan,nVisRow); std::array<std::complex<hpg::visibility_fp>, N> vis; std::array<hpg::vis_weight_fp, N> wt; const casacore::Matrix<double> UVW=casaVBS.uvw_p; std::array<hpg::vis_uvw_fp, 3> hpgUVW; hpg::vis_phase_fp dphaseHPG; unsigned grid_cube=0; // For now, grid on the same plane. // Need to pass the beam offsets phase grad //setting to use pointing table for now Vector<Double> pointingOffsets = awh.getPointingPhaseShift(vb, True); hpg::cf_phase_gradient_t cf_phase_gradient = {(hpg::cf_phase_gradient_fp)pointingOffsets[0], (hpg::cf_phase_gradient_fp)pointingOffsets[1]}; std::array<unsigned, 2> cf_index; Vector<Double>freq = vb.getFrequencies(0); // Int vbSpw = vb.spectralWindows()(0); unsigned hpgIRow=0; // Lets get convindices Vector<Int> convpolmap; Vector<Int> convchanmap; Vector<Int> convrowmap; awh.getConvIndicesHPG(convpolmap, convchanmap, convrowmap, vb, UVW); // convrowmap goes from 0 to nW-1 inclusive uint nconvchan = awh.getFreqValsHPG().nelements(); uint nconvwvals = awh.getWValsHPG().nelements(); //cerr << "vb.spec" << vb.spectralWindows()(0) << "convchanmap" << convchanmap << " nconvchan " << nconvchan << " convrow " << convrowmap << endl; for(unsigned irow=startRow; irow< endRow; irow++) { if (casaVBS.ftmType_p==casa::refim::FTMachine::WEIGHT) { hpgUVW={0.0, 0.0, 0.0}; } else{ hpgUVW={UVW(0,irow), UVW(1,irow), UVW(2,irow)}; } if(!casaVBS.rowFlag_p(irow) ) { for(unsigned ichan=startChan; ichan< endChan; ichan++) { if(((chanMap[ichan]>=0) && (chanMap[ichan]<nGridChan))) { int polelem=0; for(unsigned ipol=0; ipol< nDataPol; ipol++) { if ((polMap(ipol)>=0) && (polMap(ipol)<nGridPol)) { //int visVecElement= polMap[ipol]; int visVecElement=polelem; if (vb.flagCube()(ipol,ichan,irow)==false) { if ((casaVBS.ftmType_p==casa::refim::FTMachine::WEIGHT)|| (casaVBS.ftmType_p==casa::refim::FTMachine::PSF)) { vis[visVecElement] = std::complex<double>(1.0,0.0); } else { vis[visVecElement] = casaVBS.visCube_p(ipol,ichan,irow); } wt[visVecElement]=casaVBS.imagingWeight_p(ichan,irow); } else { vis[visVecElement] = 0.0; wt[visVecElement] = 0.0; } ++polelem; sumwt(polMap[ipol],chanMap[ichan]) += vb.imagingWeight()(ichan, irow); } // if polmap } //ipol uint cindex=0; hpg::vis_frequency_fp frequency = freq[ichan]; if (casaVBS.ftmType_p==casa::refim::FTMachine::PSF || casaVBS.ftmType_p==casa::refim::FTMachine::WEIGHT ) { dphaseHPG = 0.0; //cindex = (0 + nconvwvals - 1) * nconvchan + convchanmap[ichan]; } else{ dphaseHPG = -2.0 * C::pi * dphase[irow] * frequency / C::c; //cindex = (convrowmap[irow] + nconvwvals - 1) * nconvchan + convchanmap[ichan]; } //uint cindex = (convrowmap[irow]+nconvwvals-1)*nconvchan+convchanmap[ichan]; cindex = convchanmap[ichan] * (nconvwvals) + abs(convrowmap[irow]); cf_index = {0,cindex}; // cfindex is rowindex*nconvchan+ convchanmap hpgVB[hpgIRow++] = hpg::VisData<N>(vis,wt,frequency,dphaseHPG,hpgUVW,grid_cube,cf_index ,cf_phase_gradient ); } // if chnamap } // ichan } // if flag } // irow hpgVB.resize(hpgIRow); return hpgVB; } //----------------------------------------------------------------------------------//---- template <unsigned N> void makeMuellerIndexes(const PolMapType& mNdx, const PolMapType& mValues, const unsigned& nGridPol, std::vector<std::array<int, N>>& muellerIndexes) { PolMapType tt; //cerr << "####N " << N << endl; // for(unsigned i=0;i<mNdx.size();i++) // { // cerr << mNdx[i] << endl; // } // cerr << endl; for(unsigned i=0,k=0;i<mNdx.size();i++) { if (mNdx[i].size()>0) { tt.resize(k+1,true); tt[k].resize(mNdx[i].size()); tt[k]=mNdx[i]; k++; } } muellerIndexes.resize(nGridPol); for(unsigned i=0;i<muellerIndexes.size();i++) for(unsigned j=0;j<muellerIndexes[i].size();j++) muellerIndexes[i][j]=-1; ////This is incomprehensible indexing (2023/3/13) but mValues size and tt size may not match ...so for now looping //// over the minimum of the two sizes just to avoid out of bounds indexing !! for(unsigned i=0 ; i<min(mValues.size(), tt.size()) ;i++) { //cerr <<"i= " << i << " SIZES mValues " << mValues[i].size() << " mval[i] "<< mValues[i] << " tt[i] " << tt[i] << endl; for(unsigned j=0;j<mValues[i].size();j++) { muellerIndexes[j][mValues[i][j]%N]=tt[i][j]; //muellerIndexes[j][mValues[i][j]%N]=1; } } /* for(unsigned ir=0;ir<muellerIndexes.size();ir++) { for(unsigned ic=0;ic<muellerIndexes[ir].size();ic++) cerr << muellerIndexes[ir][ic] << " "; cerr << endl; } */ } // //------------------------------------------------------------------------- // void makeAWLists(CFBuffer& cfb, const bool& wbAWP, const int& nw, const double& imRefFreq, const double& spwRefFreq, Vector<Int>& wNdxList, Vector<Int>& spwNdxList, const int vbSPW) { Vector<Double> wVals, fVals; PolMapType mVals, mNdx, conjMVals, conjMNdx; Double fIncr, wIncr; // // The following can be generalized to pick a subset of CFs along // W and SPW axis in the CFB. Perhaps also useful in the longer // run, e.g. when using a super-set CFC not all of which may be // used for a given imaging. // cfb.getCoordList(fVals,wVals,mNdx, mVals, conjMNdx, conjMVals, fIncr, wIncr); // W-pixels in the CFC should be >= w-planes user setting assert(wVals.nelements() >= nw); // Make list of W-CF indexes int nWCFs=(nw==1)?1:wVals.nelements(); wNdxList.resize(nWCFs); for(int i=0;i<nWCFs;i++) wNdxList[i] = i; // cerr << "nWCFs" << nWCFs << " vbSPW " << vbSPW << " fvals " << fVals << endl; // Make list of SPW-CF indexes int nSPWCFs=fVals.nelements(); if (wbAWP==true) { // If a valid SPW ID is given, translate it to the spwNdx for the nearest SPW if ((vbSPW>=0))// && (vbSPW <nSPWCFs)) { int refSPW; std::vector<double> stdList(nSPWCFs); for (int i=0; i<nSPWCFs; i++) stdList[i] = fVals[i]; Double refCFFreq = SynthesisUtils::stdNearestValue(stdList, spwRefFreq, refSPW); spwNdxList.resize(1); spwNdxList[0]=refSPW; } else { spwNdxList.resize(nSPWCFs); for(int i=0;i<nSPWCFs;i++) spwNdxList[i] = i; } } else { // For wbAWP=F, pick up the CF closest to the image reference frequency int refSPW; std::vector<double> stdList(nSPWCFs); for (int i=0; i<nSPWCFs; i++) stdList[i] = fVals[i]; Double refCFFreq = SynthesisUtils::stdNearestValue(stdList, imRefFreq, refSPW); spwNdxList.resize(1); spwNdxList[0]=refSPW; } // cerr << "spwNdxList " << spwNdxList << endl; return; } // //------------------------------------------------------------------------- std::tuple<hpg::opt_t<hpg::Error>, hpg::CFSimpleIndexer> AWVisResamplerHPG::loadCF(VBStore& vbs, AWConvFuncHolder& awh, bool send_to_device) { LogIO log_l(LogOrigin("AWVisResamplerHPG","loadCF")); Int ndatapol = vbs.vb().nCorrelations(); Int ndataChan = vbs.vb().nChannels(); awh.resetHPGConvFuncs(vbs.vb()); Array<Complex> cFunc; if (vbs.ftmType_p != casa::refim::FTMachine::WEIGHT) { cFunc = awh.getConvFuncHPG(); } else{ cFunc = awh.getWeightConvFuncHPG(); } IPosition cshape = cFunc.shape(); uint convSize = cshape[0]; uInt nConvPol = cshape[2]; Vector<Double> cFreqs = awh.getFreqValsHPG(); Vector<Double> cWVals = awh.getWValsHPG(); uint nPol = 2; // we'll try only the RR uint nchan = cFreqs.nelements(); uint nw = cWVals.nelements(); int sampling = awh.getOverSampling(); uint nCF = nchan * (nw); // nBLType, nTime/PA, nW, nFreq, nPol hpg::CFSimpleIndexer cfsi({1,false},{1,false},{nw,true},{nchan,true}, nPol); if (cfArray.oversampling()==0 || cfArray.size() < nCF) { log_l << "Setting cfArray size: " << "nCF: " << nCF << " x " << nPol << " sampling: " << sampling << " convsize " << convSize << LogIO::POST; cfArray.setSize((unsigned)(nCF),(unsigned)sampling); } Bool isCopy; Complex *cfuncPtr = cFunc.getStorage(isCopy); //Matrix<Complex> conjW(convSize, convSize); //Bool isConjCopy; //Complex *conjptr = conjW.getStorage(isConjCopy); unsigned iGrp=0; // HPG Group index for(uint iFreq=0; iFreq < nchan; ++iFreq) // CASA CF Freq-index { // CASA CF W-index uint wcounter = 0; for (Int iW =0; iW < Int(nw); ++iW) { for(uint ipol=0; ipol < nPol; ++ipol) // CASA CF Pol-index { Complex* convptr=NULL; uint ptroffset = ((abs(iW)*nchan+iFreq)*nConvPol+ipol)*convSize*convSize; //uint ptroffset = ((iFreq * nw + iW) * nConvPol + ipol) * convSize * convSize; convptr = (cfuncPtr+ptroffset); /*if (iW < 0) { memcpy(conjptr, convptr, sizeof(Complex)*convSize*convSize); conjW.putStorage(conjptr, isConjCopy); //conjW = conj(conjW); conjptr = conjW.getStorage(isConjCopy); convptr = conjptr; }*/ hpg::CFCellIndex cfCellidx(0,0, 0,iFreq,ipol); std::array<unsigned, 3> index = cfsi.cf_index(cfCellidx); cfArray.resize(iGrp, convSize, convSize); if (send_to_device) { //cerr << "send iFreq " << iFreq << " igrp " << iGrp << " indices " << index[0] << "," << index[1] << "," << index[2] << endl; cfArray.setValues(convptr, iGrp, convSize, convSize, ipol, 0); } } // pol ++iGrp; ++wcounter; }// iW } // ifreq cFunc.putStorage(cfuncPtr, isCopy); hpg::opt_t<hpg::Error> err; if (send_to_device) { err=hpgGridder_p->set_convolution_function(hpg::Device::OpenMP, std::move(cfArray)); } std::tuple<hpg::opt_t<hpg::Error>, hpg::CFSimpleIndexer> retval = std::make_tuple(err, cfsi); return retval; } //-------------------------------------------------------------------------- // // This is a global method, used in AWVRHPG::DataToGrid_impl(). template <unsigned N> hpg::Gridder* AWVisResamplerHPG::initGridder2(const hpg::Device HPGDevice_l, const uInt& NProcs, const hpg::CFArray* cfArray_ptr, const std::array<unsigned, 4>& grid_size, const std::array<double, 2>& grid_scale, const PolMapType& mNdx, const PolMapType& mVals, const PolMapType& conjMNdx, const PolMapType& conjMVals, const int& nAntenna, const int& nChannel // std::vector<std::array<int, N> > mueller_indexes, // std::vector<std::array<int, N> > conjugate_mueller_indexes ) { // // FYI: gridSize{(uInt)nx,(uInt)ny,(uInt)nGridPol,(uInt)nGridChan}; // LogIO log_l(LogOrigin("AWVRHPG", "initGridder2[R&D]")); log_l << "grid_size: " << grid_size[0] << " " << grid_size[1] << " " << grid_size[2] << " " << grid_size[3] << " " << LogIO::POST; log_l << "Per VB: nAnt: " << nAntenna << ", nChannel: " << nChannel << LogIO::POST; if (!hpg::is_initialized()) hpg::initialize(); { std::vector<std::array<int, N> > mueller_indexes,conjugate_mueller_indexes; makeMuellerIndexes<N>(mNdx, mVals, grid_size[2]/*nGridPol*/, mueller_indexes); makeMuellerIndexes<N>(conjMNdx, conjMVals, grid_size[2]/*nGridPol*/, conjugate_mueller_indexes); //size_t max_visibilities_batch_size = 351*2*1000; size_t max_visibilities_batch_size = (nAntenna*(nAntenna-1)/2)*2*nChannel; hpg::rval_t<hpg::Gridder> g; cerr << "M: " << endl; for(unsigned ir=0;ir<mueller_indexes.size();ir++) { for(unsigned ic=0;ic<mueller_indexes[ir].size();ic++) { // mueller_indexes[ir][ic] = 1; cerr << "ir ic " << ir << " " << ic << " " << mueller_indexes[ir][ic] << " "; } cerr << endl; } cerr << "M*: " << endl; for(unsigned ir=0;ir<conjugate_mueller_indexes.size();ir++) { for(unsigned ic=0;ic<conjugate_mueller_indexes[ir].size();ic++) { // conjugate_mueller_indexes[ir][ic] = 0; cerr << "ir ic " << ir << " " << ic << " " << conjugate_mueller_indexes[ir][ic] << " "; } cerr << endl; } // mueller_indexes.resize(N); conjugate_mueller_indexes.resize(N); cerr << "Mueller index shape: " << mueller_indexes.size() << "X" << HPGNPOL << endl; // if (hpgDevice=="serial") Device=hpg::Device::Serial; g = hpg::Gridder::create<N>(HPGDevice_l, NProcs, max_visibilities_batch_size, cfArray_ptr, grid_size, grid_scale, mueller_indexes, conjugate_mueller_indexes); //hpgGridder_p = new hpg::Gridder(hpg::get_value(std::move(g))); return new hpg::Gridder(hpg::get_value(std::move(g))); } }; // This is a global method, used in AWVRHPG::DataToGrid_impl(). template <unsigned N> hpg::Gridder* AWVisResamplerHPG::initGridder3(const hpg::Device HPGDevice_l, const uInt& NProcs, const hpg::CFArray* cfArray_ptr, const std::array<unsigned, 4>& grid_size, const std::array<double, 2>& grid_scale, const int& nAntenna, const int& nChannel ) { // // FYI: gridSize{(uInt)nx,(uInt)ny,(uInt)nGridPol,(uInt)nGridChan}; // LogIO log_l(LogOrigin("AWVRHPG", "initGridder3")); log_l << "grid_size: " << grid_size[0] << " " << grid_size[1] << " " << grid_size[2] << " " << grid_size[3] << " " << LogIO::POST; log_l << "Per VB: nAnt: " << nAntenna << ", nChannel: " << nChannel << LogIO::POST; if (!hpg::is_initialized()) hpg::initialize(); { //size_t max_visibilities_batch_size = 351*2*1000; size_t max_visibilities_batch_size = (nAntenna*(nAntenna-1)/2)*2*nChannel; hpg::rval_t<hpg::Gridder> g; std::vector<std::array<int, N> > mueller_indexes, mueller_conj_indexes; if (N == 2) { mueller_indexes.push_back({0, 1}); mueller_conj_indexes.push_back({1, 0}); } else{ throw(AipsError("HPG does not deal with "+String::toString(N) + " pol ")); } g = hpg::Gridder::create<N>(HPGDevice_l, NProcs, max_visibilities_batch_size, cfArray_ptr, grid_size, grid_scale, mueller_indexes, mueller_indexes); //hpgGridder_p = new hpg::Gridder(hpg::get_value(std::move(g))); return new hpg::Gridder(hpg::get_value(std::move(g))); } }; //#include "HPGLoadCF.cc" #include "HPGLoadCFNew.inc" // // END -- Global functions // -------------------------------------------------------------------------------------- template void AWVisResamplerHPG::DataToGridImpl_p(Array<DComplex>& grid, VBStore& vbs, Matrix<Double>& sumwt,const Bool& dopsf, Bool useConjFreqCF); // __restrict__; template void AWVisResamplerHPG::DataToGridImpl_p(Array<Complex>& grid, VBStore& vbs, Matrix<Double>& sumwt,const Bool& dopsf, Bool useConjFreqCF); // __restrict__; void AWVisResamplerHPG::copy(const AWVisResamplerHPG& other) { AWVisResampler::copy(other); vb2CFBMap_p=new VB2CFBMap(other.getVBRow2CFBMap()); } // Moved the accumulateFromGrid() method to file to play with // multi-threading it to not clutter this file. Both versions // (threaded and non-threaded) are in this file. //#include "accumulateFromGrid.cc" std::tuple<String, hpg::Device> AWVisResamplerHPG::getHPGDevice() { String hpgDevice="cuda"; hpg::Device Device=hpg::Device::Cuda; //hpgDevice=refim::SynthesisUtils::getenv("HPGDEVICE",hpgDevice); if (hpgDevice=="serial") Device=hpg::Device::Serial; return std::make_tuple(hpgDevice,Device); } // // Gather the grid and sum-of-weights from the Device (external to the memory address space), and shut the device. // void AWVisResamplerHPG::finalizeToSky(casacore::Array<casacore::DComplex>& griddedData, casacore::Matrix<casacore::Double>& sumwt) { if(!hpgGridder_p){ cerr << "Calling ResamplerHPG finalize with null pointer...just returning " << endl; return; } LogIO log_l(LogOrigin("AWVisResamplerHPG[R&D]","finalizeToSky(DCompelx)")); log_l << "Finalizing gridding..." << LogIO::POST; hpgGridder_p->shift_grid(ShiftDirection::FORWARD); hpgGridder_p->apply_grid_fft(); hpgGridder_p->shift_grid(ShiftDirection::BACKWARD); GatherGrids(griddedData,sumwt); // size_t size; //auto wgtptr = getSumWeightsPtr(size); //cerr << "@@@SIZE " << size << " sumwt " << ((size >0) ? String::toString(wgtptr.get()[size - 1]) : "null") << endl; // cerr << endl << "HPG::resetGridder()" << endl; hpgGridder_p->reset_grid(); if (hpgGridder_p != NULL) delete hpgGridder_p; hpgGridder_p=NULL; if (isHPGCustodian_p) hpg::finalize(); log_l << "...done finalizing gridding" << LogIO::POST; } ////==================================================================== void AWVisResamplerHPG::GatherGrids(casacore::Array<casacore::DComplex>& griddedData, casacore::Matrix<casacore::Double>& sumwt) { LogIO log_l(LogOrigin("AWVisResamplerHPG[R&D]","GatherGrids(DCompelx)")); { auto gg=hpgGridder_p->grid_values(); std::unique_ptr<hpg::GridWeightArray> wts=hpgGridder_p->grid_weights(); log_l << "Got the grid from the device " << griddedData.shape() << LogIO::POST; cerr << std::setprecision(20) << "Weights shape " << wts->extent(0) << " x " << wts->extent(1) << endl; hpgSoW_p.resize(wts->extent(0)); for (unsigned i=0;i<hpgSoW_p.size();i++) hpgSoW_p[i].resize(wts->extent(1)); for (unsigned i=0;i<hpgSoW_p.size();i++) { for (unsigned j=0;j<hpgSoW_p[i].size();j++) { hpgSoW_p[i][j]=(*wts)(i,j); cerr << (*wts)(i,j) << " "; } cerr << endl; } cerr << "Before: hpgSoW_p, sumwt: " << sumwt << endl; // hpgSoW_p is of type sumofweight_fp (vector<vector<double>>). sumWeight is a Matrix. // sow[i][*] is a Mueller row. i is the index for the polarization product. // wt below is a sum of the weights of all the Mueller elements in a row. if (hpgSoW_p.size() > 0) { for (int i=0; i<sumwt.shape()(0); i++) { double wt=0.0; for (unsigned j=0; j<hpgSoW_p[i].size(); j++) wt += hpgSoW_p[i][j]; for (unsigned j=0; j<sumwt.shape()(1); j++) { sumwt(i,j)=wt; } } } cerr << "After: hpgSoW_p, sumwt: " << sumwt << endl; // for (size_t i = 0; i < 4; ++i) // cerr << gg->extent(i) << " "; IPosition shp = griddedData.shape(),ndx(4); Bool dummy; casacore::DComplex *stor = griddedData.getStorage(dummy); DComplex val, max=casacore::DComplex(0.0); // std::memcpy((void *)(griddedData.getStorage(dummy)), // (void *)(gg->data()), // shp.product()*sizeof(casacore::DComplex)); // To TEST: All the following nested loops could be replaced with // gg->copy_to(hpg::Device::OpenMP, stor); unsigned long k=0; for(ndx[3]=0; ndx[3]<shp[3]; ndx[3]++) for(ndx[2]=0; ndx[2]<shp[2]; ndx[2]++) for(ndx[1]=0; ndx[1]<shp[1]; ndx[1]++) for(ndx[0]=0; ndx[0]<shp[0]; ndx[0]++) { val = (*gg)(ndx[0],ndx[1],ndx[2],ndx[3]); stor[k++] = val; } log_l << "Copied grid to CPU buffer " << dummy << " " << max << LogIO::POST; } }; // // This is the method that can be used via the imaging framework. A // dummy for now. Perhaps the global function below is a better, // design-wise. In which case it should move to SynthesisUltils, so // that it can be used wherever necessary in the system. // void AWVisResamplerHPG::initGridder(const uInt&,//NProcs, const hpg::CFArray*,// cfArray_ptr, const std::array<unsigned, 4>&,//grid_size, const std::array<float, 2>&// grid_scale ) {} void AWVisResamplerHPG::setModelImage(std::shared_ptr<casacore::ImageInterface<casacore::Complex> > modelim){ modelImage_p=modelim; } bool AWVisResamplerHPG::sendModelImage( const std::array<unsigned, 4>& gridSize) { LogIO log_l(LogOrigin("AWVisResamplerHPG[R&D]","sendModelImage")); if (HPGModelImageName_p == "" && !modelImage_p) { //log_l << "Name for model image is blank! No model image sent to the device" << LogIO::WARN << LogIO::POST; return false; } // Open the CASA image and transfer it to hpg::GridValueArray in // a code-block so that the CASA image storage is released at the end // of the block. casacore::CoordinateSystem csys; casacore::TempImage<casacore::DComplex> modelImageGrid; if(Table::isReadable(HPGModelImageName_p)){ log_l << "Loading model image " << "\"" << HPGModelImageName_p << "\"" << LogIO::POST; csys=SynthesisUtils::makeModelGridFromImage(HPGModelImageName_p, modelImageGrid); } else{ csys=modelImage_p->coordinates(); modelImageGrid.resize(modelImage_p->shape()); modelImageGrid.setCoordinateInfo(csys); modelImageGrid.copyData((LatticeExpr<DComplex>) (*modelImage_p)); } // Stop if the model image sname is not the same as the grid shape { for (auto i : {0,1,2,3}) if (modelImageGrid.shape()[i] != gridSize[i]) { log_l << "Model image shape: " << modelImageGrid.shape() << ". " << "Grid shape: [" << gridSize[0] << "," << gridSize[1] << "," << gridSize[2] << "," << gridSize[3] << "]" << LogIO::WARN; log_l << "Model image is of incorrect shape " << LogIO::EXCEPTION << LogIO::POST; } } Bool dummy; assert(hpg::GridValueArray::rank == modelImageGrid.shape().nelements()); std::array<unsigned,hpg::GridValueArray::rank> extents={(unsigned)modelImageGrid.shape()[0], (unsigned)modelImageGrid.shape()[1], (unsigned)modelImageGrid.shape()[2], (unsigned)modelImageGrid.shape()[3]}; casacore::Array<casacore::DComplex> tarr=modelImageGrid.get(); casacore::DComplex *tstor = tarr.getStorage(dummy); std::unique_ptr<hpg::GridValueArray> HPGModelImage; HPGModelImage = hpg::GridValueArray::copy_from("whatever", HPGDevice_p, // target_device //hpg::Device::Cuda, //target_device hpg::Device::OpenMP,//Device host_device, (hpg::GridValueArray::value_type*)tstor, extents); unsigned shape[hpg::GridValueArray::rank]; log_l << "Sending model image of shape "; for (unsigned i=0;i<hpg::GridValueArray::rank;i++) { shape[i]=HPGModelImage->extent(i); log_l << shape[i] << " "; } log_l << LogIO::POST; hpg::opt_t<hpg::Error> err = hpgGridder_p->set_model(hpg::Device::OpenMP,std::move(*HPGModelImage)); // bool success=(err == nullptr); if (err) log_l << "Failed hpg::Gridder::set_model(). Error type: " << static_cast<int>(err->type()) << LogIO::SEVERE; else log_l << "Finished sending image to the device" << LogIO::POST; grid_value_fp norm = 1.0;//((grid_value_fp)(shape[0]*shape[1])); log_l << "Applying (in-place) FFT on the model image. Norm = " << norm << LogIO::POST; // Below is the following sequence of operations on the GPU: shift -> FFT -> shift. // The second shift is equivalent of applying a phase shift corresponding to N/2 pixels. I think... hpgGridder_p->shift_model(ShiftDirection::FORWARD); // <============= shift_model() err = hpgGridder_p->apply_model_fft(norm,FFTSign::NEGATIVE); // <============= apply_model_fft() if (err) log_l << "Gridder::apply_model_fft() failed. Error type: " << static_cast<int>(err->type()) << LogIO::SEVERE; hpgGridder_p->shift_model(ShiftDirection::BACKWARD); // <============= shift_model() log_l << "...finished FFT." << LogIO::POST; return bool(!err); } // //----------------------------------------------------------------------------------- // Template implementation for DataToGrid // template <class T> void AWVisResamplerHPG::DataToGridImpl_p(Array<T>& grid, VBStore& vbs, Matrix<Double>& sumwt,const Bool& dopsf, Bool /*useConjFreqCF*/) { Int nDataChan, nDataPol, nGridPol, nGridChan, nx, ny, nW;//, nCFFreq; Int targetIMPol, rbeg, rend;//, PolnPlane, ConjPlane; Int startChan, endChan; // // Load the VB in the vbsList_p. This list is emptied, once it is // full, in the code at the end of this block. // rbeg = 0; rend = vbs.nRow_p; //cerr << "NROWS " << vbs.nRow_p << endl; if(rend==0) return; rbeg = vbs.beginRow_p; rend = vbs.endRow_p; nx = grid.shape()[0]; ny = grid.shape()[1]; nGridPol = grid.shape()[2]; nGridChan = grid.shape()[3]; nDataPol = vbs.flagCube_p.shape()[0]; nDataChan = vbs.flagCube_p.shape()[1]; //Bool accumCFs=((vbs.uvw_p.nelements() == 0) && dopsf); // Bool Dummy; // Double *freq=vbs.freq_p.getStorage(Dummy); Vector<Double> wVals, fVals; PolMapType mVals, mNdx, conjMVals, conjMNdx; Double fIncr, wIncr; CountedPtr<CFBuffer> cfb = (*vb2CFBMap_p)[0]; // This loads the all-importnat conjMNDx and mNdx maps // cfb->getCoordList(fVals,wVals,mNdx, mVals, conjMNdx, conjMVals, fIncr, wIncr); // runTimeG1_p += timer_p.real(); nW = wVals.nelements(); // timer.mark(); startChan = 0; endChan = nDataChan; // if (accumCFs) {startChan = vbs.startChan_p; endChan = vbs.endChan_p;} // else {startChan = 0; endChan = nDataChan;} Int vbSpw = (vbs.vb_p)->spectralWindows()(0); uInt nVisPol=0;// nRows=rend-rbeg+1, nVisChan=endChan - startChan+1; for(Int ipol=0; ipol< nDataPol; ipol++) { targetIMPol=polMap_p(ipol); if ((targetIMPol>=0) && (targetIMPol<nGridPol)) nVisPol++; } // // Page-in the new CFs and load them on the device upon SPW change. // //hpg::CFSimpleIndexer cfsi({1,false},{1,false},{1,true},{1,true}, 1); // Reload CFs if the SPW changed and the setup is for AW-Projection (wbAWP=T & nW > 1). //bool reloadCFs = (((cachedVBSpw_p != vbSpw) && // when data for a new SPW arrives, // (vbs.nWPlanes_p > 1) && (vbs.wbAWP_p==true)) || // if WB A-term and w-term corrections are requested, or // (hpgGridder_p==NULL)); // if the HPG is uninitialized (first-pass) bool reloadCFs=(hpgGridder_p==NULL) || (cachedVBSpw_p != vbSpw); //TESTOOO //reloadCFs=True; ////////////////// double spwRefFreq = vbs.vb_p->subtableColumns().spectralWindow().refFrequency()(vbSpw); int nVBAntenna = vbs.vb_p->nAntennas(); int nVBChannels = vbs.vb_p->nChannels(); bool do_degrid=(HPGModelImageName_p != "") || modelImage_p; Vector<Int> wNdxList, spwNdxList; if (cachedVBSpw_p != vbSpw) { cachedVBSpw_p = vbSpw; // LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); // log_l << "SPW: " << vbSpw << " " << "Field: " << (vbs.vb_p)->fieldId()(0) << LogIO::POST; // Set the flag to re-load and re-send the CFs. if (hpgGridder_p==NULL) { // The grid is always a 4D array: NX x NY x NPol x NChan const std::array<unsigned, 4> gridSize{(uInt)nx,(uInt)ny,(uInt)nGridPol,(uInt)nGridChan}; //const std::array<float, 2> gridScale{(float)uvwScale_p(0), (float)uvwScale_p(1)}; const std::array<double, 2> gridScale{uvwScale_p(0), uvwScale_p(1)}; // std::vector<std::array<int, HPGNPOL> > mueller_indexes,conjugate_mueller_indexes; // makeMuellerIndexes<HPGNPOL>(mNdx, mVals, (uInt)nGridPol, mueller_indexes); // makeMuellerIndexes<HPGNPOL>(conjMNdx, conjMVals, (uInt)nGridPol, conjugate_mueller_indexes); cerr << "####calling INIT " << " mNdx,mVals,conjMNdx,conjMVals,nVBAntenna, nVBChannels " << mNdx << " " << mVals << " " << conjMNdx << " " << conjMVals << " " << nVBAntenna << " " << nVBChannels << endl; hpgGridder_p = initGridder2<HPGNPOL>(HPGDevice_p,1,&cfArray, gridSize, gridScale,mNdx,mVals,conjMNdx,conjMVals, nVBAntenna, nVBChannels); //mueller_indexes,conjugate_mueller_indexes); do_degrid = sendModelImage(gridSize); if (do_degrid) { LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); log_l << "Running degrid_grid GPU kernel" << LogIO::POST; } } if (reloadCFs) { // If wplanes == 1, make a list of all SPW CFs. If // wplanes > 1, make a list of all W CFs for the current // SPW. //int tspw=(vbs.nWPlanes_p == 1)?-1:vbSpw; makeAWLists(*cfb, vbs.wbAWP_p, vbs.nWPlanes_p, vbs.imRefFreq_p, spwRefFreq, wNdxList, spwNdxList, vbSpw); // MakeCFArray::makeCFArray(vbs.wbAWP_p, vbs.nWPLanes_p, // ...(skyImage),... // polMap_p, // ...(cfs2_l), // tswp,//vbSpw_l // spwRefFreq, nDataPol, nGridPol, wNdxList,spwNdxList) // MakeCFArray::makeCFArray_p(cfArray, // vbs.paQuant_p.getValue("rad"),// vbPA, // vbs.imRefFreq_p, // *cfb, // nGridPol, // nDataPol, // polMap_p, // wNdxList, // spwNdxList); //cerr << "SPW ID: " << (vbs.vb_p)->spectralWindows()[0] << endl; //cerr << "ImRefFreq, SpwRefFreq: " << vbs.imRefFreq_p << " " << spwRefFreq << endl; //cerr << "WList: "; for(auto& n : wNdxList) cerr << n << ' '; cerr << endl; //cerr << "SPWList: "; for(auto& n : spwNdxList) cerr << n << ' '; cerr << endl; //cfsi = loadCF(vbs, nGridPol, nDataPol,true); // Returns std::tuple<hpg::opt_t<hpg::Error>, hpg::CFSimpleIndexer> auto ret= loadCF(vbs, nGridPol, nDataPol,wNdxList,spwNdxList,true); //auto ret= loadCF(cfb,vbs, nGridPol, nDataPol,wNdxList,spwNdxList,true); cfsi_p=std::get<1>(ret); if (std::get<0>(ret)) { LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); log_l << "HPGError in calling set_convolution_function(): " << " " << static_cast<int>(std::get<0>(ret)->type()) << LogIO::EXCEPTION; } } } //cerr << "FIELD ID: " << (vbs.vb_p)->fieldId()[0] << " "; casacore::Vector<casacore::Vector<casacore::Double> > pointingOffsets= cfb->getPointingOffset(); //cerr << "SHAPE of PTING off "<< pointingOffsets.shape() << " max " << pointingOffsets[0] << endl; // rbeg=19; // rend=20; //For a single-vis test //startChan=32; endChan=33; // cerr << chanMap_p << endl; //cerr << "Pointing offsets: " << pointingOffsets << " " << rbeg << " " << rend << " " << startChan << " " << endChan << " " << max(vbs.vb_p->visCube()) << endl; std::vector<hpg::VisData<HPGNPOL> > hpgVB = makeHPGVisBuffer<HPGNPOL>(sumwt, vbs, vb2CFBMap_p, nGridPol, nGridChan, nVisPol, rbeg, rend, startChan, endChan, chanMap_p, polMap_p, conjMNdx, conjMVals, dphase_p,pointingOffsets, cfsi_p); // Add to the list only if the hpgVB holds any data. This guard // is required since in the loop below we also count the number // visibilities gridded (the nVisGridded_p). if (hpgVB.size() > 0) hpgVBList_p.push_back(hpgVB); // // If the vbsList_p is full, send the VBs loaded in vbsList_p for gridding, and empty vbsList_p. // std::move() empties the storage of its argument. // timer_p.mark(); cerr << "hpgVBList_p.size " << hpgVBList_p.size() << " maxVBList " << maxVBList_p << endl; if (hpgVBList_p.size() >= maxVBList_p) { for(unsigned i=0;i<hpgVBList_p.size();i++) { nVisGridded_p += hpgVBList_p[i].size()*HPGNPOL; hpg::opt_t<hpg::Error> err; if (do_degrid) err = hpgGridder_p->degrid_grid_visibilities( hpg::Device::OpenMP, std::move(hpgVBList_p[i]) ); else err = hpgGridder_p->grid_visibilities( hpg::Device::OpenMP, std::move(hpgVBList_p[i]) ); if (err) { LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); log_l << "Failed hpg::" << ((do_degrid)?"degrid_grid_visibilities()":"grid_visibilities()") << " Error type: " << static_cast<int>(err->type()) << LogIO::SEVERE; } } hpgVBList_p.resize(0); } griddingTime += timer_p.real(); return; } // //----------------------------------------------------------------------------------- // template <class T> void AWVisResamplerHPG::DataToGridImpl2_p(Array<T>& grid, VBStore& vbs, AWConvFuncHolder& awh, Matrix<Double>& sumwt,const Bool& dopsf) { Int nDataChan, nDataPol, nGridPol, nGridChan, nx, ny, nW;//, nCFFreq; Int targetIMPol, rbeg, rend;//, PolnPlane, ConjPlane; Int startChan, endChan; rbeg = 0; rend = vbs.nRow_p; //cerr << "NROWS " << vbs.nRow_p << endl; if(rend==0) return; rbeg = vbs.beginRow_p; rend = vbs.endRow_p; nx = grid.shape()[0]; ny = grid.shape()[1]; nGridPol = grid.shape()[2]; nGridChan = grid.shape()[3]; nDataPol = vbs.flagCube_p.shape()[0]; nDataChan = vbs.flagCube_p.shape()[1]; // timer.mark(); startChan = 0; endChan = nDataChan; Int vbSpw = (vbs.vb_p)->spectralWindows()(0); uInt nVisPol=0;// nRows=rend-rbeg+1, nVisChan=endChan - startChan+1; for(Int ipol=0; ipol< nDataPol; ipol++) { targetIMPol=polMap_p(ipol); if ((targetIMPol>=0) && (targetIMPol<nGridPol)) nVisPol++; } // bool reloadCFs=(hpgGridder_p==NULL) || (cachedVBSpw_p != vbSpw); //bool reloadCFs=(hpgGridder_p==NULL); //TESTOOO //reloadCFs=True; ////////////////// //cerr << "vbspw" << vbSpw << " cached " << cachedVBSpw_p << endl; double spwRefFreq = vbs.vb_p->subtableColumns().spectralWindow().refFrequency()(vbSpw); int nVBAntenna = vbs.vb_p->nAntennas(); int nVBChannels = vbs.vb_p->nChannels(); bool do_degrid=(HPGModelImageName_p != "") || modelImage_p; Vector<Int> wNdxList, spwNdxList; if (cachedVBSpw_p != vbSpw) { cachedVBSpw_p = vbSpw; // LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); // log_l << "SPW: " << vbSpw << " " << "Field: " << (vbs.vb_p)->fieldId()(0) << LogIO::POST; // Set the flag to re-load and re-send the CFs. if (hpgGridder_p==NULL) { // The grid is always a 4D array: NX x NY x NPol x NChan const std::array<unsigned, 4> gridSize{(uInt)nx,(uInt)ny,(uInt)nGridPol,(uInt)nGridChan}; //const std::array<float, 2> gridScale{(float)uvwScale_p(0), (float)uvwScale_p(1)}; const std::array<double, 2> gridScale{uvwScale_p(0), uvwScale_p(1)}; // std::vector<std::array<int, HPGNPOL> > mueller_indexes,conjugate_mueller_indexes; // makeMuellerIndexes<HPGNPOL>(mNdx, mVals, (uInt)nGridPol, mueller_indexes); // makeMuellerIndexes<HPGNPOL>(conjMNdx, conjMVals, (uInt)nGridPol, conjugate_mueller_indexes); //cerr << "####calling INIT3 " << " nVBAntenna, nVBChannels " << " " << nVBAntenna << " " << nVBChannels << endl; hpgGridder_p = initGridder3<HPGNPOL>(HPGDevice_p,1,&cfArray, gridSize, gridScale,nVBAntenna, nVBChannels); //mueller_indexes,conjugate_mueller_indexes); do_degrid = sendModelImage(gridSize); if (do_degrid) { LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); log_l << "Running degrid_grid GPU kernel" << LogIO::POST; } } if (reloadCFs) { auto ret= loadCF(vbs, awh, true); //auto ret= loadCF(cfb,vbs, nGridPol, nDataPol,wNdxList,spwNdxList,true); cfsi_p=std::get<1>(ret); if (std::get<0>(ret)) { LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); log_l << "HPGError in calling set_convolution_function(): " << " " << static_cast<int>(std::get<0>(ret)->type()) << LogIO::EXCEPTION; } } } //cerr << "FIELD ID: " << (vbs.vb_p)->fieldId()[0] << " "; //casacore::Vector<casacore::Vector<casacore::Double> > pointingOffsets(1); //cerr << "SHAPE of PTING off "<< pointingOffsets.shape() << " max " << pointingOffsets[0] << endl; // rbeg=19; // rend=20; //For a single-vis test //startChan=32; endChan=33; // cerr << chanMap_p << endl; //cerr << "Pointing offsets: " << pointingOffsets << " " << rbeg << " " << rend << " " << startChan << " " << endChan << " " << max(vbs.vb_p->visCube()) << endl; std::vector<hpg::VisData<HPGNPOL> > hpgVB = makeHPGVisBuffer2<HPGNPOL>(sumwt, vbs,awh,nGridPol, nGridChan, rbeg, rend,startChan, endChan,chanMap_p,polMap_p, dphase_p,cfsi_p); // Add to the list only if the hpgVB holds any data. This guard // is required since in the loop below we also count the number // visibilities gridded (the nVisGridded_p). if (hpgVB.size() > 0) hpgVBList_p.push_back(hpgVB); // // If the vbsList_p is full, send the VBs loaded in vbsList_p for gridding, and empty vbsList_p. // std::move() empties the storage of its argument. // timer_p.mark(); //cerr << "hpgVBList_p.size " << hpgVBList_p.size() << " maxVBList " << maxVBList_p << endl; if (hpgVBList_p.size() >= maxVBList_p) { for(unsigned i=0;i<hpgVBList_p.size();i++) { nVisGridded_p += hpgVBList_p[i].size()*HPGNPOL; hpg::opt_t<hpg::Error> err; if (do_degrid) err = hpgGridder_p->degrid_grid_visibilities( hpg::Device::OpenMP, std::move(hpgVBList_p[i]) ); else err = hpgGridder_p->grid_visibilities( hpg::Device::OpenMP, std::move(hpgVBList_p[i]) ); if (err) { LogIO log_l(LogOrigin("AWVisResamplerHPG","DataToGrid_impl")); log_l << "Failed hpg::" << ((do_degrid)?"degrid_grid_visibilities()":"grid_visibilities()") << " Error type: " << static_cast<int>(err->type()) << LogIO::SEVERE; } } hpgVBList_p.resize(0); } griddingTime += timer_p.real(); return; } // template void AWVisResamplerHPG::DataToGridImpl2_p(Array<DComplex>& grid, VBStore& vbs, AWConvFuncHolder& awh, Matrix<Double>& sumwt,const Bool& dopsf); template void AWVisResamplerHPG::DataToGridImpl2_p(Array<Complex>& grid, VBStore& vbs, AWConvFuncHolder& awh, Matrix<Double>& sumwt,const Bool& dopsf); //----------------------------------------------------------------------------------- // std::shared_ptr<std::complex<double>> AWVisResamplerHPG::getGridPtr(size_t& size) const { if (hpgGridder_p) { hpgGridder_p->fence(); size = hpgGridder_p->grid_values_span(); return hpgGridder_p->grid_values_ptr(); } else { size = 0; return std::shared_ptr<std::complex<double>>(); } } std::shared_ptr<double> AWVisResamplerHPG::getSumWeightsPtr(size_t& size) const { if (hpgGridder_p) { hpgGridder_p->fence(); size = hpgGridder_p->grid_weights_span(); return hpgGridder_p->grid_weights_ptr(); } else { size = 0; return std::shared_ptr<double>(); } } using namespace casacore; };// end namespace casa #endif // endif of USE_HPG