changa 3.5
Loading...
Searching...
No Matches
DataManager.h
Go to the documentation of this file.
1
6
7#ifndef DATAMANAGER_H
8#define DATAMANAGER_H
9
10#include <vector>
11#include <map>
12#include <string>
13#include "GenericTreeNode.h"
14#include "ParallelGravity.decl.h"
15#include "lymanwerner.h"
16
17#if CHARM_VERSION > 60401 && CMK_BALANCED_INJECTION_API
18#include "ckBIconfig.h"
19#endif
20
/// Pairs a TreePiece pointer with a tree node pointer (`root`); DataManager
/// stores these in its `registeredTreePieces` vector.
/// NOTE(review): the `root` member assigned by the two-argument constructor is
/// not declared in the lines visible in this listing (listing line 24 is
/// absent) -- confirm its declaration against the real header.
22struct TreePieceDescriptor{
/// The TreePiece this descriptor refers to.
23 TreePiece *treePiece;
25
/// Default constructor: performs no initialization of the members.
26 TreePieceDescriptor(){}
/// Store the given TreePiece pointer and tree node pointer.
/// @param tp_ value copied into `treePiece`
/// @param r   value copied into `root`
27 TreePieceDescriptor(TreePiece *tp_, GenericTreeNode *r){
28 treePiece = tp_;
29 root = r;
30 }
31};
32
33#ifdef CUDA
34
/// Argument bundle taken (by pointer) by DataManager::updateParticles() and
/// DataManager::updateParticlesFreeMemory(). Only compiled in CUDA builds.
/// NOTE(review): listing line 38 is absent, so a member may be missing from
/// this view -- confirm against the real header.
35struct UpdateParticlesStruct{
/// Callback pointer; NOTE(review): presumably invoked once the update has
/// finished -- verify in DataManager.cpp.
36 CkCallback *cb;
/// The DataManager this request belongs to.
37 DataManager *dm;
/// A size/count; NOTE(review): units (particles vs. bytes) are not visible
/// here -- confirm at the point of use.
39 int size;
40};
41
42
43// store pointers to flattened buffers if !gpuFree
// DataManager keeps these in `pendingChunkTransferQ` / `currentChunkBuffers`,
// and serializeRemoteChunk() returns a pointer to one.
// NOTE(review): listing line 48 is absent, so a member may be missing from
// this view -- confirm against the real header.
44struct PendingBuffers {
// Multipole moments buffer (CudaMultipoleMoments is the cudatype version of
// MultipoleMoments).
45 CkVec<CudaMultipoleMoments> *moments;
// Particle buffer (CompactPartData is the particle data needed on the GPU to
// calculate gravity).
46 CkVec<CompactPartData> *particles;
// Chunk number these buffers belong to.
47 int chunk;
// Callback pointer; NOTE(review): presumably fired when the transfer of this
// chunk completes -- verify in DataManager.cpp.
49 CkCallback *cb;
50};
51
52#endif
53
/// Holds data shared by the TreePieces on this node: the sort splitters, the
/// registered TreePiece roots and the tree combined from them, cooling and
/// star-log state, and (in CUDA builds) the GPU buffers and bookkeeping.
/// Derives from the Charm++-generated CBase_DataManager. TreePiece and
/// OctTreeBuildPhaseIWorker are friends and therefore reach the non-public
/// members directly.
/// NOTE(review): this listing omits some member declarations that the code
/// below uses (`root`, `oldNumChunks`, `chunkRoots`, `chunkRootTable`,
/// `starLog`; the cross-reference index places them at listing lines 156,
/// 162, 164, 166 and 182). Consult the real header for the full member list.
60class DataManager : public CBase_DataManager {
61 friend class TreePiece;
62 friend class OctTreeBuildPhaseIWorker;
63
/// Proxy for the TreePiece chare array.
/// NOTE(review): presumably built from the CkArrayID given to the
/// constructor -- confirm in DataManager.cpp.
65 CProxy_TreePiece treePieces;
66
67protected:
68
/// The array of splitter keys for the sort.
70 std::vector<SFC::Key> boundaryKeys;
71
/// An array identifying which chare is responsible for each interval of keys.
73 std::vector<int> responsibleIndex;
74
/// An array with how many particles are held by each TreePiece when sorted.
76 std::vector<int> particleCounts;
77
/// A list of roots of the TreePieces in this node (each entry pairs a
/// TreePiece pointer with its root node).
/// NOTE(review): the older comment below speaks of "chare array indices",
/// which does not match the element type TreePieceDescriptor.
79 // holds chare array indices of registered treepieces
80 CkVec<TreePieceDescriptor> registeredTreePieces;
81#ifdef CUDA
82 //CkVec<int> registeredTreePieceIndices;
// Bookkeeping for the GPU code path.
// NOTE(review): the treePieces* counters presumably count how many local
// TreePieces have reached the named phase -- confirm in DataManager.cpp.
85 int cumNumReplicatedNodes;
86 int treePiecesDone;
87 int savedChunk;
88 int treePiecesDonePrefetch;
89 int treePiecesDoneLocalComputation;
90 // XXX - assumes that only one chunk can be on the gpu
91 // at a given time
92 int treePiecesDoneRemoteChunkComputation;
93 int treePiecesWantParticlesBack;
96 int treePiecesParticlesUpdated;
97 int savedNumTotalParticles;
98 int savedNumTotalNodes;
99 // keeps track of buckets of particles that were
100 // received during the prefetch and which were subsequently
101 // shipped off to the gpu - XXX
102 // not including cached particles in post-prefetch entities shipped to gpu
103 // since it is hard to count their number given just the pointer to the cache entry
104 // * either do not concern yourself with cached particles
105 // * or for each entry, get key, find bucket node in CM, DM or TPs and get number
106 // for now, the former
// Exposed to callers through getCachedPartsOnGpuTable().
107 std::map<NodeKey, int> cachedPartsOnGpu;
108 // local particles that have been copied to the gpu
109 //std::map<NodeKey, int> localPartsOnGpu;
110
111 // TreePiece counter for multi-threaded GPU host buffer copy
112 int treePiecesBufferFilled;
113
114 // can the gpu accept a chunk of remote particles/nodes?
115 bool gpuFree;
116
118 CkCallback *localTransferCallback;
119
// Buffers of the chunk currently being handled.
// NOTE(review): ownership/lifetime is not visible here -- see
// serializeRemoteChunk() / freeRemoteChunkMemory() in DataManager.cpp.
120 PendingBuffers *currentChunkBuffers;
121 // queue that stores all pending chunk transfers
122 CkQ<PendingBuffers *> pendingChunkTransferQ;
123
124 // last remote chunk's size in moments and particles
125 int lastChunkMoments;
126 int lastChunkParticles;
// NOTE(review): the buf* pointers below are presumably host-side staging
// buffers, as opposed to the d_* device pointers further down -- confirm.
128 CudaMultipoleMoments *bufRemoteMoments;
130 CompactPartData *bufRemoteParts;
131
133 CkVec<CudaMultipoleMoments> localMoments;
135 CudaMultipoleMoments *bufLocalMoments;
137 CompactPartData *bufLocalParts;
// VariablePartData is the particle data that gets calculated by the GPU.
139 VariablePartData *bufLocalVars;
140
141 // Pointers to particle and tree data on GPU
142 CudaMultipoleMoments *d_localMoments;
143 CudaMultipoleMoments *d_remoteMoments;
144 CompactPartData *d_localParts;
145 CompactPartData *d_remoteParts;
146 VariablePartData *d_localVars;
// NOTE(review): presumably sizes associated with the moments / compact
// particle / variable particle buffers; units not visible here -- confirm.
147 size_t sMoments;
148 size_t sCompactParts;
149 size_t sVarParts;
150
// Number of entries in `streams`; the destructor destroys each stream and
// then delete[]s the array.
151 int numStreams;
152 cudaStream_t *streams;
153
154#endif
/// Tree nodes deleted one by one in the destructor.
159 CkVec<GenericTreeNode *> nodeTable;
160
167
168public:
169
170 /*
171 ** Cooling
172 */
// Passed to CoolFinalize() in the destructor.
173 COOL *Cool;
174 /*
175 * LW Feedback
176 */
// Passed to LymanWernerTableFinalize() in the destructor.
177 LWDATA *LWData;
/// Log of high mass stars in star formation events; deleted in the destructor.
183 HMStarLog *hmStarLog;
/// Lock for accessing starlog from TreePieces.
185 CmiNodeLock lockStarLog;
/// Lock for accessing hmstarlog from TreePieces.
187 CmiNodeLock lockHMStarLog;
188
189 DataManager(const CkArrayID& treePieceID);
/// Migration constructor.
190 DataManager(CkMigrateMessage *);
191
192 void startLocalWalk();
193 void resumeRemoteChunk();
194#ifdef CUDA
195 void createStreams(int _numStreams, const CkCallback& cb);
196 void donePrefetch(int chunk); // serialize remote chunk wrapper
197 void serializeLocalTree();
198
199#ifdef GPU_LOCAL_TREE_WALK
200 void transformLocalTreeRecursive(GenericTreeNode *node, CkVec<CudaMultipoleMoments>& localMoments);
201#endif //GPU_LOCAL_TREE_WALK
202
203 // actual serialization methods
204 PendingBuffers *serializeRemoteChunk(GenericTreeNode *);
205 void serializeLocal(GenericTreeNode *);
206 void transferLocalToGPU(int nParts, GenericTreeNode *node);
207 void freeLocalTreeMemory();
208 void freeRemoteChunkMemory(int chunk);
209 void transferParticleVarsBack();
210 void updateParticles(UpdateParticlesStruct *data);
211 void updateParticlesFreeMemory(UpdateParticlesStruct *data);
212 void initiateNextChunkTransfer();
// NOTE(review): this empty constructor initializes no members, yet the
// destructor reads `numStreams`, `streams`, the log pointers and the locks.
// Unless they are set elsewhere before destruction, that reads indeterminate
// values -- verify.
213 DataManager(){}
214
215#endif
216
217private:
218 void init();
219
220public:
221
/// Deletes every node in `nodeTable`, finalizes the cooling and Lyman-Werner
/// data, deletes both star logs, destroys both locks and, in CUDA builds,
/// destroys every stream before freeing the `streams` array.
222 ~DataManager() {
223 for (unsigned int i = 0; i < nodeTable.length(); i++) {
224 delete nodeTable[i];
225 }
226 nodeTable.clear();
227
228 CoolFinalize(Cool);
229 LymanWernerTableFinalize(LWData);
230 delete starLog;
231 delete hmStarLog;
232 CmiDestroyLock(lockStarLog);
233 CmiDestroyLock(lockHMStarLog);
234#ifdef CUDA
235 for (int i = 0; i < numStreams; i++) {
236 cudaStreamDestroy(streams[i]);
237 }
238 delete[] streams;
239#endif
240 }
241
244 void acceptResponsibleIndex(const int* responsible, const int n,
245 const CkCallback& cb);
254 void acceptFinalKeys(const SFC::Key* keys, const int* responsible, uint64_t* bins, const int n, const CkCallback& cb);
255 void pup(PUP::er& p);
256
257#ifdef CUDA
258 /*
259 std::map<NodeKey, int> &getLocalPartsOnGpuTable(){
260 return localPartsOnGpu;
261 }
262 */
/// Returns a mutable reference to `cachedPartsOnGpu`.
263 std::map<NodeKey, int> &getCachedPartsOnGpuTable(){
264 return cachedPartsOnGpu;
265 }
266#endif
267 // Functions used to create a tree inside the DataManager comprising
268 // all the trees in the TreePieces in the local node
269private:
270 Tree::GenericTreeNode *buildProcessorTree(int n, Tree::GenericTreeNode **gtn);
271 int createLookupRoots(Tree::GenericTreeNode *node, Tree::NodeKey *keys);
272public:
273
/// Collect roots of treepieces on this node.
278 void notifyPresence(Tree::GenericTreeNode *root, TreePiece *treePiece);
/// Clear registeredTreePieces on this node.
279 void clearRegisteredPieces(const CkCallback& cb);
/// Build a local tree inside the node.
280 void combineLocalTrees(CkReductionMsg *msg);
/// Return the number of chunks and the roots of the remote walk subtrees.
281 void getChunks(int &num, Tree::NodeKey *&roots);
/// Look up key `k` in `chunkRootTable`.
/// @return the node stored for `k`, or NULL when `k` is not in the table.
282 inline Tree::GenericTreeNode *chunkRootToNode(const Tree::NodeKey k) {
283 NodeLookupType::iterator iter = chunkRootTable.find(k);
284 if (iter != chunkRootTable.end()) return iter->second;
285 else return NULL;
286 }
/// @return `root`, the root of the combined trees.
287 inline Tree::GenericTreeNode *getRoot() { return root; }
288 void initCooling(double dGmPerCcUnit, double dComovingGmPerCcUnit,
289 double dErgPerGmUnit, double dSecUnit, double dKpcUnit,
290 COOLPARAM inParam, const CkCallback& cb);
291 void initStarLog(std::string _fileName, const CkCallback &cb);
292 void initLWData(const CkCallback& cb);
293 void initHMStarLog(std::string _fileName, const CkCallback &cb);
294 void dmCoolTableRead(double *dTableData, int nData, const CkCallback& cb);
295 void CoolingSetTime(double z, // redshift
296 double dTime, // Time
297 const CkCallback& cb);
/// Saves the total mass and center of mass of the star(s) to the COOL struct.
298 void SetStarCM(double dCenterOfMass[4], const CkCallback& cb);
299 void memoryStats(const CkCallback& cb);
300 void resetReadOnly(Parameters param, const CkCallback &cb);
301
302 public:
/// Pick a node out of equivalent nodes on different TreePieces.
303 static Tree::GenericTreeNode *pickNodeFromMergeList(int n, GenericTreeNode **gtn, int &nUnresolved, int &pickedIndex);
304};
305
/// Cap the balanced-injection (BI) setting at GNI_BI_DEFAULT.
///
/// Only has an effect when built with CHARM_VERSION > 60401 and
/// CMK_BALANCED_INJECTION_API; otherwise the body is empty. Where
/// CkMyRank() is 0 the current value is read and lowered to GNI_BI_DEFAULT
/// if it is larger; PE 0 then prints the value now in effect.
inline static void setBIconfig()
{
#if CHARM_VERSION > 60401 && CMK_BALANCED_INJECTION_API
#define GNI_BI_DEFAULT 64
    if (CkMyRank() == 0) {
        // Read once, then clamp only when the current value is above the cap.
        const uint16_t currentBI = ck_get_GNI_BIConfig();
        const bool aboveCap = currentBI > GNI_BI_DEFAULT;
        if (aboveCap)
            ck_set_GNI_BIConfig(GNI_BI_DEFAULT);
    }
    if (CkMyPe() == 0) {
        CkPrintf("Balanced injection is set to %d.\n", ck_get_GNI_BIConfig());
    }
#endif
}
320
326class ProjectionsControl : public CBase_ProjectionsControl {
327 public:
328 ProjectionsControl() {
329#ifdef CUDA
330 // GPUs are assigned to nodes in a round-robin fashion. This allows the user to define
331 // one virtual node per device and utilize multiple GPUs on a single node
332 // Beacuse devices are assigned per-PE, this is a convenient place to call setDevice
333 // Note that this code has nothing to do with initalizing projections
334 int numGpus;
335 cudaGetDeviceCount(&numGpus);
336 cudaSetDevice(CmiMyNode() % numGpus);
337#endif
338 setBIconfig();
339 LBTurnCommOff();
340#ifndef LB_MANAGER_VERSION
341 // Older Charm++ requires this to avoid excessive delays between successive LBs even
342 // when using AtSync mode
343 LBSetPeriod(0.0);
344#endif
345 }
346 ProjectionsControl(CkMigrateMessage *m) : CBase_ProjectionsControl(m) {
347 setBIconfig();
348 LBTurnCommOff();
349#ifndef LB_MANAGER_VERSION
350 // Older Charm++ requires this to avoid excessive delays between successive LBs even
351 // when using AtSync mode
352 LBSetPeriod(0.0);
353#endif
354 }
355
356 void on(CkCallback cb) {
357 if(CkMyPe() == 0){
358 CkPrintf("\n\n**** PROJECTIONS ON *****\n\n");
359 }
360 traceBegin();
361 contribute(cb);
362 }
363
364 void off(CkCallback cb) {
365 if(CkMyPe() == 0){
366 CkPrintf("\n\n**** PROJECTIONS OFF *****\n\n");
367 }
368 traceEnd();
369 contribute(cb);
370 }
371
372 void pup(PUP::er &p){
373 CBase_ProjectionsControl::pup(p);
374 }
375};
376
377#endif //DATAMANAGER_H
std::map< NodeKey, GenericTreeNode * > NodeLookupType
Definition GenericTreeNode.h:352
KeyType NodeKey
This key is the identification of a node inside the global tree, and it is unique for the node....
Definition GenericTreeNode.h:35
Definition DataManager.h:60
std::vector< int > responsibleIndex
An array identifying which chare is responsible for each interval of keys.
Definition DataManager.h:73
void acceptResponsibleIndex(const int *responsible, const int n, const CkCallback &cb)
Definition DataManager.cpp:69
Tree::GenericTreeNode * root
The root of the combined trees.
Definition DataManager.h:156
Tree::NodeKey * chunkRoots
Nodes currently used as roots for remote computation.
Definition DataManager.h:164
CkVec< TreePieceDescriptor > registeredTreePieces
A list of roots of the TreePieces in this node.
Definition DataManager.h:80
int oldNumChunks
Number of chunks into which the tree was split during the last combine operation.
Definition DataManager.h:162
static Tree::GenericTreeNode * pickNodeFromMergeList(int n, GenericTreeNode **gtn, int &nUnresolved, int &pickedIndex)
Pick a node out of equivalent nodes on different TreePieces. If one of the nodes is internal to a Tre...
Definition DataManager.cpp:268
void combineLocalTrees(CkReductionMsg *msg)
Build a local tree inside the node.
Definition DataManager.cpp:178
void notifyPresence(Tree::GenericTreeNode *root, TreePiece *treePiece)
Collect roots of treepieces on this node.
Definition DataManager.cpp:151
Tree::NodeLookupType chunkRootTable
Lookup table for the chunkRoots.
Definition DataManager.h:166
CmiNodeLock lockHMStarLog
Lock for accessing hmstarlog from TreePieces.
Definition DataManager.h:187
CmiNodeLock lockStarLog
Lock for accessing starlog from TreePieces.
Definition DataManager.h:185
void getChunks(int &num, Tree::NodeKey *&roots)
return the number of chunks and the roots of the remote walk subtrees.
Definition DataManager.cpp:420
std::vector< int > particleCounts
An array with how many particles are held by each TreePiece when sorted.
Definition DataManager.h:76
std::vector< SFC::Key > boundaryKeys
The array of splitter keys for the sort.
Definition DataManager.h:70
void initCooling(double dGmPerCcUnit, double dComovingGmPerCcUnit, double dErgPerGmUnit, double dSecUnit, double dKpcUnit, COOLPARAM inParam, const CkCallback &cb)
Definition Sph.cpp:136
void SetStarCM(double dCenterOfMass[4], const CkCallback &cb)
DataManager::SetStarCM saves the total mass and center of mass of the star(s) to the COOL struct Cool...
Definition Sph.cpp:281
CkVec< GenericTreeNode * > nodeTable
Definition DataManager.h:159
StarLog * starLog
log of star formation events.
Definition DataManager.h:182
void clearRegisteredPieces(const CkCallback &cb)
Clear registeredTreePieces on this node.
Definition DataManager.cpp:165
void acceptFinalKeys(const SFC::Key *keys, const int *responsible, uint64_t *bins, const int n, const CkCallback &cb)
Definition DataManager.cpp:76
Log of high mass stars in star formation events to be written out to a file.
Definition starform.h:215
Log of star formation events to be written out to a file.
Definition starform.h:193
Fundamental structure that holds particle and tree data.
Definition ParallelGravity.h:755
Base class for tree nodes.
Definition GenericTreeNode.h:59
Particle data needed on the GPU to calculate gravity.
Definition cuda_typedef.h:240
Version of MultipoleMoments using cudatype.
Definition cuda_typedef.h:104
Particle data that gets calculated by the GPU.
Definition cuda_typedef.h:268