changa 3.5
Loading...
Searching...
No Matches
State.h
1#ifndef __STATE_H__
2#define __STATE_H__
3#include "ParallelGravity.h"
4
/// @brief Base class for maintaining the state of a tree walk.
class State {
  public:
    // NOTE(review): the purpose of this flag is not described in this
    // file; confirm against the code that sets it.
    int bWalkDonePending;
    /// The bucket we have started to walk.
    int currentBucket;

    // shifted variable into state. there is an issue of redundancy
    // here, though. in addition to local state, remote and remote-resume
    // state also have this variable but have no use for it, since only
    // a single copy is required.
    // could have made this the third element in the array below
    /// @brief Keep track of how many buckets are unfinished.  XXX
    /// note the misnomer.
    int myNumParticlesPending;

    // again, redundant variables, since only remote-no-resume
    // walks use this variable to see how many chunks have
    // been used
    /// @brief Number of pending chunks.
    int numPendingChunks;

    /// @brief Counters to keep track of outstanding remote processor
    /// requests tied to each bucket (position 0) and chunk (position 1).
    int *counterArrays[2];

    /// Virtual so that derived walk states can be destroyed through a
    /// State pointer.
    virtual ~State() {}
};
39
40#if INTERLIST_VER > 0
41#if defined CUDA
42#include "HostCUDA.h"
43#include "DataManager.h"
44#include "ck128bitHash.h"
45
46class DoubleWalkState;
47
48template<typename T>
49class GenericList{
50 public:
51 CkVec<CkVec<T> > lists;
52 int totalNumInteractions;
53
54 GenericList() : totalNumInteractions(0) {}
55
56 void reset(){
57 // clear all bucket lists:
58 for(int i = 0; i < lists.length(); i++){
59 lists[i].length() = 0;
60 }
61 totalNumInteractions = 0;
62 }
63
64 void free(){
65 for(int i = 0; i < lists.length(); i++){
66 lists[i].free();
67 }
68 lists.free();
69 totalNumInteractions = 0;
70 }
71
72 void init(int numBuckets, int numper){
73 lists.resize(numBuckets);
74 for(int i = 0; i < numBuckets; i++){
75 lists[i].reserve(numper);
76 }
77 }
78
79 CudaRequest *serialize(TreePiece *tp);
80 void getBucketParameters(TreePiece *tp,
81 int bucket,
82 int &bucketStart, int &bucketSize){
83 //std::map<NodeKey, int>&lpref){
84 // bucket is listed in this offload
85 GenericTreeNode *bucketNode = tp->bucketList[bucket];
86
87 bucketSize = bucketNode->lastParticle - bucketNode->firstParticle + 1;
88 bucketStart = bucketNode->bucketArrayIndex;
89 CkAssert(bucketStart >= 0);
90 }
91
92 void getActiveBucketParameters(TreePiece *tp,
93 int bucket,
94 int &bucketStart, int &bucketSize){
95 //std::map<NodeKey, int>&lpref){
96 // bucket is listed in this offload
97 GenericTreeNode *bucketNode = tp->bucketList[bucket];
98 BucketActiveInfo *binfo = &(tp->bucketActiveInfo[bucket]);
99
100 //bucketSize = bucketNode->lastParticle - bucketNode->firstParticle + 1;
101 //bucketStart = bucketNode->bucketArrayIndex;
102 bucketSize = tp->bucketActiveInfo[bucket].size;
103 bucketStart = tp->bucketActiveInfo[bucket].start;
104 CkAssert(bucketStart >= 0);
105 }
106
107 void push_back(int b, T &ilc, DoubleWalkState *state, TreePiece *tp);
108
109
110};
111
112#endif
113
117class DoubleWalkState : public State {
118 public:
126 CkVec<CkVec<OffsetNode> >clists;
128 CkVec<CkVec<LocalPartInfo> >lplists;
130 CkVec<CkVec<RemotePartInfo> >rplists;
131
138 // to tell a remote-resume state from a remote-no-resume state
139 bool resume;
140
141#ifdef CUDA
142 int nodeThreshold;
143 int partThreshold;
144
145 GenericList<ILCell> nodeLists;
146 GenericList<ILPart> particleLists;
147
148 CkVec<CudaMultipoleMoments> *nodes;
149 CkVec<CompactPartData> *particles;
150
151 // during 'small' rungs, buckets are marked when
152 // they are included for computation in the request's
153 // aux. particle array. these markings should be
154 // cleared before the assembly of the next request is
155 // begun. for this purpose, we keep track of buckets
156 // marked during the construction of a request.
157 //
158 // NB: for large rungs, we don't mark buckets while
159 // compiling requests. for such rungs, since all
160 // particles are shipped at the beginning of the iteration,
161 // we have them marked at that time. since all particles,
162 // are available on the gpu for these rungs, we do not clear
163 // the markings when requests are sent out.
164 CkVec<GenericTreeNode *> markedBuckets;
165
168 std::unordered_map<NodeKey,int> nodeMap;
169 std::unordered_map<NodeKey,int> partMap;
170
171 bool nodeOffloadReady(){
172 return nodeLists.totalNumInteractions >= nodeThreshold;
173 }
174
175 bool partOffloadReady(){
176 return particleLists.totalNumInteractions >= partThreshold;
177 }
178
179#endif
180
187 int level;
188
189 DoubleWalkState() : chklists(0), lowestNode(0), level(-1) {
190#ifdef CUDA
191 partMap.reserve(100);
192#endif
193 }
194
195#ifdef HAPI_INSTRUMENT_WRS
196 void nodeListConstructionTimeStart(){
197 nodeListTime = CmiWallTimer();
198 }
199
200 double nodeListConstructionTimeStop(){
201 return CmiWallTimer()-nodeListTime;
202 }
203
204 void partListConstructionTimeStart(){
205 partListTime = CmiWallTimer();
206 }
207
208 double partListConstructionTimeStop(){
209 return CmiWallTimer()-partListTime;
210 }
211
212#endif
213};
214#endif // INTERLIST_VER
215
216class NullState : public State {
217};
218
219#endif
CkVec< UndecidedList > UndecidedLists
Vector of undecided lists, one for each level.
Definition ParallelGravity.h:658
CkQ< OffsetNode > CheckList
Queue of nodes to check for interactions.
Definition ParallelGravity.h:654
Hold state where both the targets and sources are tree walked.
Definition State.h:117
CkVec< CkVec< LocalPartInfo > > lplists
Lists of local particles to be computed. One list for each level.
Definition State.h:128
CkVec< CkVec< OffsetNode > > clists
Lists of cells to be computed. One list for each level.
Definition State.h:126
UndecidedLists undlists
Definition State.h:124
CheckList * chklists
Definition State.h:121
bool * placedRoots
Definition State.h:137
CkVec< CkVec< RemotePartInfo > > rplists
Lists of remote particles to be computed. One list for each level.
Definition State.h:130
GenericTreeNode * lowestNode
Definition State.h:186
Definition State.h:216
Base class for maintaining the state of a tree walk.
Definition State.h:6
int myNumParticlesPending
Keep track of how many buckets are unfinished. XXX note the misnomer.
Definition State.h:21
int * counterArrays[2]
Counters to keep track of outstanding remote processor requests tied to each bucket (position 0) and chunk (position 1).
Definition State.h:36
int bWalkDonePending
Definition State.h:10
int numPendingChunks
Number of pending chunks.
Definition State.h:32
int currentBucket
The bucket we have started to walk.
Definition State.h:12
Base class for tree nodes.
Definition GenericTreeNode.h:59
int lastParticle
An index to the last particle contained by this node, myNumParticles+1 means outside the node.
Definition GenericTreeNode.h:108
int firstParticle
An index for the first particle contained by this node, 0 means outside the node.
Definition GenericTreeNode.h:106