changa 3.5
Loading...
Searching...
No Matches
Orb3dLBCommon.h
1//Abhishek - Extract common code from Orb and mslb_notopo
2#ifndef _ORB3DHELPER_H
3#define _ORB3DHELPER_H
4
5#include <charm++.h>
6#include <charm++.h>
7#include "cklists.h"
8#include "ParallelGravity.h"
9#include "TopoManager.h"
10
11#include "Refiner.h"
12#include "MapStructures.h"
13#include "TaggedVector3D.h"
14#include "Vector3D.h"
15#include "CentralLB.h"
16#define ORB3DLB_NOTOPO_DEBUG(X)
17// #define ORB3DLB_NOTOPO_DEBUG(X) CkPrintf X
18
19void Orb_PrintLBStats(BaseLB::LDStats *stats, int numobjs);
20void write_LB_particles(BaseLB::LDStats* stats, const char *achFileName, bool bFrom);
21
23class PeInfo {
24 public:
25 int idx;
26 double load;
27 double items;
28 PeInfo(int id, double ld, int it) : idx(id), load(ld), items(it) {}
29};
30
33 public:
34 bool operator()(PeInfo& p1, PeInfo& p2) {
35 // This can be done based on load or number of tps assigned to a PE
36 return (p1.load > p2.load);
37 }
38};
39
42 // pointer to stats->to_proc
43 protected:
44 decltype(BaseLB::LDStats::to_proc) *mapping;
45 decltype(BaseLB::LDStats::from_proc) *from;
46
47 CkVec<float> procload;
48
52
55
56 // Greedy strategy to assign TreePieces to PEs on a node.
57 void orbPePartition(vector<Event> *events, vector<OrbObject> &tp, int node,
58 BaseLB::LDStats *stats) {
59
60 std::vector<PeInfo> peinfo;
61 float totalLoad = 0.0;
62 int firstProc = CkNodeFirst(node);
63 int lastProc = firstProc + CkNodeSize(node) - 1;
64 for (int i = firstProc; i <= lastProc; i++) {
65 peinfo.push_back(PeInfo(i, 0.0, 0));
66 }
67 // Make a heap of processors belonging to this node
68 std::make_heap(peinfo.begin(), peinfo.end(), ProcLdGreater());
69
70 int nextProc;
71 for(int i = 0; i < events[XDIM].size(); i++){
72 Event &ev = events[XDIM][i];
73 OrbObject &orb = tp[ev.owner];
74
75 // Pop the least loaded PE from the heap and assign TreePiece to it
76 PeInfo p = peinfo.front();
77 pop_heap(peinfo.begin(), peinfo.end(), ProcLdGreater());
78 peinfo.pop_back();
79
80 nextProc = p.idx;
81
82 if(orb.numParticles > 0){
83 (*mapping)[orb.lbindex] = nextProc;
84 procload[nextProc] += ev.load;
85 p.load += ev.load;
86 p.items += 1;
87 totalLoad += ev.load;
88 } else{
89 int fromPE = (*from)[orb.lbindex];
90 procload[fromPE] += ev.load;
91 }
92
93 peinfo.push_back(p);
94 push_heap(peinfo.begin(), peinfo.end(), ProcLdGreater());
95 }
96 }
97
106 void orbPartition(vector<Event> *events, OrientedBox<float> &box, int nprocs,
107 vector<OrbObject> & tp, BaseLB::LDStats *stats,
108 bool node_partition=false){
109
110 ORB3DLB_NOTOPO_DEBUG(("partition events %d %d %d nprocs %d\n",
111 events[XDIM].size(),
112 events[YDIM].size(),
113 events[ZDIM].size(),
114 nprocs
115 ));
116 int numEvents = events[XDIM].size();
117 CkAssert(numEvents == events[YDIM].size());
118 CkAssert(numEvents == events[ZDIM].size());
119
120 if(numEvents == 0)
121 return;
122
123 if(nprocs == 1){
124 ORB3DLB_NOTOPO_DEBUG(("base: assign %d tps to proc %d\n", numEvents, nextProc));
125 if (!stats->procs[nextProc].available) {
126 nextProc++;
127 return;
128 }
129
130 // If we are doing orb partition at the node level, then call
131 // orbPePartition to assign the treepieces to the PEs belonging to the node.
132 if (node_partition) {
133 orbPePartition(events, tp, nextProc, stats);
134 } else {
135 // direct assignment of tree pieces to processors
136 //if(numEvents > 0) CkAssert(nprocs != 0);
137 float totalLoad = 0.0;
138 for(int i = 0; i < events[XDIM].size(); i++){
139 Event &ev = events[XDIM][i];
140 OrbObject &orb = tp[ev.owner];
141 if(orb.numParticles > 0){
142 (*mapping)[orb.lbindex] = nextProc;
143 totalLoad += ev.load;
144 }
145 else{
146 int fromPE = (*from)[orb.lbindex];
147 if (fromPE < 0 || fromPE >= procload.size()) {
148 CkPrintf("[%d] trying to access fromPe %d nprocs %lu\n", CkMyPe(), fromPE, procload.size());
149 CkAbort("Trying to access a PE which is outside the range\n");
150 }
151 procload[fromPE] += ev.load;
152 }
153 }
154 procload[nextProc] += totalLoad;
155 }
156
157 if(numEvents > 0) nextProc++;
158 return;
159 }
160
161 // find longest dimension
162
163 int longestDim = XDIM;
164 float longestDimLength = box.greater_corner[longestDim] - box.lesser_corner[longestDim];
165 for(int i = YDIM; i <= ZDIM; i++){
166 float thisDimLength = box.greater_corner[i]-box.lesser_corner[i];
167 if(thisDimLength > longestDimLength){
168 longestDimLength = thisDimLength;
169 longestDim = i;
170 }
171 }
172
173 ORB3DLB_NOTOPO_DEBUG(("dimensions %f %f %f longest %d\n",
174 box.greater_corner[XDIM]-box.lesser_corner[XDIM],
175 box.greater_corner[YDIM]-box.lesser_corner[YDIM],
176 box.greater_corner[ZDIM]-box.lesser_corner[ZDIM],
177 longestDim
178 ));
179
180 int nlprocs = nprocs/2;
181 int nrprocs = nprocs-nlprocs;
182
183 float ratio = (1.0*nlprocs)/(1.0*(nlprocs+nrprocs));
184
185 // sum background load on each side of the processor split
186 float bglprocs = 0.0;
187 for(int np = nextProc; np < nextProc + nlprocs; np++)
188 bglprocs += stats->procs[np].bg_walltime;
189 float bgrprocs = 0.0;
190 for(int np = nextProc + nlprocs; np < nextProc + nlprocs + nrprocs; np++)
191 bgrprocs += stats->procs[np].bg_walltime;
192
193 ORB3DLB_NOTOPO_DEBUG(("nlprocs %d nrprocs %d ratio %f\n", nlprocs, nrprocs, ratio));
194
195 int splitIndex = partitionRatioLoad(events[longestDim],ratio,bglprocs,
196 bgrprocs);
197 if(splitIndex == numEvents) {
198 ORB3DLB_NOTOPO_DEBUG(("evenly split 0 load\n"));
199 splitIndex = splitIndex/2;
200 }
201 int nleft = splitIndex;
202 int nright = numEvents-nleft;
203
204#if 0
205 if(nright < nrprocs) { // at least one piece per processor
206 nright = nrprocs;
207 nleft = splitIndex = numEvents-nright;
208 CkAssert(nleft >= nlprocs);
209 }
210 else if(nleft < nlprocs) {
211 nleft = splitIndex = nlprocs;
212 nright = numEvents-nleft;
213 CkAssert(nright >= nrprocs);
214 }
215#endif
216
217 if(nleft > nlprocs*maxPieceProc) {
218 nleft = splitIndex = (int) (nlprocs*maxPieceProc);
219 nright = numEvents-nleft;
220 }
221 else if (nright > nrprocs*maxPieceProc) {
222 nright = (int) (nrprocs*maxPieceProc);
223 nleft = splitIndex = numEvents-nright;
224 }
225 CkAssert(splitIndex >= 0);
226 CkAssert(splitIndex < numEvents);
227
228 OrientedBox<float> leftBox;
229 OrientedBox<float> rightBox;
230
231 leftBox = rightBox = box;
232 float splitPosition = events[longestDim][splitIndex].position;
233 leftBox.greater_corner[longestDim] = splitPosition;
234 rightBox.lesser_corner[longestDim] = splitPosition;
235
236 // classify events
237 for(int i = 0; i < splitIndex; i++){
238 Event &ev = events[longestDim][i];
239 CkAssert(ev.owner >= 0);
240 CkAssert(tp[ev.owner].partition == INVALID_PARTITION);
241 tp[ev.owner].partition = LEFT_PARTITION;
242 }
243 for(int i = splitIndex; i < numEvents; i++){
244 Event &ev = events[longestDim][i];
245 CkAssert(ev.owner >= 0);
246 CkAssert(tp[ev.owner].partition == INVALID_PARTITION);
247 tp[ev.owner].partition = RIGHT_PARTITION;
248 }
249
250 vector<Event> leftEvents[NDIMS];
251 vector<Event> rightEvents[NDIMS];
252
253 for(int i = 0; i < NDIMS; i++){
254 if(i == longestDim){
255 leftEvents[i].resize(nleft);
256 rightEvents[i].resize(nright);
257 }
258 else{
259 leftEvents[i].reserve(nleft);
260 rightEvents[i].reserve(nright);
261 }
262 }
263 // copy events of split dimension
264 memcpy(&leftEvents[longestDim][0],&events[longestDim][0],sizeof(Event)*nleft);
265 memcpy(&rightEvents[longestDim][0],&events[longestDim][splitIndex],sizeof(Event)*nright);
266
267 // copy events of other dimensions
268 for(int i = XDIM; i <= ZDIM; i++){
269 if(i == longestDim) continue;
270 for(int j = 0; j < numEvents; j++){
271 Event &ev = events[i][j];
272 CkAssert(ev.owner >= 0);
273 OrbObject &orb = tp[ev.owner];
274 CkAssert(orb.partition != INVALID_PARTITION);
275 if(orb.partition == LEFT_PARTITION) leftEvents[i].push_back(ev);
276 else if(orb.partition == RIGHT_PARTITION) rightEvents[i].push_back(ev);
277 }
278 }
279
280 // cleanup
281 // next, reset the ownership information in the
282 // OrbObjects, so that the next invocation may use
283 // the same locations for its book-keeping
284 vector<Event> &eraseVec = events[longestDim];
285 for(int i = 0; i < numEvents; i++){
286 Event &ev = eraseVec[i];
287 CkAssert(ev.owner >= 0);
288 OrbObject &orb = tp[ev.owner];
289 CkAssert(orb.partition != INVALID_PARTITION);
290 orb.partition = INVALID_PARTITION;
291 }
292
293 // free events from parent node,
294 // since they are not needed anymore
295 // (we have partition all events into the
296 // left and right event subsets)
297 for(int i = 0; i < NDIMS; i++){
298 //events[i].free();
299 vector<Event>().swap(events[i]);
300 }
301 orbPartition(leftEvents,leftBox,nlprocs,tp, stats, node_partition);
302 orbPartition(rightEvents,rightBox,nrprocs,tp, stats, node_partition);
303 }
304
311 void orbPrepare(vector<Event> *tpEvents, OrientedBox<float> &box, int
312 numobjs, BaseLB::LDStats * stats, bool node_partition=false){
313
314 int nmig = stats->n_migrateobjs;
315 if(dMaxBalance < 1.0)
316 dMaxBalance = 1.0;
317
318 // If using node based orb partition, then the maxPieceProc is total
319 // migratable objs / total number of node.
320 if (node_partition) {
321 maxPieceProc = dMaxBalance * nmig / CkNumNodes();
322 } else {
323 maxPieceProc = dMaxBalance*nmig/stats->nprocs();
324 }
325
326 if(maxPieceProc < 1.0)
327 maxPieceProc = 1.01;
328
329 CkAssert(tpEvents[XDIM].size() == numobjs);
330 CkAssert(tpEvents[YDIM].size() == numobjs);
331 CkAssert(tpEvents[ZDIM].size() == numobjs);
332
333 mapping = &stats->to_proc;
334 from = &stats->from_proc;
335
336 CkPrintf("[Orb3dLB_notopo] sorting\n");
337 for(int i = 0; i < NDIMS; i++){
338 sort(tpEvents[i].begin(),tpEvents[i].end());
339 }
340
341 box.lesser_corner.x = tpEvents[XDIM][0].position;
342 box.lesser_corner.y = tpEvents[YDIM][0].position;
343 box.lesser_corner.z = tpEvents[ZDIM][0].position;
344
345 box.greater_corner.x = tpEvents[XDIM][numobjs-1].position;
346 box.greater_corner.y = tpEvents[YDIM][numobjs-1].position;
347 box.greater_corner.z = tpEvents[ZDIM][numobjs-1].position;
348
349 nextProc = 0;
350
351 procload.resize(stats->nprocs());
352 for(int i = 0; i < stats->nprocs(); i++){
353 procload[i] = stats->procs[i].bg_walltime;
354 }
355
356 }
357
358 void refine(BaseLB::LDStats *stats, int numobjs){
359#ifdef DO_REFINE
360 int *from_procs = Refiner::AllocProcs(stats->nprocs(), stats);
361 int *to_procs = Refiner::AllocProcs(stats->nprocs(), stats);
362#endif
363
364 for(int i = 0; i < numobjs; i++){
365#ifdef DO_REFINE
366 int pe = stats->to_proc[i];
367 from_procs[i] = pe;
368 to_procs[i] = pe;
369#endif
370 }
371
372 int numRefineMigrated = 0;
373#ifdef DO_REFINE
374 CkPrintf("[orb3dlb_notopo] refine\n");
375 Refiner refiner(1.050);
376 refiner.Refine(stats->nprocs(),stats,from_procs,to_procs);
377
378 for(int i = 0; i < numobjs; i++){
379 if(to_procs[i] != from_procs[i]) numRefineMigrated++;
380 stats->to_proc[i] = to_procs[i];
381 }
382#endif
383
384 Orb_PrintLBStats(stats, numobjs);
385
386#ifdef DO_REFINE
387 // Free the refine buffers
388 Refiner::FreeProcs(from_procs);
389 Refiner::FreeProcs(to_procs);
390#endif
391
392 }
393
402 int partitionRatioLoad(vector<Event> &events, float ratio, float bglp, float bgrp){
403
404 float approxBgPerEvent = (bglp + bgrp) / events.size();
405 float totalLoad = bglp + bgrp;
406 for(int i = 0; i < events.size(); i++){
407 totalLoad += events[i].load;
408 }
409 //CkPrintf("************************************************************\n");
410 //CkPrintf("partitionEvenLoad start %d end %d total %f\n", tpstart, tpend, totalLoad);
411 float perfectLoad = ratio * totalLoad;
412 ORB3DLB_NOTOPO_DEBUG(("partitionRatioLoad bgl %f bgr %f\n",
413 bglp, bgrp));
414 int splitIndex = 0;
415 float prevLoad = 0.0;
416 float leftLoadAtSplit = 0.0;
417 for(splitIndex = 0; splitIndex < events.size(); splitIndex++){
418
419 leftLoadAtSplit += events[splitIndex].load + approxBgPerEvent;
420
421 if (leftLoadAtSplit > perfectLoad) {
422 if ( fabs(leftLoadAtSplit - perfectLoad) < fabs(prevLoad - perfectLoad) ) {
423 splitIndex++;
424 }
425 else {
426 leftLoadAtSplit = prevLoad;
427 }
428 break;
429 }
430 prevLoad = leftLoadAtSplit;
431 }
432
433 ORB3DLB_NOTOPO_DEBUG(("partitionEvenLoad mid %d lload %f rload %f ratio %f\n", splitIndex, leftLoadAtSplit, totalLoad - leftLoadAtSplit, leftLoadAtSplit / totalLoad));
434 return splitIndex;
435 }
436
437}; //end class
438
439#endif
Common methods among Orb3d class load balancers.
Definition Orb3dLBCommon.h:41
int nextProc
index of first processor of the group we are considering
Definition Orb3dLBCommon.h:54
int partitionRatioLoad(vector< Event > &events, float ratio, float bglp, float bgrp)
Given a vector of Events, find a split that partitions them into two partitions with a given ratio of...
Definition Orb3dLBCommon.h:402
double maxPieceProc
Definition Orb3dLBCommon.h:51
void orbPartition(vector< Event > *events, OrientedBox< float > &box, int nprocs, vector< OrbObject > &tp, BaseLB::LDStats *stats, bool node_partition=false)
Recursively partition treepieces among processors by bisecting the load in orthogonal directions.
Definition Orb3dLBCommon.h:106
void orbPrepare(vector< Event > *tpEvents, OrientedBox< float > &box, int numobjs, BaseLB::LDStats *stats, bool node_partition=false)
Prepare structures for the ORB partition.
Definition Orb3dLBCommon.h:311
Hold information about Pe load and number of objects.
Definition Orb3dLBCommon.h:23
Utility class for sorting processor loads.
Definition Orb3dLBCommon.h:32
structure for sorting TreePieces in one dimension for load balancing.
Definition MapStructures.h:36
float load
load of this TreePiece
Definition MapStructures.h:40
int owner
Index within TreePiece array or stats->objData array of this TreePiece.
Definition MapStructures.h:38
data for Orb3D load balancing.
Definition MapStructures.h:80
int lbindex
index into LB stats->objData
Definition MapStructures.h:83
int numParticles
Particles in the TreePiece.
Definition MapStructures.h:87