106 void orbPartition(vector<Event> *events, OrientedBox<float> &box,
int nprocs,
107 vector<OrbObject> & tp, BaseLB::LDStats *stats,
108 bool node_partition=
false){
110 ORB3DLB_NOTOPO_DEBUG((
"partition events %d %d %d nprocs %d\n",
116 int numEvents = events[XDIM].size();
117 CkAssert(numEvents == events[YDIM].size());
118 CkAssert(numEvents == events[ZDIM].size());
124 ORB3DLB_NOTOPO_DEBUG((
"base: assign %d tps to proc %d\n", numEvents,
nextProc));
125 if (!stats->procs[
nextProc].available) {
132 if (node_partition) {
133 orbPePartition(events, tp,
nextProc, stats);
137 float totalLoad = 0.0;
138 for(
int i = 0; i < events[XDIM].size(); i++){
139 Event &ev = events[XDIM][i];
143 totalLoad += ev.
load;
146 int fromPE = (*from)[orb.
lbindex];
147 if (fromPE < 0 || fromPE >= procload.size()) {
148 CkPrintf(
"[%d] trying to access fromPe %d nprocs %lu\n", CkMyPe(), fromPE, procload.size());
149 CkAbort(
"Trying to access a PE which is outside the range\n");
151 procload[fromPE] += ev.
load;
163 int longestDim = XDIM;
164 float longestDimLength = box.greater_corner[longestDim] - box.lesser_corner[longestDim];
165 for(
int i = YDIM; i <= ZDIM; i++){
166 float thisDimLength = box.greater_corner[i]-box.lesser_corner[i];
167 if(thisDimLength > longestDimLength){
168 longestDimLength = thisDimLength;
173 ORB3DLB_NOTOPO_DEBUG((
"dimensions %f %f %f longest %d\n",
174 box.greater_corner[XDIM]-box.lesser_corner[XDIM],
175 box.greater_corner[YDIM]-box.lesser_corner[YDIM],
176 box.greater_corner[ZDIM]-box.lesser_corner[ZDIM],
180 int nlprocs = nprocs/2;
181 int nrprocs = nprocs-nlprocs;
183 float ratio = (1.0*nlprocs)/(1.0*(nlprocs+nrprocs));
186 float bglprocs = 0.0;
188 bglprocs += stats->procs[np].bg_walltime;
189 float bgrprocs = 0.0;
191 bgrprocs += stats->procs[np].bg_walltime;
193 ORB3DLB_NOTOPO_DEBUG((
"nlprocs %d nrprocs %d ratio %f\n", nlprocs, nrprocs, ratio));
197 if(splitIndex == numEvents) {
198 ORB3DLB_NOTOPO_DEBUG((
"evenly split 0 load\n"));
199 splitIndex = splitIndex/2;
201 int nleft = splitIndex;
202 int nright = numEvents-nleft;
205 if(nright < nrprocs) {
207 nleft = splitIndex = numEvents-nright;
208 CkAssert(nleft >= nlprocs);
210 else if(nleft < nlprocs) {
211 nleft = splitIndex = nlprocs;
212 nright = numEvents-nleft;
213 CkAssert(nright >= nrprocs);
219 nright = numEvents-nleft;
223 nleft = splitIndex = numEvents-nright;
225 CkAssert(splitIndex >= 0);
226 CkAssert(splitIndex < numEvents);
228 OrientedBox<float> leftBox;
229 OrientedBox<float> rightBox;
231 leftBox = rightBox = box;
232 float splitPosition = events[longestDim][splitIndex].position;
233 leftBox.greater_corner[longestDim] = splitPosition;
234 rightBox.lesser_corner[longestDim] = splitPosition;
237 for(
int i = 0; i < splitIndex; i++){
238 Event &ev = events[longestDim][i];
239 CkAssert(ev.
owner >= 0);
240 CkAssert(tp[ev.
owner].partition == INVALID_PARTITION);
241 tp[ev.
owner].partition = LEFT_PARTITION;
243 for(
int i = splitIndex; i < numEvents; i++){
244 Event &ev = events[longestDim][i];
245 CkAssert(ev.
owner >= 0);
246 CkAssert(tp[ev.
owner].partition == INVALID_PARTITION);
247 tp[ev.
owner].partition = RIGHT_PARTITION;
250 vector<Event> leftEvents[NDIMS];
251 vector<Event> rightEvents[NDIMS];
253 for(
int i = 0; i < NDIMS; i++){
255 leftEvents[i].resize(nleft);
256 rightEvents[i].resize(nright);
259 leftEvents[i].reserve(nleft);
260 rightEvents[i].reserve(nright);
264 memcpy(&leftEvents[longestDim][0],&events[longestDim][0],
sizeof(
Event)*nleft);
265 memcpy(&rightEvents[longestDim][0],&events[longestDim][splitIndex],
sizeof(
Event)*nright);
268 for(
int i = XDIM; i <= ZDIM; i++){
269 if(i == longestDim)
continue;
270 for(
int j = 0; j < numEvents; j++){
271 Event &ev = events[i][j];
272 CkAssert(ev.
owner >= 0);
274 CkAssert(orb.partition != INVALID_PARTITION);
275 if(orb.partition == LEFT_PARTITION) leftEvents[i].push_back(ev);
276 else if(orb.partition == RIGHT_PARTITION) rightEvents[i].push_back(ev);
284 vector<Event> &eraseVec = events[longestDim];
285 for(
int i = 0; i < numEvents; i++){
286 Event &ev = eraseVec[i];
287 CkAssert(ev.
owner >= 0);
289 CkAssert(orb.partition != INVALID_PARTITION);
290 orb.partition = INVALID_PARTITION;
297 for(
int i = 0; i < NDIMS; i++){
299 vector<Event>().swap(events[i]);
301 orbPartition(leftEvents,leftBox,nlprocs,tp, stats, node_partition);
302 orbPartition(rightEvents,rightBox,nrprocs,tp, stats, node_partition);
311 void orbPrepare(vector<Event> *tpEvents, OrientedBox<float> &box,
int
312 numobjs, BaseLB::LDStats * stats,
bool node_partition=
false){
314 int nmig = stats->n_migrateobjs;
315 if(dMaxBalance < 1.0)
320 if (node_partition) {
329 CkAssert(tpEvents[XDIM].size() == numobjs);
330 CkAssert(tpEvents[YDIM].size() == numobjs);
331 CkAssert(tpEvents[ZDIM].size() == numobjs);
333 mapping = &stats->to_proc;
334 from = &stats->from_proc;
336 CkPrintf(
"[Orb3dLB_notopo] sorting\n");
337 for(
int i = 0; i < NDIMS; i++){
338 sort(tpEvents[i].begin(),tpEvents[i].end());
341 box.lesser_corner.x = tpEvents[XDIM][0].position;
342 box.lesser_corner.y = tpEvents[YDIM][0].position;
343 box.lesser_corner.z = tpEvents[ZDIM][0].position;
345 box.greater_corner.x = tpEvents[XDIM][numobjs-1].position;
346 box.greater_corner.y = tpEvents[YDIM][numobjs-1].position;
347 box.greater_corner.z = tpEvents[ZDIM][numobjs-1].position;
351 procload.resize(stats->nprocs());
352 for(
int i = 0; i < stats->nprocs(); i++){
353 procload[i] = stats->procs[i].bg_walltime;
404 float approxBgPerEvent = (bglp + bgrp) / events.size();
405 float totalLoad = bglp + bgrp;
406 for(
int i = 0; i < events.size(); i++){
407 totalLoad += events[i].load;
411 float perfectLoad = ratio * totalLoad;
412 ORB3DLB_NOTOPO_DEBUG((
"partitionRatioLoad bgl %f bgr %f\n",
415 float prevLoad = 0.0;
416 float leftLoadAtSplit = 0.0;
417 for(splitIndex = 0; splitIndex < events.size(); splitIndex++){
419 leftLoadAtSplit += events[splitIndex].load + approxBgPerEvent;
421 if (leftLoadAtSplit > perfectLoad) {
422 if ( fabs(leftLoadAtSplit - perfectLoad) < fabs(prevLoad - perfectLoad) ) {
426 leftLoadAtSplit = prevLoad;
430 prevLoad = leftLoadAtSplit;
433 ORB3DLB_NOTOPO_DEBUG((
"partitionEvenLoad mid %d lload %f rload %f ratio %f\n", splitIndex, leftLoadAtSplit, totalLoad - leftLoadAtSplit, leftLoadAtSplit / totalLoad));