5#include <cuda_runtime.h>
// Kernel-configuration constants.
// NOTE(review): this listing elides several original lines (including the
// #else/#endif lines that close the conditionals below), so which macros are
// active under which build flag cannot be determined from this view.

// Number of threads per block: 128.
8#define THREADS_PER_BLOCK 128
10#ifdef GPU_LOCAL_TREE_WALK
// Warp width assumed by the macros below: 32 threads.
11#define THREADS_PER_WARP 32
// 128 / 32 = 4 warps per block.
12#define WARPS_PER_BLOCK (THREADS_PER_BLOCK / THREADS_PER_WARP)
// threadIdx.x >> 5 equals threadIdx.x / 32. The shift amount hard-codes
// THREADS_PER_WARP == 32; the two must be changed together.
// NOTE(review): presumably the thread's warp index within a 1-D block — confirm.
13#define WARP_INDEX (threadIdx.x >> 5)
16#ifdef CUDA_2D_TB_KERNEL
// Block shape constants: 16 "parts" per block, hence 128 / 16 = 8 "nodes"
// per block.
// NOTE(review): presumably the two dimensions of a 2-D thread block — confirm
// at the kernel launch sites.
17#define PARTS_PER_BLOCK 16
18#define NODES_PER_BLOCK (THREADS_PER_BLOCK/PARTS_PER_BLOCK)
// Same layout with the _PART suffix: 128 threads, 16 parts, 128 / 16 = 8 nodes.
// NOTE(review): presumably used by the particle-interaction kernel — confirm.
20#define THREADS_PER_BLOCK_PART 128
21#define PARTS_PER_BLOCK_PART 16
22#define NODES_PER_BLOCK_PART (THREADS_PER_BLOCK_PART/PARTS_PER_BLOCK_PART)
// Both set to 100.
// NOTE(review): by name, initial per-bucket interaction counts (likely used
// to size buffers) — confirm at the use sites.
26#define NUM_INIT_MOMENT_INTERACTIONS_PER_BUCKET 100
27#define NUM_INIT_PARTICLE_INTERACTIONS_PER_BUCKET 100
79#ifdef GPU_LOCAL_TREE_WALK
// Declaration only; the definition is not part of this view.
// NOTE(review): by name, allocates pinned (page-locked) host memory. The
// parameters are unnamed here — presumably an out-pointer that receives the
// buffer address, followed by the requested size in bytes. Confirm against
// the definition.
99void allocatePinnedHostMemory(
void **,
size_t);
// Declaration only; the definition is not part of this view.
// NOTE(review): by name, the counterpart of allocatePinnedHostMemory —
// presumably releases a buffer obtained from it. Confirm against the
// definition.
100void freePinnedHostMemory(
void *);
102void DataManagerTransferLocalTree(
void *moments,
size_t sMoments,
103 void *compactParts,
size_t sCompactParts,
104 void *varParts,
size_t sVarParts,
105 void **d_localMoments,
void **d_compactParts,
void **d_varParts,
106 cudaStream_t stream,
int numParticles,
108void DataManagerTransferRemoteChunk(
void *moments,
size_t sMoments,
109 void *compactParts,
size_t sCompactParts,
110 void **d_remoteMoments,
void **d_remoteParts,
// Declaration only; the definition is not part of this view.
// Takes a host buffer of VariablePartData, a size, a device pointer
// (d_varParts), a CUDA stream, and an opaque cb pointer.
// NOTE(review): by name, copies the GPU-computed particle data from
// d_varParts into hostBuffer on `stream`; `size` is presumably in bytes and
// `cb` presumably a completion callback — confirm against the definition.
114void TransferParticleVarsBack(
VariablePartData *hostBuffer,
size_t size,
void *d_varParts, cudaStream_t stream,
void *cb);
// Cell-list transfer entry points; declarations only. Each takes a single
// CudaRequest describing the work.
// NOTE(review): by name, these handle node ("cell") interaction lists for
// local data, remote data, and resumed remote walks respectively — confirm
// against the definitions.
116void TreePieceCellListDataTransferLocal(CudaRequest *data);
117void TreePieceCellListDataTransferRemote(CudaRequest *data);
118void TreePieceCellListDataTransferRemoteResume(CudaRequest *data);
// Particle-list transfer entry points; declarations only. Each takes a
// CudaRequest; the SmallPhase variant additionally takes a CompactPartData
// array and a length.
// NOTE(review): by name, these handle particle interaction lists for local
// data, remote data, and resumed remote walks; `len` is presumably the
// element count of `parts` — confirm against the definitions.
121void TreePiecePartListDataTransferLocal(CudaRequest *data);
122void TreePiecePartListDataTransferLocalSmallPhase(CudaRequest *data,
CompactPartData *parts,
int len);
123void TreePiecePartListDataTransferRemote(CudaRequest *data);
124void TreePiecePartListDataTransferRemoteResume(CudaRequest *data);
// Declaration only; the definition is not part of this view.
// Host-callable function taking an opaque cb pointer.
// NOTE(review): presumably launches a no-op kernel and signals `cb` when it
// completes — confirm against the definition.
126void DummyKernel(
void *cb);
float cudatype
floating-point type on the GPU
Definition cuda_typedef.h:12
Particle data needed on the GPU to calculate gravity.
Definition cuda_typedef.h:240
Version of MultipoleMoments using cudatype.
Definition cuda_typedef.h:104
Particle data that gets calculated by the GPU.
Definition cuda_typedef.h:268
Device memory pointers used by most functions in HostCUDA.
Definition HostCUDA.h:92
Data and parameters for requesting gravity calculations on the GPU.
Definition HostCUDA.h:31
void * list
can be either an ILCell* or an ILPart*
Definition HostCUDA.h:47
cudatype fperiod
Definition HostCUDA.h:72
int * bucketMarkers
Definition HostCUDA.h:48
cudaStream_t stream
Definition HostCUDA.h:34
void * state
Definition HostCUDA.h:70
void * tp
Definition HostCUDA.h:59
void * missedNodes
pointer to off-processor Node/Particle buffer.
Definition HostCUDA.h:62
int * bucketStarts
Definition HostCUDA.h:51
bool remote
is this a remote or local computation?
Definition HostCUDA.h:78
CudaMultipoleMoments * d_localMoments
for accessing device memory
Definition HostCUDA.h:37
void * cb
Definition HostCUDA.h:69
int * affectedBuckets
these buckets were finished in this work request
Definition HostCUDA.h:68
size_t sMissed
Size of the off-processor data buffer.
Definition HostCUDA.h:65
bool node
is this a node or particle computation request?
Definition HostCUDA.h:76
int numInteractions
Definition HostCUDA.h:55
int numBucketsPlusOne
Definition HostCUDA.h:57
int * bucketSizes
Definition HostCUDA.h:53