changa 3.5
Loading...
Searching...
No Matches
HostCUDA.h
1#ifndef _HOST_CUDA_H_
2#define _HOST_CUDA_H_
3
4
5#include <cuda_runtime.h>
6#include "cuda_typedef.h"
7
// Thread-count constants. Presumably THREADS_PER_BLOCK is the CUDA block size
// used when launching kernels; the launch sites are not visible in this header.
8#define THREADS_PER_BLOCK 128
9
10#ifdef GPU_LOCAL_TREE_WALK
// The three macros below exist only when GPU_LOCAL_TREE_WALK is defined.
11#define THREADS_PER_WARP 32
// With the values above this evaluates to 128 / 32 = 4.
12#define WARPS_PER_BLOCK (THREADS_PER_BLOCK / THREADS_PER_WARP)
// threadIdx.x >> 5 equals threadIdx.x / 32: the shift hard-codes a warp size
// of 32 and must be changed together with THREADS_PER_WARP.
13#define WARP_INDEX (threadIdx.x >> 5)
14#endif //GPU_LOCAL_TREE_WALK
15
// Block-shape constants that exist only when CUDA_2D_TB_KERNEL is defined.
// NOTE(review): presumably the thread block is treated as a
// NODES_PER_BLOCK x PARTS_PER_BLOCK grid of threads - confirm against the kernels.
16#ifdef CUDA_2D_TB_KERNEL
17#define PARTS_PER_BLOCK 16
// With THREADS_PER_BLOCK == 128 this evaluates to 128 / 16 = 8.
18#define NODES_PER_BLOCK (THREADS_PER_BLOCK/PARTS_PER_BLOCK)
19
// Separate set of constants with the _PART suffix; the values currently equal
// the ones above (128 threads, 16 parts, hence 8 nodes).
20#define THREADS_PER_BLOCK_PART 128
21#define PARTS_PER_BLOCK_PART 16
22#define NODES_PER_BLOCK_PART (THREADS_PER_BLOCK_PART/PARTS_PER_BLOCK_PART)
23#endif
24
25// FIXME - find appropriate values
// Both counts are placeholders (see the FIXME above). Presumably they are
// initial per-bucket capacities for interaction lists - the consumers are not
// visible in this header, so confirm before relying on that.
26#define NUM_INIT_MOMENT_INTERACTIONS_PER_BUCKET 100
27#define NUM_INIT_PARTICLE_INTERACTIONS_PER_BUCKET 100
28
// Data and parameters for requesting gravity calculations on the GPU.
// NOTE(review): this numbered listing skips several source lines inside the
// struct. The symbol index of the same page attributes members to some of the
// skipped lines (37 d_localMoments, 48 bucketMarkers, 51 bucketStarts,
// 53 bucketSizes, 55 numInteractions, 57 numBucketsPlusOne, 62 missedNodes,
// 68 affectedBuckets, 72 fperiod), so the member list shown here is incomplete.
31typedef struct _CudaRequest{
34 cudaStream_t stream; // CUDA stream handle carried with the request
35
// Pointers with the d_ prefix; the page index describes the sibling member
// d_localMoments as "for accessing device memory".
38 CudaMultipoleMoments *d_remoteMoments;
39 CompactPartData *d_localParts;
40 CompactPartData *d_remoteParts;
41 VariablePartData *d_localVars;
// Sizes of the corresponding buffers - presumably in bytes; TODO confirm.
42 size_t sMoments;
43 size_t sCompactParts;
44 size_t sVarParts;
45
47 void *list; // can be either an ILCell* or an ILPart*
59 void *tp; // opaque pointer; presumably the requesting TreePiece - TODO confirm
63 void *missedParts; // presumably the off-processor particle buffer - TODO confirm
65 size_t sMissed; // size of the off-processor data buffer
66
69 void *cb; // opaque pointer; presumably a completion callback - TODO confirm
70 void *state; // opaque pointer; meaning is not visible in this header
73
74 // TODO: remove these later if we don't use COSMO_PRINT_BK.
76 bool node; // is this a node or particle computation request?
78 bool remote; // is this a remote or local computation?
79#ifdef GPU_LOCAL_TREE_WALK
// Members below exist only when GPU_LOCAL_TREE_WALK is defined.
// NOTE(review): firstParticle/lastParticle look like a particle index range
// and rootIdx like the index of the tree root - whether the range is inclusive
// is not visible here.
80 int firstParticle;
81 int lastParticle;
82 int rootIdx;
83 cosmoType theta;
84 cosmoType thetaMono;
85 int nReplicas;
86 cudatype fperiodY; // Support periodic boundary condition in more dimensions
87 cudatype fperiodZ; // Support periodic boundary condition in more dimensions
88#endif //GPU_LOCAL_TREE_WALK
89}CudaRequest;
90
// Device memory pointers used by most functions in HostCUDA.
// NOTE(review): the names mirror the list / bucketMarkers / bucketStarts /
// bucketSizes members of CudaRequest with a d_ prefix, so these are presumably
// their device-side copies - confirm against the .cu implementation.
92typedef struct _CudaDevPtr{
93 void *d_list;
94 int *d_bucketMarkers;
95 int *d_bucketStarts;
96 int *d_bucketSizes;
97}CudaDevPtr;
98
// Pinned host memory helpers; only the prototypes are visible here.
// NOTE(review): presumably allocatePinnedHostMemory stores a pointer to a
// page-locked host buffer of the given size through its first argument and
// freePinnedHostMemory releases such a buffer - confirm against the definitions.
99void allocatePinnedHostMemory(void **, size_t);
100void freePinnedHostMemory(void *);
101
// Prototype only; the definition is not in this header.
// Takes three host-side buffers (moments, compactParts, varParts) with their
// sizes, three out-parameters for device pointers, a stream, a particle count
// and an opaque callback pointer.
// NOTE(review): presumably copies the local tree data to the device on
// `stream` and invokes `callback` on completion - confirm against the definition.
102void DataManagerTransferLocalTree(void *moments, size_t sMoments,
103 void *compactParts, size_t sCompactParts,
104 void *varParts, size_t sVarParts,
105 void **d_localMoments, void **d_compactParts, void **d_varParts,
106 cudaStream_t stream, int numParticles,
107 void *callback);
// Prototype only; the definition is not in this header.
// Takes two host-side buffers (moments, compactParts) with their sizes, two
// out-parameters for device pointers, a stream and an opaque callback pointer.
// NOTE(review): presumably the remote-chunk counterpart of
// DataManagerTransferLocalTree - confirm against the definition.
108void DataManagerTransferRemoteChunk(void *moments, size_t sMoments,
109 void *compactParts, size_t sCompactParts,
110 void **d_remoteMoments, void **d_remoteParts,
111 cudaStream_t stream,
112 void *callback);
113
// Prototype only. NOTE(review): presumably copies `size` bytes of
// VariablePartData from d_varParts into hostBuffer on `stream` and then
// triggers `cb` - the unit of `size` and the callback type are not visible here.
114void TransferParticleVarsBack(VariablePartData *hostBuffer, size_t size, void *d_varParts, cudaStream_t stream, void *cb);
115
// Cell-list entry points; each takes the CudaRequest describing the work.
// Prototypes only. NOTE(review): presumably for these calls data->list holds
// ILCell entries and Local / Remote / RemoteResume select the data source -
// confirm against the definitions.
116void TreePieceCellListDataTransferLocal(CudaRequest *data);
117void TreePieceCellListDataTransferRemote(CudaRequest *data);
118void TreePieceCellListDataTransferRemoteResume(CudaRequest *data);
119
120
// Particle-list entry points; each takes the CudaRequest describing the work.
// Prototypes only. NOTE(review): presumably for these calls data->list holds
// ILPart entries; the SmallPhase variant additionally receives an array of
// `len` CompactPartData entries - confirm against the definitions.
121void TreePiecePartListDataTransferLocal(CudaRequest *data);
122void TreePiecePartListDataTransferLocalSmallPhase(CudaRequest *data, CompactPartData *parts, int len);
123void TreePiecePartListDataTransferRemote(CudaRequest *data);
124void TreePiecePartListDataTransferRemoteResume(CudaRequest *data);
125
// Prototype only. NOTE(review): despite the name this is declared as an
// ordinary host function taking an opaque callback pointer; what it launches,
// if anything, is not visible in this header.
126void DummyKernel(void *cb);
127
128#endif
float cudatype
Floating-point type on the GPU.
Definition cuda_typedef.h:12
Particle data needed on the GPU to calculate gravity.
Definition cuda_typedef.h:240
Version of MultipoleMoments using cudatype.
Definition cuda_typedef.h:104
Particle data that gets calculated by the GPU.
Definition cuda_typedef.h:268
Device memory pointers used by most functions in HostCUDA.
Definition HostCUDA.h:92
Data and parameters for requesting gravity calculations on the GPU.
Definition HostCUDA.h:31
void * list
Can be either an ILCell* or an ILPart*.
Definition HostCUDA.h:47
cudatype fperiod
Definition HostCUDA.h:72
int * bucketMarkers
Definition HostCUDA.h:48
cudaStream_t stream
Definition HostCUDA.h:34
void * state
Definition HostCUDA.h:70
void * tp
Definition HostCUDA.h:59
void * missedNodes
pointer to off-processor Node/Particle buffer.
Definition HostCUDA.h:62
int * bucketStarts
Definition HostCUDA.h:51
bool remote
is this a remote or local computation?
Definition HostCUDA.h:78
CudaMultipoleMoments * d_localMoments
for accessing device memory
Definition HostCUDA.h:37
void * cb
Definition HostCUDA.h:69
int * affectedBuckets
these buckets were finished in this work request
Definition HostCUDA.h:68
size_t sMissed
Size of the off-processor data buffer.
Definition HostCUDA.h:65
bool node
is this a node or particle computation request?
Definition HostCUDA.h:76
int numInteractions
Definition HostCUDA.h:55
int numBucketsPlusOne
Definition HostCUDA.h:57
int * bucketSizes
Definition HostCUDA.h:53