/* ChaNGa 3.5: SSEdefs.h */
1#ifndef __SSEDEFS_H__
2#define __SSEDEFS_H__
3
4#include "cosmoType.h"
5
/*
 * AVX availability check.
 *
 * When AVX was requested (CMK_USE_AVX is non-zero) but the compiler does not
 * define __AVX__, redefine CMK_USE_AVX to 0 so that every later
 * `#if CMK_USE_AVX` test in this header evaluates false.
 */
#if CMK_USE_AVX && !defined(__AVX__)
  #undef CMK_USE_AVX
  #define CMK_USE_AVX 0
#endif

/*
 * If the switch is still on at this point, __AVX__ is defined; emit a
 * build-time note.  This fires in every translation unit that includes the
 * header with AVX enabled.
 */
#if CMK_USE_AVX
  #warning "using AVX"
#endif
14
/*
 * SSE2 availability check.
 *
 * When SSE2 was requested (CMK_USE_SSE2 is non-zero) but the compiler does
 * not define __SSE2__, redefine CMK_USE_SSE2 to 0 so that later
 * `#if CMK_USE_SSE2` / `#elif CMK_USE_SSE2` tests evaluate false.
 */
#if CMK_USE_SSE2 && !defined(__SSE2__)
  #undef CMK_USE_SSE2
  #define CMK_USE_SSE2 0
#endif
19
/*
 * Umbrella switch: CMK_SSE is defined to 1 when either SIMD flavour is
 * enabled.  This block does not define it otherwise, so an
 * `#if CMK_SSE` test sees 0 unless something else defines the macro.
 */
#if CMK_USE_AVX || CMK_USE_SSE2
  #define CMK_SSE 1
#endif
23
/*
 * Select the packed SIMD type and the gather/scatter helpers.
 *
 * Branches (first match wins):
 *   CMK_USE_AVX,  COSMO_FLOAT defined      -> compile error (unsupported)
 *   CMK_USE_AVX,  COSMO_FLOAT undefined    -> SSEDouble, 4 lanes
 *   CMK_USE_SSE2, COSMO_FLOAT defined      -> SSEFloat,  4 lanes
 *   CMK_USE_SSE2, COSMO_FLOAT undefined    -> SSEDouble, 2 lanes
 * If neither switch is on, nothing below is defined.
 *
 * Each successful branch provides:
 *   SSEcosmoType          typedef of the packed type from the included header.
 *   SSE_VECTOR_WIDTH      number of lanes (4 or 2).
 *   FORCE_INPUT_LIST_PAD  SSE_VECTOR_WIDTH - 1 in every branch
 *                         (presumably list padding -- confirm with callers).
 *   cosmoMask             low SSE_VECTOR_WIDTH bits set (0xf or 0x3).
 *   SSELoad(where, arr, idx, field)
 *       Expands to `where(e0, e1, ...)` with one argument per lane, lane k
 *       being `arr[idx + k]field`.  `where` is emitted verbatim in front of
 *       the argument list; `field` is emitted verbatim after each subscript.
 *   SSEStore(what, arr, idx, field)
 *       Writes lane k of `what` to `arr[idx + k]field`.
 *
 * Macro usage notes:
 *   - `arr` and `idx` are expanded once per lane, so they must not have side
 *     effects.
 *   - SSEStore expands to a brace block, not `do { } while (0)`; a trailing
 *     `;` after it is an extra empty statement, so it cannot be used as the
 *     unbraced body of an `if` that has an `else`.
 *   - storeu / storel / storeh are expected to come from the included
 *     SSE-Double.h / SSE-Float.h.
 */
#if CMK_USE_AVX
  #ifdef COSMO_FLOAT
    #error "single-precision AVX is not supported"
  #else
    /* AVX, double precision: four lanes. */
    #include "SSE-Double.h"
    #define SSE_VECTOR_WIDTH 4
    #define FORCE_INPUT_LIST_PAD 3
    typedef SSEDouble SSEcosmoType;
    #define SSELoad(where, arr, idx, field) \
      where((arr)[(idx)]field, (arr)[(idx) + 1]field, \
            (arr)[(idx) + 2]field, (arr)[(idx) + 3]field)
    #define SSEStore(what, arr, idx, field) { \
      double sseStoreLanes_[4]; \
      storeu(sseStoreLanes_, what); \
      (arr)[(idx)]field = sseStoreLanes_[0]; \
      (arr)[(idx) + 1]field = sseStoreLanes_[1]; \
      (arr)[(idx) + 2]field = sseStoreLanes_[2]; \
      (arr)[(idx) + 3]field = sseStoreLanes_[3]; \
    }
    enum {cosmoMask=0xf};
  #endif
#elif CMK_USE_SSE2
  #ifdef COSMO_FLOAT
    #define SSE_COSMO_FLOAT
    /*
     * The availability check earlier in this header already clears
     * CMK_USE_SSE2 when __SSE2__ is missing; this test is a second line of
     * defence.
     */
    #if defined(__SSE2__)
      /* SSE2, single precision: four lanes. */
      #include "SSE-Float.h"
      #define SSE_VECTOR_WIDTH 4
      #define FORCE_INPUT_LIST_PAD 3
      typedef SSEFloat SSEcosmoType;
      #define SSELoad(where, arr, idx, field) \
        where((arr)[(idx)]field, (arr)[(idx) + 1]field, \
              (arr)[(idx) + 2]field, (arr)[(idx) + 3]field)
      #define SSEStore(what, arr, idx, field) { \
        float sseStoreLanes_[4]; \
        storeu(sseStoreLanes_, what); \
        (arr)[(idx)]field = sseStoreLanes_[0]; \
        (arr)[(idx) + 1]field = sseStoreLanes_[1]; \
        (arr)[(idx) + 2]field = sseStoreLanes_[2]; \
        (arr)[(idx) + 3]field = sseStoreLanes_[3]; \
      }
      enum {cosmoMask=0xf};
    #else
      #error "SSE not available"
    #endif
  #else
    /*
     * Also rejected when SSE_COSMO_FLOAT was defined by something other than
     * the COSMO_FLOAT branch above.
     */
    #if defined(__SSE2__) && !defined(SSE_COSMO_FLOAT)
      /* SSE2, double precision: two lanes, stored low half then high half. */
      #include "SSE-Double.h"
      #define SSE_VECTOR_WIDTH 2
      #define FORCE_INPUT_LIST_PAD 1
      typedef SSEDouble SSEcosmoType;
      #define SSELoad(where, arr, idx, field) \
        where((arr)[(idx)]field, (arr)[(idx) + 1]field)
      #define SSEStore(what, arr, idx, field) { \
        storel(&(arr)[(idx)]field, what); \
        storeh(&(arr)[(idx) + 1]field, what); \
      }
      enum {cosmoMask=0x3};
    #else
      #error "SSE not available"
    #endif
  #endif
#endif
81
82#endif