Go to the documentation of this file.
23 #ifndef COMPAT_CUDA_CUDA_RUNTIME_H
24 #define COMPAT_CUDA_CUDA_RUNTIME_H
/*
 * CUDA qualifier keywords, each spelled as the compiler attribute of the
 * same name.
 * NOTE(review): presumably this lets device code build without the vendor
 * runtime header (going by the COMPAT_CUDA_CUDA_RUNTIME_H guard) - confirm.
 */
27 #define __global__ __attribute__((global))
28 #define __device__ __attribute__((device))
29 #define __device_builtin__ __attribute__((device_builtin))
/* __align__(N): request N-byte alignment for the annotated declaration. */
30 #define __align__(N) __attribute__((aligned(N)))
/*
 * Self-referential on purpose: the preprocessor does not re-expand the inner
 * __inline__, so every use of the keyword also gets always_inline.
 */
31 #define __inline__ __inline__ __attribute__((always_inline))
/*
 * Type-generic max/min/abs as function-like macros.
 *
 * Each argument appears more than once in the expansion, so it is evaluated
 * more than once: do not pass expressions with side effects (i++, calls that
 * modify state, ...).
 */
33 #define max(a, b) ((a) > (b) ? (a) : (b))
34 #define min(a, b) ((a) < (b) ? (a) : (b))
35 #define abs(x) ((x) < 0 ? -(x) : (x))
/*
 * atomicAdd(a, b) forwards to the compiler builtin __atomic_fetch_add with
 * sequentially consistent ordering; the macro expression evaluates to
 * whatever that builtin returns.
 */
37 #define atomicAdd(a, b) (__atomic_fetch_add(a, b, __ATOMIC_SEQ_CST))
/* dim3 is declared as another name for struct uint3, not as a separate type. */
67 typedef struct uint3 dim3;
71 unsigned char x, y, z,
w;
76 unsigned short x, y, z,
w;
90 #define GETCOMP(reg, comp) \
91 asm("mov.u32 %0, %%" #reg "." #comp ";" : "=r"(tmp)); \
94 #define GET(name, reg) static inline __device__ uint3 name() {\
/*
 * Instantiate one accessor per GET(name, reg) use; GET declares
 * `static inline __device__ uint3 name()`.
 * NOTE(review): the second argument is presumably the register read through
 * GETCOMP's `mov.u32` asm - the rest of the GET body is not visible in this
 * listing, confirm against the full macro.
 */
103 GET(getBlockIdx, ctaid)
104 GET(getBlockDim, ntid)
105 GET(getThreadIdx, tid)
/*
 * blockIdx / blockDim / threadIdx are macros that expand to calls of the
 * getBlockIdx() / getBlockDim() / getThreadIdx() accessor functions, so every
 * use of one of these names is a function call returning a uint3 by value.
 */
108 #define blockIdx (getBlockIdx())
109 #define blockDim (getBlockDim())
110 #define threadIdx (getThreadIdx())
/*
 * make_<type>2 / make_<type>4 build a vector value as a compound literal with
 * designated initializers (.x, .y and, for the 4-component forms, .z, .w).
 * Each argument is substituted once, directly into its initializer.
 */
113 #define make_int2(a, b) ((int2){.x = a, .y = b})
114 #define make_uchar2(a, b) ((uchar2){.x = a, .y = b})
115 #define make_ushort2(a, b) ((ushort2){.x = a, .y = b})
116 #define make_float2(a, b) ((float2){.x = a, .y = b})
117 #define make_int4(a, b, c, d) ((int4){.x = a, .y = b, .z = c, .w = d})
118 #define make_uchar4(a, b, c, d) ((uchar4){.x = a, .y = b, .z = c, .w = d})
119 #define make_ushort4(a, b, c, d) ((ushort4){.x = a, .y = b, .z = c, .w = d})
120 #define make_float4(a, b, c, d) ((float4){.x = a, .y = b, .z = c, .w = d})
/*
 * TEX2D(type, ret) defines a conv() overload for `type`: it takes four
 * unsigned values a, b, c, d and stores the expression `ret` (written in
 * terms of those parameters) through `out`.
 */
123 #define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsigned c, unsigned d) {*out = (ret);}
/* unsigned char: keep the low 8 bits of the first value; b, c, d are unused. */
125 TEX2D(
unsigned char,
a & 0xFF)
/* unsigned short: keep the low 16 bits of the first value; b, c, d are unused. */
126 TEX2D(
unsigned short,
a & 0xFFFF)
140 unsigned ret1, ret2, ret3, ret4;
141 asm(
"tex.2d.v4.u32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
142 "=r"(ret1),
"=r"(ret2),
"=r"(ret3),
"=r"(ret4) :
143 "l"(texObject),
"f"(x),
"f"(y));
144 conv(&
ret, ret1, ret2, ret3, ret4);
152 asm(
"tex.2d.v4.f32.f32 {%0, %1, %2, %3}, [%4, {%5, %6}];" :
154 "l"(texObject),
"f"(x),
"f"(y));
/**
 * Approximate e^a.
 *
 * Uses the identity e^a == 2^(a * log2(e)) and evaluates the power of two
 * with the __nvvm_ex2_approx_f builtin.
 */
187 static inline __device__ float __expf(float a)
{
    /* log2(e), narrowed to float before the multiply. */
    const float log2_e = (float)__builtin_log2(__builtin_exp(1));

    return __nvvm_ex2_approx_f(a * log2_e);
}
static __device__ float __expf(float a)
__device__ T tex2D(cudaTextureObject_t texObject, float x, float y)
static __device__ float __cosf(float a)
static __device__ float floorf(float a)
static __device__ float ceilf(float a)
#define make_ushort2(a, b)
static __device__ float __sinf(float a)
static __device__ float trunc(float a)
static __device__ float fabsf(float a)
static __device__ float ceil(float a)
__device__ float2 tex2D< float2 >(cudaTextureObject_t texObject, float x, float y)
static __device__ float floor(float a)
__device_builtin__ unsigned long long cudaTextureObject_t
static int conv(int samples, float **pcm, char *buf, int channels)
static __device__ float fabs(float a)
#define __device_builtin__
#define make_uchar4(a, b, c, d)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that some (invalid) inputs can trigger overflows (undefined behavior). In these cases, often the output of the computation does not c…
The reader does not expect b to be semantically -5 here, and if the code is changed by maybe adding a cast, a division or other operation, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a…
#define make_ushort4(a, b, c, d)
#define make_uchar2(a, b)
#define make_float4(a, b, c, d)
__device__ float tex2D< float >(cudaTextureObject_t texObject, float x, float y)
__device__ float4 tex2D< float4 >(cudaTextureObject_t texObject, float x, float y)
#define make_float2(a, b)
static __device__ float truncf(float a)