/* * Author: doe300 * * See the file "LICENSE" for the full license governing this code. */ #ifndef VC4CL_RELATIONAL_H #define VC4CL_RELATIONAL_H #include "_config.h" #include "_overloads.h" #include "_intrinsics.h" #ifndef COMPARISON_1 #define COMPARISON_1(func, content) \ INLINE FUNC_1(int##16, func, float##16, val) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_1(int##8, func, float##8, val) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_1(int##4, func, float##4, val) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_1(int##3, func, float##3, val) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_1(int##2, func, float##2, val) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_1(int, func, float, val) CONST \ { /* 1 instead of -1 here on purpose! */ \ return (content) ? 1 : 0; \ } #endif #ifndef COMPARISON_2 #define COMPARISON_2(func, content) \ INLINE FUNC_2(int##16, func, float##16, x, float##16, y) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_2(int##8, func, float##8, x, float##8, y) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_2(int##4, func, float##4, x, float##4, y) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_2(int##3, func, float##3, x, float##3, y) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_2(int##2, func, float##2, x, float##2, y) CONST \ { \ return (content) ? -1 : 0; \ } \ INLINE FUNC_2(int, func, float, x, float, y) CONST \ { /* 1 instead of -1 here on purpose! */ \ return (content) ? 1 : 0; \ } #endif #ifndef FOR_ALL_ELEMENTS #define FOR_ALL_ELEMENTS(func, type, op, conv) \ INLINE FUNC_1(int, func, type##16, x) CONST \ { \ /* (s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sa, sb, sc, sd, se, sf) */ \ type##16 val0 = conv(x); \ /* (s0 op s1, s1 op s2, s2 op s3, s3 op s4, s4 op s5, s5 op s6, s6 op s7, s7 op s8, s8 op s9, s9 op sa, sa op sb, sb op sc, sc op sd, sd op se, se op sf, sf op s0) */ \ val0 = val0 op vc4cl_vector_rotate(val0, -1); \ /* (s0 op s1 op s2 op s3, s1 op s2 op s3 op s4, s2 op s3 op s4 op s5, s3 op s4 op s5 op s6, s4 op s5 op s6 op s7, s5 op s6 op s7 op s8, s6 op s7 op s8 op s9, s7 op s8 op s9 op sa, s8 op s9 op sa op sb, s9 op sa op sb op sc, sa op sb op sc op sd, sb op sc op sd op se, sc op sd op se op sf, ...) */ \ const type##16 val1 = val0 op vc4cl_vector_rotate(val0, -2); \ /* (s0 op s1 op s2 op s3 op s4 op s5 op s6 op s7, ..., s8 op s9 op sa op ab op sc op sd op se op sf, ...) */ \ const type##16 val2 = val1 op vc4cl_vector_rotate(val1, -4); \ /* s0 op s1 op s2 op s3 op s4 op s5 op s6 op s7 op s8 op s9 op sa op ab op sc op sd op se op sf */ \ return (val2 op val1 op vc4cl_vector_rotate(val2, -8)).x != 0; \ } \ INLINE FUNC_1(int, func, type##8, x) CONST \ { \ /* (s0, s1, s2, s3, s4, s5, s6, s7) */ \ type##8 val0 = conv(x); \ /* (s0 op s1, s1 op s2, s2 op s3, s3 op s4, s4 op s5, s5 op s6, s6 op s7, s7 op ?) */ \ val0 = val0 op vc4cl_vector_rotate(val0, -1); \ /* (s0 op s1 op s2 op s3, s1 op s2 op s3 op s4, s2 op s3 op s4 op s5, s3 op s4 op s5 op s6, s4 op s5 op s6 op s7, s5 op s6 op s7 op ?, s6 op s7 op ? op ?, s7 op ? op ? op ?) */ \ const type##8 val1 = val0 op vc4cl_vector_rotate(val0, -2); \ /* s0 op s1 op s2 op s3 op s4 op s5 op s6 op s7 */ \ return (val1 op vc4cl_vector_rotate(val1, -4)).x != 0; \ } \ INLINE FUNC_1(int, func, type##4, x) CONST \ { \ /* (x, y, z, w) */ \ type##4 val0 = conv(x); \ /* (x op y, y op z, z op w, w op ?) */ \ val0 = val0 op vc4cl_vector_rotate(val0, -1); \ /* (z op w, w op ?, ? op ?, ? op ?) */ \ const type##4 val1 = vc4cl_vector_rotate(val0, -2); \ /* (x op y op z op w, ...) */ \ return (val0 op val1).x != 0; \ } \ INLINE FUNC_1(int, func, type##3, x) CONST \ { \ type##3 val = conv(x); \ return (val.x op val.y op val.z) != 0; \ } \ INLINE FUNC_1(int, func, type##2, x) CONST \ { \ type##2 val = conv(x); \ return (val.x op val.y) != 0; \ } \ INLINE FUNC_1(int, func, type, x) CONST \ { \ type val = conv(x); \ return val != 0; \ } #endif #ifndef SELECT_SCALAR #define SELECT_SCALAR(type, maskType, content) \ INLINE FUNC_3(type, select, type, a, type, b, maskType, c) CONST \ { \ return content; \ } #endif #ifndef SELECT_VECTOR #define SELECT_VECTOR(type, maskType, content) \ INLINE FUNC_3(type##2, select, type##2, a, type##2, b, maskType##2, c) CONST \ { \ typedef int##2 int_t; \ content \ } \ INLINE FUNC_3(type##3, select, type##3, a, type##3, b, maskType##3, c) CONST \ { \ typedef int##3 int_t; \ content \ } \ INLINE FUNC_3(type##4, select, type##4, a, type##4, b, maskType##4, c) CONST \ { \ typedef int##4 int_t; \ content \ } \ INLINE FUNC_3(type##8, select, type##8, a, type##8, b, maskType##8, c) CONST \ { \ typedef int##8 int_t; \ content \ } \ INLINE FUNC_3(type##16, select, type##16, a, type##16, b, maskType##16, c) CONST \ { \ typedef int##16 int_t; \ content \ } #endif /* * The checks for NaNs as defined in the specification are done in the intrinsic of the comparison operators: * * "The relational functions isequal, isgreater, isgreaterequal, isless, islessequal, and islessgreater * always return 0 if either argument is not a number (NaN). isnotequal returns 1 if one or both * arguments are not a number (NaN) and the argument type is a scalar [...]" * - OpenCL 1.2, section 6.12.6 Relational Functions */ COMPARISON_2(isequal, x == y) COMPARISON_2(isnotequal, x != y) COMPARISON_2(isgreater, x > y) COMPARISON_2(isgreaterequal, x >= y) COMPARISON_2(isless, x < y) COMPARISON_2(islessequal, x <= y) COMPARISON_2(islessgreater, (x < y) || (x > y)) // From : "A finite value is any floating-point value that is neither infinite nor NaN (Not-A-Number)." COMPARISON_1(isfinite, !vc4cl_is_inf_nan(val)) COMPARISON_1(isinf, (vc4cl_bitcast_uint(val) & NAN) == INF) COMPARISON_1(isnan, vc4cl_is_nan(val)) // From : "Returns whether x is a normal value: i.e., whether it is neither infinity, NaN, zero or subnormal." COMPARISON_1(isnormal, !isinf(val) && !isnan(val) && ((vc4cl_bitcast_uint(val) & 0x7F800000) != 0) /* neither zero nor denormal */) COMPARISON_2(isordered, isequal(x, x) && isequal(y, y)) COMPARISON_2(isunordered, isnan(x) || isnan(y)) // for vector,directly use asr, for scalar shr. This is way more efficient than everything else (1 instruction) INLINE FUNC_1(int16, signbit, float16, val) CONST { return vc4cl_asr(vc4cl_bitcast_uint(val), 31); } INLINE FUNC_1(int8, signbit, float8, val) CONST { return vc4cl_asr(vc4cl_bitcast_uint(val), 31); } INLINE FUNC_1(int4, signbit, float4, val) CONST { return vc4cl_asr(vc4cl_bitcast_uint(val), 31); } INLINE FUNC_1(int3, signbit, float3, val) CONST { return vc4cl_asr(vc4cl_bitcast_uint(val), 31); } INLINE FUNC_1(int2, signbit, float2, val) CONST { return vc4cl_asr(vc4cl_bitcast_uint(val), 31); } INLINE FUNC_1(int, signbit, float, val) CONST { return vc4cl_bitcast_uint(val) >> 31; } FOR_ALL_ELEMENTS(any, char, |, vc4cl_msb_set) FOR_ALL_ELEMENTS(any, short, |, vc4cl_msb_set) FOR_ALL_ELEMENTS(any, int, |, vc4cl_msb_set) FOR_ALL_ELEMENTS(any, long, |, vc4cl_msb_set) FOR_ALL_ELEMENTS(all, char, &, vc4cl_msb_set) FOR_ALL_ELEMENTS(all, short, &, vc4cl_msb_set) FOR_ALL_ELEMENTS(all, int, &, vc4cl_msb_set) FOR_ALL_ELEMENTS(all, long, &, vc4cl_msb_set) //"Each bit of the result is the corresponding bit of a if the corresponding bit of c is 0. // Otherwise it is the corresponding bit of b." //based on pocl (pocl/lib/kernel/bitselect.cl) SIMPLE_3(uchar, bitselect, uchar, a, uchar, b, uchar, c, (~c & a) | (c & b)) SIMPLE_3(char, bitselect, char, a, char, b, char, c, (~c & a) | (c & b)) SIMPLE_3(ushort, bitselect, ushort, a, ushort, b, ushort, c, (~c & a) | (c & b)) SIMPLE_3(short, bitselect, short, a, short, b, short, c, (~c & a) | (c & b)) SIMPLE_3(uint, bitselect, uint, a, uint, b, uint, c, (~c & a) | (c & b)) SIMPLE_3(int, bitselect, int, a, int, b, int, c, (~c & a) | (c & b)) SIMPLE_3(ulong, bitselect, ulong, a, ulong, b, ulong, c, (~c & a) | (c & b)) SIMPLE_3(long, bitselect, long, a, long, b, long, c, (~c & a) | (c & b)) SIMPLE_3(float, bitselect, float, a, float, b, float, c, vc4cl_bitcast_float((~vc4cl_bitcast_uint(c) & vc4cl_bitcast_uint(a)) | (vc4cl_bitcast_uint(c) & vc4cl_bitcast_uint(b)))) //"For a scalar type, result = c ? b : a." SELECT_SCALAR(uchar, uchar, vc4cl_extend(c) ? b : a) SELECT_SCALAR(uchar, char, vc4cl_extend(c) ? b : a) SELECT_SCALAR(char, uchar, vc4cl_extend(c) ? b : a) SELECT_SCALAR(char, char, vc4cl_extend(c) ? b : a) SELECT_SCALAR(ushort, ushort, vc4cl_extend(c) ? b : a) SELECT_SCALAR(ushort, short, vc4cl_extend(c) ? b : a) SELECT_SCALAR(short, ushort, vc4cl_extend(c) ? b : a) SELECT_SCALAR(short, short, vc4cl_extend(c) ? b : a) SELECT_SCALAR(uint, uint, vc4cl_extend(c) ? b : a) SELECT_SCALAR(uint, int, vc4cl_extend(c) ? b : a) SELECT_SCALAR(int, uint, vc4cl_extend(c) ? b : a) SELECT_SCALAR(int, int, vc4cl_extend(c) ? b : a) SELECT_SCALAR(ulong, ulong, vc4cl_extend(c) ? b : a) SELECT_SCALAR(ulong, long, vc4cl_extend(c) ? b : a) SELECT_SCALAR(long, ulong, vc4cl_extend(c) ? b : a) SELECT_SCALAR(long, long, vc4cl_extend(c) ? b : a) SELECT_SCALAR(float, uint, vc4cl_extend(c) ? b : a) SELECT_SCALAR(float, int, vc4cl_extend(c) ? b : a) //"For each component of a vector type, result[i] = if MSB of c[i] is set ? b[i] : a[i]" SELECT_VECTOR(uchar, uchar, { int_t mask = vc4cl_asr(vc4cl_extend(c) << 24, 31); return vc4cl_bitcast_uchar(mask & vc4cl_bitcast_int(vc4cl_extend(b)) | (~mask & vc4cl_bitcast_int(vc4cl_extend(a)))); }) SELECT_VECTOR(uchar, char, { int_t mask = vc4cl_asr(vc4cl_extend(c) << 24, 31); return vc4cl_bitcast_uchar(mask & vc4cl_bitcast_int(vc4cl_extend(b)) | (~mask & vc4cl_bitcast_int(vc4cl_extend(a)))); }) SELECT_VECTOR(char, char, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(char, uchar, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(ushort, ushort, { int_t mask = vc4cl_asr(vc4cl_extend(c) << 16, 31); return vc4cl_bitcast_ushort(mask & vc4cl_bitcast_int(vc4cl_extend(b)) | (~mask & vc4cl_bitcast_int(vc4cl_extend(a)))); }) SELECT_VECTOR(ushort, short, { int_t mask = vc4cl_asr(vc4cl_extend(c) << 16, 31); return vc4cl_bitcast_ushort(mask & vc4cl_bitcast_int(vc4cl_extend(b)) | (~mask & vc4cl_bitcast_int(vc4cl_extend(a)))); }) SELECT_VECTOR(short, short, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(short, ushort, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(uint, uint, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(uint, int, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(int, int, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(int, uint, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(ulong, ulong, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(ulong, long, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(long, long, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(long, ulong, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(float, uint, { return vc4cl_msb_set(c) ? b : a; }) SELECT_VECTOR(float, int, { return vc4cl_msb_set(c) ? b : a; }) #undef COMPARISON_1 #undef COMPARISON_2 #undef FOR_ALL_ELEMENTS #undef SELECT_SCALAR #undef SELECT_VECTOR #endif /* VC4CL_RELATIONAL_H */