Go to the documentation of this file.
/*
 * Feature flag defined ahead of the intrinsic headers included below.
 * NOTE(review): nothing in the visible part of this file reads it; presumably
 * it opts this header out of an identifier-poisoning mechanism defined
 * elsewhere in the project - confirm against the header that tests it.
 */
#define GRK_SKIP_POISON
26 #include <xmmintrin.h>
29 #include <emmintrin.h>
32 #include <tmmintrin.h>
35 #include <immintrin.h>
/*
 * VREG_INT_COUNT: 8 when the 256-bit macro set is used, 4 otherwise.
 * Presumably this is the number of 32-bit integer lanes in one vector
 * register (256 / 32 = 8, 128 / 32 = 4) - confirm against the callers.
 *
 * The two values must be mutually exclusive: defining both unconditionally
 * makes the second definition silently override the first.
 * NOTE(review): the selecting condition was not visible in this listing;
 * __AVX2__ is assumed because the 256-bit integer intrinsics used by the
 * macros below require it - confirm.
 */
#if defined(__AVX2__)
#define VREG_INT_COUNT 8
#else
#define VREG_INT_COUNT 4
#endif
49 #if (defined(__SSE2__) || defined(__AVX2__))
/*
 * 256-bit (AVX / AVX2) implementations of the vector helper macros.
 *
 * Integer macros work on packed signed 32-bit lanes, float macros on packed
 * single-precision lanes. LOAD/STORE/LOADF/STOREF use the aligned intrinsics
 * and need 32-byte aligned addresses; the U variants accept any address.
 *
 * Every macro expands to a single expression WITHOUT a trailing semicolon so
 * that it can be nested inside other macros (ADD3, VCLAMP, ...).
 *
 * NOTE(review): VREG is not defined in the visible part of this file;
 * presumably it names the 256-bit integer vector type here - confirm.
 * NOTE(review): the same macro names are defined again with 128-bit
 * intrinsics further down; presumably a preprocessor conditional that is not
 * visible in this listing selects one of the two sets - confirm.
 */

/* Integer: broadcast, load and store. */
#define LOAD_CST(x) _mm256_set1_epi32(x)
#define LOAD(x) _mm256_load_si256((const VREG*)(x))
#define LOADU(x) _mm256_loadu_si256((const VREG*)(x))
#define STORE(x,y) _mm256_store_si256((VREG*)(x),(y))
#define STOREU(x,y) _mm256_storeu_si256((VREG*)(x),(y))

/* Integer: lane-wise arithmetic and logic. */
#define ADD(x,y) _mm256_add_epi32((x),(y))
/* Bitwise AND of the full register; must stay an expression (no ';'). */
#define AND(x,y) _mm256_and_si256((x),(y))
#define SUB(x,y) _mm256_sub_epi32((x),(y))
/* Signed per-lane maximum / minimum. */
#define VMAX(x,y) _mm256_max_epi32((x),(y))
#define VMIN(x,y) _mm256_min_epi32((x),(y))
/* Arithmetic (sign-preserving) right shift of every lane by y bits. */
#define SAR(x,y) _mm256_srai_epi32((x),(y))
/* Keeps the low 32 bits of each 32x32-bit product. */
#define MUL(x,y) _mm256_mullo_epi32((x),(y))

/* Float: load and broadcast. */
#define LOADF(x) _mm256_load_ps((float const*)(x))
#define LOADUF(x) _mm256_loadu_ps((float const*)(x))
#define LOAD_CST_F(x) _mm256_set1_ps(x)

/* Float: lane-wise arithmetic. */
#define ADDF(x,y) _mm256_add_ps((x),(y))
#define MULF(x,y) _mm256_mul_ps((x),(y))
#define SUBF(x,y) _mm256_sub_ps((x),(y))
#define VMAXF(x,y) _mm256_max_ps((x),(y))
#define VMINF(x,y) _mm256_min_ps((x),(y))

/* Float: store. */
#define STOREF(x,y) _mm256_store_ps((float*)(x),(y))
#define STOREUF(x,y) _mm256_storeu_ps((float*)(x),(y))
/*
 * 128-bit (SSE family) implementations of the vector helper macros.
 *
 * Integer macros work on packed signed 32-bit lanes, float macros on packed
 * single-precision lanes. LOAD/STORE/LOADF/STOREF use the aligned intrinsics
 * and need 16-byte aligned addresses; the U variants accept any address.
 *
 * Every macro expands to a single expression WITHOUT a trailing semicolon so
 * that it can be nested inside other macros (ADD3, VCLAMP, ...).
 *
 * NOTE(review): VREG is not defined in the visible part of this file;
 * presumably it names the 128-bit integer vector type here - confirm.
 * NOTE(review): the same macro names are also defined with 256-bit
 * intrinsics earlier in the file; presumably a preprocessor conditional that
 * is not visible in this listing selects one of the two sets - confirm.
 */

/* Integer: broadcast, load and store. */
#define LOAD_CST(x) _mm_set1_epi32(x)
#define LOAD(x) _mm_load_si128((const VREG*)(x))
#define LOADU(x) _mm_loadu_si128((const VREG*)(x))
#define STORE(x,y) _mm_store_si128((VREG*)(x),(y))
#define STOREU(x,y) _mm_storeu_si128((VREG*)(x),(y))

/* Integer: lane-wise arithmetic and logic. */
#define ADD(x,y) _mm_add_epi32((x),(y))
/* Bitwise AND of the full register; must stay an expression (no ';'). */
#define AND(x,y) _mm_and_si128((x),(y))
#define SUB(x,y) _mm_sub_epi32((x),(y))

/*
 * NOTE(review): _mm_max_epi32, _mm_min_epi32 and _mm_mullo_epi32 are SSE4.1
 * intrinsics, while the guard visible above this section only tests
 * __SSE2__ / __AVX2__ - confirm that SSE2-only builds never reach them.
 */
/* Signed per-lane maximum / minimum. */
#define VMAX(x,y) _mm_max_epi32((x),(y))
#define VMIN(x,y) _mm_min_epi32((x),(y))
/* Keeps the low 32 bits of each 32x32-bit product. */
#define MUL(x,y) _mm_mullo_epi32((x),(y))
/* Arithmetic (sign-preserving) right shift of every lane by y bits. */
#define SAR(x,y) _mm_srai_epi32((x),(y))

/* Float: load and broadcast. */
#define LOADF(x) _mm_load_ps((float const*)(x))
#define LOADUF(x) _mm_loadu_ps((float const*)(x))
#define LOAD_CST_F(x) _mm_set1_ps(x)

/* Float: lane-wise arithmetic. */
#define ADDF(x,y) _mm_add_ps((x),(y))
#define MULF(x,y) _mm_mul_ps((x),(y))
#define SUBF(x,y) _mm_sub_ps((x),(y))
#define VMAXF(x,y) _mm_max_ps((x),(y))
#define VMINF(x,y) _mm_min_ps((x),(y))

/* Float: store. */
#define STOREF(x,y) _mm_store_ps((float*)(x),(y))
#define STOREUF(x,y) _mm_storeu_ps((float*)(x),(y))
/*
 * Composite helpers built only from the width-specific primitives, so they
 * work unchanged with whichever ADD / VMIN / VMAX / VMINF / VMAXF set is
 * active.
 */

/* Sum of three integer vectors: (x + y) + z. */
#define ADD3(x,y,z) ADD(ADD((x),(y)),(z))

/* Limits every integer lane of x to [min, max]: raise to min, then cap at max. */
#define VCLAMP(x,min,max) VMIN(VMAX((x),(min)),(max))

/* Float counterpart of VCLAMP, using VMAXF / VMINF. */
#define VCLAMPF(x,min,max) VMINF(VMAXF((x),(min)),(max))