![]() |
AI Engine-ML Intrinsics User Guide (v2024.2)
|
Intrinsics allowing you to perform vector shuffles. More...
Intrinsics allowing you to perform vector shuffles.
This table gives the different shuffle modes. For an illustration see Illustration of Shuffle Modes.
Element Size | Matrix Transpose Dimensions | Mode Value | |||
---|---|---|---|---|---|
Forward operation (Deinterleaving) | Backward Operation (Interleaving) | ||||
extract low | extract high | extract low | extract high | ||
8-bit | 64x2 → 2x64 | shuffle_T8_64x2_lo | shuffle_T8_64x2_hi | shuffle_T8_2x64_lo | shuffle_T8_2x64_hi |
16-bit | 32x2 → 2x32 | shuffle_T16_32x2_lo | shuffle_T16_32x2_hi | shuffle_T16_2x32_lo | shuffle_T16_2x32_hi |
32-bit | 16x2 → 2x16 | shuffle_T32_16x2_lo | shuffle_T32_16x2_hi | shuffle_T32_2x16_lo | shuffle_T32_2x16_hi |
64-bit | 8x2 → 2x8 | shuffle_T64_8x2_lo | shuffle_T64_8x2_hi | shuffle_T64_2x8_lo | shuffle_T64_2x8_hi |
128-bit | 4x2 → 2x4 | shuffle_T128_4x2_lo | shuffle_T128_4x2_hi | shuffle_T128_2x4_lo | shuffle_T128_2x4_hi |
256-bit | 2x2 → 2x2 | shuffle_T256_2x2_lo | shuffle_T256_2x2_hi | shuffle_T256_2x2_lo | shuffle_T256_2x2_hi |
512-bit | 1x2 → 2x1 | shuffle_T512_1x2_lo | shuffle_T512_1x2_hi | shuffle_T512_1x2_lo | shuffle_T512_1x2_hi |
16-bit | 16x4 → 4x16 | shuffle_T16_16x4_lo | shuffle_T16_16x4_hi | shuffle_T16_4x16_lo | shuffle_T16_4x16_hi |
16-bit | 8x4 → 4x8 | shuffle_T16_8x4 | - | shuffle_T16_4x8 | - |
32-bit | 8x4 → 4x8 | shuffle_T32_8x4_lo | shuffle_T32_8x4_hi | shuffle_T32_4x8_lo | shuffle_T32_4x8_hi |
32-bit | 4x4 → 4x4 | shuffle_T32_4x4 | - | shuffle_T32_4x4 | - |
8-bit | 8x8 → 8x8 | shuffle_T8_8x8 | - | shuffle_T8_8x8 | - |
8-bit | 16x4 → 4x16 | shuffle_T8_16x4 | - | shuffle_T8_4x16 | - |
16-bit | 16x[a,b] → 16x[b,a] | shuffle_T16_1x2_flip | - | shuffle_T16_1x2_flip | - |
16-bit | 4x4 → 4x4 | shuffle_T16_4x4 | - | shuffle_T16_4x4 | - |
16-bit | 4x2 → 2x4 | shuffle_T16_4x2 | - | shuffle_T16_2x4 | - |
16-bit | 8x2 → 2x8 | shuffle_T16_8x2 | - | shuffle_T16_2x8 | - |
16-bit | 16x2 → 2x16 | shuffle_T16_16x2 | - | shuffle_T16_2x16 | - |
8-bit | 8x4 → 4x8 | shuffle_T8_8x4 | - | shuffle_T8_4x8 | - |
Modules | |
Illustration of Shuffle Modes | |
Shuffle two vectors | |||||||
Shuffle two vectors
| |||||||
v16int32 | shuffle (v16int32, v16int32, unsigned int) | ||||||
v32int16 | shuffle (v32int16, v32int16, unsigned int) | ||||||
v64int8 | shuffle (v64int8, v64int8, unsigned int) | ||||||
v128int4 | shuffle (v128int4, v128int4, unsigned int) | ||||||
v16uint32 | shuffle (v16uint32, v16uint32, unsigned int) | ||||||
v32uint16 | shuffle (v32uint16, v32uint16, unsigned int) | ||||||
v64uint8 | shuffle (v64uint8, v64uint8, unsigned int) | ||||||
v128uint4 | shuffle (v128uint4, v128uint4, unsigned int) | ||||||
v8cint32 | shuffle (v8cint32, v8cint32, unsigned int) | ||||||
v16cint16 | shuffle (v16cint16, v16cint16, unsigned int) | ||||||
v32bfloat16 | shuffle (v32bfloat16, v32bfloat16, unsigned int) | ||||||
v16cbfloat16 | shuffle (v16cbfloat16, v16cbfloat16, unsigned int) | ||||||
v16float | shuffle (v16float, v16float, unsigned int) | ||||||
v8cfloat | shuffle (v8cfloat, v8cfloat, unsigned int) | ||||||
Shuffle one vector | |||||
Shuffle one vector
| |||||
v16int32 | shuffle (v16int32 a, unsigned int mode) | ||||
v32int16 | shuffle (v32int16 a, unsigned int mode) | ||||
v64int8 | shuffle (v64int8 a, unsigned int mode) | ||||
v128int4 | shuffle (v128int4 a, unsigned int mode) | ||||
v16uint32 | shuffle (v16uint32 a, unsigned int mode) | ||||
v32uint16 | shuffle (v32uint16 a, unsigned int mode) | ||||
v64uint8 | shuffle (v64uint8 a, unsigned int mode) | ||||
v128uint4 | shuffle (v128uint4 a, unsigned int mode) | ||||
v8cint32 | shuffle (v8cint32 a, unsigned int mode) | ||||
v16cint16 | shuffle (v16cint16 a, unsigned int mode) | ||||
v32bfloat16 | shuffle (v32bfloat16 a, unsigned int mode) | ||||
v16cbfloat16 | shuffle (v16cbfloat16 a, unsigned int mode) | ||||
v16float | shuffle (v16float a, unsigned int mode) | ||||
v8cfloat | shuffle (v8cfloat a, unsigned int mode) | ||||
Updating all elements with same value and shuffle | |||
Broadcasts input value to all vector lanes
| |||
v64int8 | shuffle_s8 (int b, unsigned int m) | ||
v32int16 | shuffle_s16 (int b, unsigned int m) | ||
v16int32 | shuffle_s32 (int b, unsigned int m) | ||
v16int32 | shuffle_v2s32 (v2int32 b, unsigned int m) | ||
v64uint8 | shuffle_u8 (unsigned int b, unsigned int m) | ||
v32uint16 | shuffle_u16 (unsigned int b, unsigned int m) | ||
v16uint32 | shuffle_u32 (unsigned int b, unsigned int m) | ||
v16uint32 | shuffle_v2u32 (v2uint32 b, unsigned int m) | ||
v16cint16 | shuffle_c16 (cint16 b, unsigned int m) | ||
v32bfloat16 | shuffle_bfloat16 (bfloat16 b, unsigned int m) | ||
v16float | shuffle_float (float b, unsigned int m) | ||
v16int32 | shuffle_s64 (long long b, unsigned int m) | ||
v16uint32 | shuffle_u64 (unsigned long long b, unsigned int m) | ||
v8cint32 | shuffle_c32 (cint32 b, unsigned int m) | ||
Shuffle a sparse vector | |||||
Shuffle a sparse vector
| |||||
v256int4_sparse | shuffle (v256int4_sparse qx, int itlv) | ||||
v128int8_sparse | shuffle (v128int8_sparse qx, int itlv) | ||||
v64int16_sparse | shuffle (v64int16_sparse qx, int itlv) | ||||
v256uint4_sparse | shuffle (v256uint4_sparse qx, int itlv) | ||||
v128uint8_sparse | shuffle (v128uint8_sparse qx, int itlv) | ||||
v64uint16_sparse | shuffle (v64uint16_sparse qx, int itlv) | ||||
v64bfloat16_sparse | shuffle (v64bfloat16_sparse qx, int itlv) | ||||
enum eShuffleMode |
Shuffle modes.
Definition of valid modes for vector shuffle
Enumerator | |
---|---|
shuffle_T8_64x2_lo | Transpose 64x2 matrix of 8 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T8_64x2_hi | Transpose 64x2 matrix of 8 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T16_32x2_lo | Transpose 32x2 matrix of 16 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T16_32x2_hi | Transpose 32x2 matrix of 16 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T32_16x2_lo | Transpose 16x2 matrix of 32 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T32_16x2_hi | Transpose 16x2 matrix of 32 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T64_8x2_lo | Transpose 8x2 matrix of 64 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T64_8x2_hi | Transpose 8x2 matrix of 64 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T128_4x2_lo | Transpose 4x2 matrix of 128 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T128_4x2_hi | Transpose 4x2 matrix of 128 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T256_2x2_lo | Transpose 2x2 matrix of 256 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T256_2x2_hi | Transpose 2x2 matrix of 256 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T128_2x4_lo | Transpose 2x4 matrix of 128 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T128_2x4_hi | Transpose 2x4 matrix of 128 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T64_2x8_lo | Transpose 2x8 matrix of 64 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T64_2x8_hi | Transpose 2x8 matrix of 64 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T32_2x16_lo | Transpose 2x16 matrix of 32 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T32_2x16_hi | Transpose 2x16 matrix of 32 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T16_2x32_lo | Transpose 2x32 matrix of 16 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T16_2x32_hi | Transpose 2x32 matrix of 16 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T8_2x64_lo | Transpose 2x64 matrix of 8 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T8_2x64_hi | Transpose 2x64 matrix of 8 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T512_1x2_lo | Transpose 1x2 matrix of 512 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T512_1x2_hi | Transpose 1x2 matrix of 512 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T16_16x4_lo | Transpose 16x4 matrix of 16 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T16_16x4_hi | Transpose 16x4 matrix of 16 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T16_4x16_lo | Transpose 4x16 matrix of 16 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T16_4x16_hi | Transpose 4x16 matrix of 16 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T16_8x4 | Transpose 8x4 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_4x8 | Transpose 4x8 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T32_8x4_lo | Transpose 8x4 matrix of 32 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T32_8x4_hi | Transpose 8x4 matrix of 32 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T32_4x8_lo | Transpose 4x8 matrix of 32 bit values. Extract low 512 bits of result. See illustration. |
shuffle_T32_4x8_hi | Transpose 4x8 matrix of 32 bit values. Extract high 512 bits of result. See illustration. |
shuffle_T32_4x4 | Transpose 4x4 matrix of 32 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T8_8x8 | Transpose 8x8 matrix of 8 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T8_16x4 | Transpose 16x4 matrix of 8 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T8_4x16 | Transpose 4x16 matrix of 8 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_1x2_flip | Flip inner dimension of 16x2 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_4x4 | Transpose 4x4 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_4x2 | Transpose 4x2 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_2x4 | Transpose 2x4 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_8x2 | Transpose 8x2 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_2x8 | Transpose 2x8 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_16x2 | Transpose 16x2 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T16_2x16 | Transpose 2x16 matrix of 16 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T8_8x4 | Transpose 8x4 matrix of 8 bit values. Extract resulting 512 bit vector. See illustration. |
shuffle_T8_4x8 | Transpose 4x8 matrix of 8 bit values. Extract resulting 512 bit vector. See illustration. |
v128int8_sparse shuffle | ( | v128int8_sparse | qx, |
int | itlv | ||
) |
v128uint8_sparse shuffle | ( | v128uint8_sparse | qx, |
int | itlv | ||
) |
v16cbfloat16 shuffle | ( | v16cbfloat16 | a, |
unsigned int | mode | ||
) |
v16cbfloat16 shuffle | ( | v16cbfloat16 | , |
v16cbfloat16 | , | ||
unsigned int | |||
) |
v256int4_sparse shuffle | ( | v256int4_sparse | qx, |
int | itlv | ||
) |
v256uint4_sparse shuffle | ( | v256uint4_sparse | qx, |
int | itlv | ||
) |
v32bfloat16 shuffle | ( | v32bfloat16 | a, |
unsigned int | mode | ||
) |
v32bfloat16 shuffle | ( | v32bfloat16 | , |
v32bfloat16 | , | ||
unsigned int | |||
) |
v64bfloat16_sparse shuffle | ( | v64bfloat16_sparse | qx, |
int | itlv | ||
) |
v64int16_sparse shuffle | ( | v64int16_sparse | qx, |
int | itlv | ||
) |
v64uint16_sparse shuffle | ( | v64uint16_sparse | qx, |
int | itlv | ||
) |
v32bfloat16 shuffle_bfloat16 | ( | bfloat16 | b, |
unsigned int | m | ||
) |
v16cint16 shuffle_c16 | ( | cint16 | b, |
unsigned int | m | ||
) |
v8cint32 shuffle_c32 | ( | cint32 | b, |
unsigned int | m | ||
) |
v16float shuffle_float | ( | float | b, |
unsigned int | m | ||
) |
v32int16 shuffle_s16 | ( | int | b, |
unsigned int | m | ||
) |
v16int32 shuffle_s32 | ( | int | b, |
unsigned int | m | ||
) |
v16int32 shuffle_s64 | ( | long long | b, |
unsigned int | m | ||
) |
v64int8 shuffle_s8 | ( | int | b, |
unsigned int | m | ||
) |
v32uint16 shuffle_u16 | ( | unsigned int | b, |
unsigned int | m | ||
) |
v16uint32 shuffle_u32 | ( | unsigned int | b, |
unsigned int | m | ||
) |
v16uint32 shuffle_u64 | ( | unsigned long long | b, |
unsigned int | m | ||
) |
v64uint8 shuffle_u8 | ( | unsigned int | b, |
unsigned int | m | ||
) |