AI Engine Intrinsics User Guide  (AIE) r2p23
 All Data Structures Namespaces Functions Variables Typedefs Groups Pages

Overview

Integer vector comparison.

Performs the comparison between lanes (selected using the scheme) and returns the result of the comparison as a bit in the return word.

Functions

unsigned int eq16 (v32int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs an equal to comparison between lanes of xbuff.
 
unsigned int eq16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs an equal to comparison between lanes of xbuff.
 
unsigned int eq16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, v16int32 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs an equal to comparison between lanes of xbuff and ybuff.
 
unsigned int eq32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs an equal to comparison between lanes of xbuff.
 
unsigned int eq32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs an equal to comparison between lanes of xbuff.
 
unsigned int eq32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, v32int16 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs an equal to comparison between lanes of xbuff and ybuff.
 
unsigned int ge16 (v32int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a greater than or equal to comparison between lanes of xbuff.
 
unsigned int ge16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a greater than or equal to comparison between lanes of xbuff.
 
unsigned int ge16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, v16int32 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a greater than or equal to comparison between lanes of xbuff and ybuff.
 
unsigned int ge32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a greater than or equal to comparison between lanes of xbuff.
 
unsigned int ge32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a greater than or equal to comparison between lanes of xbuff.
 
unsigned int ge32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, v32int16 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a greater than or equal to comparison between lanes of xbuff and ybuff.
 
unsigned int gt16 (v32int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a greater than comparison between lanes of xbuff.
 
unsigned int gt16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a greater than comparison between lanes of xbuff.
 
unsigned int gt16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, v16int32 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a greater than comparison between lanes of xbuff and ybuff.
 
unsigned int gt32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a greater than comparison between lanes of xbuff.
 
unsigned int gt32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a greater than comparison between lanes of xbuff.
 
unsigned int gt32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, v32int16 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a greater than comparison between lanes of xbuff and ybuff.
 
unsigned int le16 (v32int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a less than or equal to comparison between lanes of xbuff.
 
unsigned int le16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a less than or equal to comparison between lanes of xbuff.
 
unsigned int le16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, v16int32 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a less than or equal to comparison between lanes of xbuff and ybuff.
 
unsigned int le32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a less than or equal to comparison between lanes of xbuff.
 
unsigned int le32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a less than or equal to comparison between lanes of xbuff.
 
unsigned int le32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, v32int16 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a less than or equal to comparison between lanes of xbuff and ybuff.
 
unsigned int lt16 (v32int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a less than comparison between lanes of xbuff.
 
unsigned int lt16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a less than comparison between lanes of xbuff.
 
unsigned int lt16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, v16int32 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a less than comparison between lanes of xbuff and ybuff.
 
unsigned int lt32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a less than comparison between lanes of xbuff.
 
unsigned int lt32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a less than comparison between lanes of xbuff.
 
unsigned int lt32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, v32int16 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a less than comparison between lanes of xbuff and ybuff.
 
unsigned int ne16 (v32int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a not equal to comparison between lanes of xbuff.
 
unsigned int ne16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a not equal to comparison between lanes of xbuff.
 
unsigned int ne16 (v16int32 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, v16int32 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi)
 Performs a not equal to comparison between lanes of xbuff and ybuff.
 
unsigned int ne32 (v64int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a not equal to comparison between lanes of xbuff.
 
unsigned int ne32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a not equal to comparison between lanes of xbuff.
 
unsigned int ne32 (v32int16 xbuff, int xstart, unsigned int xoffsets, unsigned int xoffsets_hi, unsigned int xsquare, v32int16 ybuff, int ystart, unsigned int yoffsets, unsigned int yoffsets_hi, unsigned int ysquare)
 Performs a not equal to comparison between lanes of xbuff and ybuff.
 

Function Documentation

unsigned int eq16 ( v32int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs an equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] == x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of an equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 32-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
unsigned int eq16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs an equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] == x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of an equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 16 elements with 32-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
unsigned int eq16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
v16int32  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs an equal to comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] == y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of an equal to comparison between lanes of xbuff and ybuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 16 elements with 32-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
ybuff: Input buffer of 16 elements with 32-bit precision
ystart: Starting position offset applied to all lanes of input from ybuffer for the second input
yoffsets: 4b offset for each lane, applied to the ybuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the ybuffer. LSBs apply to the 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
unsigned int eq32 ( v64int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs an equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] == x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of an equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 64 elements with 16-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
xsquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
ysquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, go here.
  • For more information on how the function f() selects data from the buffers go here.
unsigned int eq32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs an equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] == x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of an equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
xsquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
ysquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, go here.
  • For more information on how the function f() selects data from the buffers go here.
unsigned int eq32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
v32int16  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs an equal to comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] == y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of an equal to comparison between lanes of xbuff and ybuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
xsquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
ybuff: Input buffer of 32 elements with 16-bit precision
ystart: Starting position offset applied to all lanes of input from ybuffer for the second input
yoffsets: 4b offset for each lane, applied to the ybuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the ybuffer. LSBs apply to the 16th lane
ysquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, go here.
  • For more information on how the function f() selects data from the buffers go here.
unsigned int ge16 ( v32int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a greater than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] >= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 32-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int ge16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a greater than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] >= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 16 elements with 32-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int ge16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
v16int32  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a greater than or equal to comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] >= y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than or equal to comparison between lanes of xbuff and ybuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 16 elements with 32-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 8th lane
ybuff: Input buffer of 16 elements with 32-bit precision
ystart: Starting position offset applied to all lanes of input from ybuffer for the second input
yoffsets: 4b offset for each lane, applied to the ybuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the ybuffer. LSBs apply to the 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int ge32 ( v64int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a greater than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] >= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 64 elements with 16-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
xsquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
ysquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int ge32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a greater than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] >= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
xsquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from xbuffer for the second input
yoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
ysquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int ge32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
v32int16  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a greater than or equal to comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] >= y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than or equal to comparison between lanes of xbuff and ybuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision
xstart: Starting position offset applied to all lanes of input from X buffer
xoffsets: 4b offset for each lane, applied to the xbuffer. LSBs apply to the first lane
xoffsets_hi: 4b offset for each lane, applied to the xbuffer. LSBs apply to the 16th lane
xsquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
ybuff: Input buffer of 32 elements with 16-bit precision
ystart: Starting position offset applied to all lanes of input from ybuffer for the second input
yoffsets: 4b offset for each lane, applied to the ybuffer. LSBs apply to the first lane
yoffsets_hi: 4b offset for each lane, applied to the ybuffer. LSBs apply to the 16th lane
ysquare: Select order of the mini-permute square (default=0x3210). LSBs apply to the first element. Value per lane needs to be less than 4. Max value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int gt16 ( v32int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a greater than comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] > x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int gt16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a greater than comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] > x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 16 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int gt16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
v16int32  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a greater than comparison between lanes of xbuff and ybuff.

Buffers x and y are swapped, so parameters (offsets, start, step) follow the rules accordingly.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] > y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 16 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ybuffInput buffer of 16 elements with 32-bit precision
ystartStarting position offset applied to all lanes of input from ybuffer for the second input
yoffsets4b offset for each lane, applied to the ybuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the ybuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int gt32 ( v64int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a greater than comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] > x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 64 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int gt32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a greater than comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] > x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int gt32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
v32int16  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a greater than comparison between lanes of xbuff and ybuff.

Buffers x and y are swapped, so parameters (offsets, start, step) follow the rules accordingly.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] > y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a greater than comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ybuffInput buffer of 32 elements with 16-bit precision
ystartStarting position offset applied to all lanes of input from ybuffer for the second input
yoffsets4b offset for each lane, applied to the ybuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the ybuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int le16 ( v32int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a less than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] <= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int le16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a less than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] <= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 16 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int le16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
v16int32  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a less than or equal to comparison between lanes of xbuff and ybuff.

Buffers x and y are swapped, so parameters (offsets, start, step) follow the rules accordingly.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] <= y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than or equal to comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 16 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ybuffInput buffer of 16 elements with 32-bit precision
ystartStarting position offset applied to all lanes of input from ybuffer for the second input
yoffsets4b offset for each lane, applied to the ybuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the ybuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int le32 ( v64int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a less than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] <= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 64 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int le32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a less than or equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] <= x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than or equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int le32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
v32int16  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a less than or equal to comparison between lanes of xbuff and ybuff.

Buffers x and y are swapped, so parameters (offsets, start, step) follow the rules accordingly.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] <= y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than or equal to comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ybuffInput buffer of 32 elements with 16-bit precision
ystartStarting position offset applied to all lanes of input from ybuffer for the second input
yoffsets4b offset for each lane, applied to the ybuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the ybuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int lt16 ( v32int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a less than comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] < x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 32 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int lt16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a less than comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] < x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 16 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int lt16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
v16int32  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a less than comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] < y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 16 elements with 32-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 8th lane
ybuffInput buffer of 16 elements with 32-bit precision
ystartStarting position offset applied to all lanes of input from ybuffer for the second input
yoffsets4b offset for each lane, applied to the ybuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the ybuffer. LSB apply to 8th lane
Note
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int lt32 ( v64int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a less than comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] < x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuffInput buffer of 64 elements with 16-bit precision
xstartStarting position offset applied to all lanes of input from X buffer
xoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
xoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
xsquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
ystartStarting position offset applied to all lanes of input from xbuffer for the second input
yoffsets4b offset for each lane, applied to the xbuffer. LSB apply to first lane
yoffsets_hi4b offset for each lane, applied to the xbuffer. LSB apply to 16th lane
ysquareSelect order of the mini-permute square (default=0x3210). LSB apply to first element. Value per lane needs to be less than 4. max value for this field is (0x3333)
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int lt32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a less than comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] < x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
xsquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from the xbuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
ysquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff using 'xstart','xoffsets(_hi)' params is the left operand and data from xbuff using 'ystart','yoffsets(_hi)' params is the right operand.
unsigned int lt32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
v32int16  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a less than comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] < y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a less than comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
xsquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
ybuff: Input buffer of 32 elements with 16-bit precision.
ystart: Starting position offset applied to all lanes of input from the ybuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the ybuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the ybuffer. The LSBs apply to lane 16 (zero-based).
ysquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
  • Data from xbuff is the left operand and data from ybuff is the right operand.
unsigned int ne16 ( v32int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a not equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] != x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a not equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 32-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 8 (zero-based).
ystart: Starting position offset applied to all lanes of input from the xbuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 8 (zero-based).
Note
  • For more information on how the function f() selects data from the buffers go here.
unsigned int ne16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a not equal to comparison between lanes of xbuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] != x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a not equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 16 elements with 32-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 8 (zero-based).
ystart: Starting position offset applied to all lanes of input from the xbuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 8 (zero-based).
Note
  • For more information on how the function f() selects data from the buffers go here.
unsigned int ne16 ( v16int32  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
v16int32  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi 
)

Performs a not equal to comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 16; i++)
idx = f( xstart, xoffsets[i]);
idy = f( ystart, yoffsets[i]);
cmp[i-th bit] = x[idx] != y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0])
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7])
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a not equal to comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 16 elements with 32-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 8 (zero-based).
ybuff: Input buffer of 16 elements with 32-bit precision.
ystart: Starting position offset applied to all lanes of input from the ybuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the ybuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the ybuffer. The LSBs apply to lane 8 (zero-based).
Note
  • For more information on how the function f() selects data from the buffers go here.
unsigned int ne32 ( v64int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a not equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] != x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a not equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 64 elements with 16-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
xsquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from the xbuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
ysquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
unsigned int ne32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a not equal to comparison between lanes of xbuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] != x[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a not equal to comparison between lanes of xbuff where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
xsquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
ystart: Starting position offset applied to all lanes of input from the xbuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
ysquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.
unsigned int ne32 ( v32int16  xbuff,
int  xstart,
unsigned int  xoffsets,
unsigned int  xoffsets_hi,
unsigned int  xsquare,
v32int16  ybuff,
int  ystart,
unsigned int  yoffsets,
unsigned int  yoffsets_hi,
unsigned int  ysquare 
)

Performs a not equal to comparison between lanes of xbuff and ybuff.

for (int i = 0; i < 32; i++)
idx = f( xstart, xoffsets[i],xsquare);
idy = f( ystart, yoffsets[i],ysquare);
cmp[i-th bit] = x[idx] != y[idy]
xoffsets, xoffsets_hi, yoffsets, yoffsets_hi have 8 offset values each. 4 bits per offset.
For Example: for v16int32 output type, idx for output_lane_0 = f(xstart,xoffsets[0],xsquare)
For Example: for v16int32 output type, idx for output_lane_15 = f(xstart,xoffsets_hi[7],xsquare)
In case of v32int16, 1 offset is used for 2 adjacent lanes.
For more information on how the function f() selects data from the buffers refer to Lane selection note below.
Returns
Value of each bit is the result of a not equal to comparison between lanes of xbuff and ybuff, where the result of lane 0 goes to the LSB.
Parameters
xbuff: Input buffer of 32 elements with 16-bit precision.
xstart: Starting position offset applied to all lanes of input from the X buffer.
xoffsets: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to the first lane.
xoffsets_hi: 4-bit offset for each lane, applied to the xbuffer. The LSBs apply to lane 16 (zero-based).
xsquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
ybuff: Input buffer of 32 elements with 16-bit precision.
ystart: Starting position offset applied to all lanes of input from the ybuffer for the second input.
yoffsets: 4-bit offset for each lane, applied to the ybuffer. The LSBs apply to the first lane.
yoffsets_hi: 4-bit offset for each lane, applied to the ybuffer. The LSBs apply to lane 16 (zero-based).
ysquare: Select order of the mini-permute square (default=0x3210). The LSBs apply to the first element. The value per lane needs to be less than 4; the maximum value for this field is 0x3333.
Note
  • This intrinsic uses the 'square' parameter; for more information on how to use it, please go here.
  • For more information on how the function f() selects data from the buffers go here.