AI Engine API User Guide (AIE-API) 2024.1
|
AIE provides hardware support to accelerate special multiplications that can be used to accelerate specific application use cases like (but not limited to) signal processing.
Classes | |
struct | aie::sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag > |
struct | aie::sliding_mul_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag > |
This type provides a parametrized multiplication that implements the following compute pattern: More... | |
struct | aie::sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag > |
struct | aie::sliding_mul_sym_uct_ops< Lanes, Points, CoeffStep, DataStep, CoeffType, DataType, AccumTag > |
Typedefs | |
template<unsigned Outputs, unsigned Channels, unsigned Points, int CoeffStep, int DataStepX, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_ch_x_ops = sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStepX, 1, CoeffType, DataType, AccumTag > |
template<unsigned Outputs, unsigned Channels, unsigned Points, int CoeffStep, int DataStep, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_ch_xy_ops = sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStep, DataStep, CoeffType, DataType, AccumTag > |
template<unsigned Outputs, unsigned Channels, unsigned Points, int CoeffStep, int DataStepY, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_ch_y_ops = sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, 1, DataStepY, CoeffType, DataType, AccumTag > |
template<unsigned Lanes, unsigned Points, int CoeffStep, int DataStepX, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_sym_x_ops = sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepX, 1, CoeffType, DataType, AccumTag > |
template<unsigned Lanes, unsigned Points, int CoeffStep, int DataStepXY, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_sym_xy_ops = sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepXY, DataStepXY, CoeffType, DataType, AccumTag > |
template<unsigned Lanes, unsigned Points, int CoeffStep, int DataStepY, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_sym_y_ops = sliding_mul_sym_ops< Lanes, Points, CoeffStep, 1, DataStepY, CoeffType, DataType, AccumTag > |
template<unsigned Lanes, unsigned Points, int CoeffStep, int DataStepX, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_x_ops = sliding_mul_ops< Lanes, Points, CoeffStep, DataStepX, 1, CoeffType, DataType, AccumTag > |
Similar to sliding_mul_ops, but DataStepY is always 1. | |
template<unsigned Lanes, unsigned Points, int CoeffStep, int DataStepXY, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_xy_ops = sliding_mul_ops< Lanes, Points, CoeffStep, DataStepXY, DataStepXY, CoeffType, DataType, AccumTag > |
Similar to sliding_mul_ops, but DataStepX is equal to DataStepY. | |
template<unsigned Lanes, unsigned Points, int CoeffStep, int DataStepY, ElemBaseType CoeffType, ElemBaseType DataType, AccumElemBaseType AccumTag = detail::default_accum_tag_t<CoeffType, DataType>> | |
using | aie::sliding_mul_y_ops = sliding_mul_ops< Lanes, Points, CoeffStep, 1, DataStepY, CoeffType, DataType, AccumTag > |
Similar to sliding_mul_ops, but DataStepX is always 1. | |
Functions | |
template<unsigned Lanes, AccumOrOp Acc, Vector VecCoeff, Vector VecData, size_t N> requires ((N >= 1) && Acc::size() == Lanes && is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type>) | |
auto | aie::accumulate (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const std::array< VecData, N > &data) -> operand_base_type_t< Acc > |
Performs a weighted addition over multiple vectors and accumulates the result into an existing accumulator. | |
template<unsigned Lanes, AccumOrOp Acc, Vector VecCoeff, Vector VecData, Vector... NextVecData> requires ((is_same_vector_v<VecData, NextVecData> && ...) && Acc::size() == Lanes && is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type>) | |
auto | aie::accumulate (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, const NextVecData &...next_data) -> operand_base_type_t< Acc > |
Performs a weighted addition over multiple vectors and accumulates the result into an existing accumulator. | |
template<unsigned Lanes, unsigned CoeffStart = 0, AccumElemBaseType AccumTag = accauto, Vector VecCoeff, Vector VecData, Vector... NextVecData> | |
auto | aie::accumulate (const VecCoeff &coeff, const VecData &data, const NextVecData &...next_data) |
template<unsigned Lanes, AccumElemBaseType AccumTag = accauto, Vector VecCoeff, Vector VecData, size_t N> requires (N >= 1) | |
auto | aie::accumulate (const VecCoeff &coeff, unsigned coeff_start, const std::array< VecData, N > &data) -> accum< detail::accum_tag_or_default_t< AccumTag, typename VecCoeff::value_type, typename VecData::value_type >, Lanes > |
Performs a weighted addition over multiple vectors. | |
template<unsigned Lanes, AccumElemBaseType AccumTag = accauto, Vector VecCoeff, Vector VecData, Vector... NextVecData> requires ((is_same_vector_v<VecData, NextVecData> && ...)) | |
auto | aie::accumulate (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, const NextVecData &...next_data) -> accum< detail::accum_tag_or_default_t< AccumTag, typename VecCoeff::value_type, typename VecData::value_type >, Lanes > |
Performs a weighted addition over multiple vectors. | |
struct aie::sliding_mul_ch_ops |
This type provides a parametrized multiplication that implements the following compute pattern:
Types (coeff x data) | Native lanes | Native points | Channels | CoeffStep | DataStep | coeff_start | data_start |
---|---|---|---|---|---|---|---|
8b x 8b | 4 | 4 | 8 | 1 | 1 | Unsigned | Signed |
8b x 8b | 8 | 8 | 4 | 1 | 1 | Unsigned | Signed |
For Lanes = N * Native lanes, N calls to the underlying intrinsic are made. For Lanes < Native lanes, a single call is made and the requested lanes are extracted.
Outputs | Number of output samples per channel. |
Channels | Number of channels. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepX | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
DataStepY | Step used to select elements from the data buffer. This step is applied to element selection across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
Public Types | |
using | accum_type = accum< detail::accum_tag_or_default_t< AccumTag, CoeffType, DataType >, Lanes > |
using | coeff_type = typename impl_type::coeff_type |
using | data_type = typename impl_type::data_type |
using | impl_type = detail::sliding_mul_ch< Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, accum_bits, CoeffType, DataType > |
enum class | MulType { Mul , Acc_Mul , NegMul } |
Static Public Member Functions | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type> && (VecCoeff::bits() <= max_coeff_bits)) | |
static constexpr accum_type | mac (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs a multiply-add with the pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type> && (VecCoeff::bits() <= max_coeff_bits)) | |
static constexpr accum_type | mul (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs the multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<MulType Mul, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, const Acc &...acc) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type> && (VecCoeff::bits() <= max_coeff_bits)) | |
static constexpr accum_type | negmul (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs a negation of the multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
Static Public Attributes | |
static constexpr unsigned | columns_per_mul = impl_type::columns_per_mul |
static constexpr unsigned | Lanes = Outputs * Channels |
static constexpr unsigned | lanes = impl_type::lanes |
static constexpr unsigned | lanes_per_mul = impl_type::lanes_per_mul |
static constexpr unsigned | num_mul = impl_type::num_mul |
static constexpr unsigned | points = impl_type::points |
using aie::sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::accum_type = accum<detail::accum_tag_or_default_t<AccumTag, CoeffType, DataType>, Lanes> |
using aie::sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::coeff_type = typename impl_type::coeff_type |
using aie::sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::data_type = typename impl_type::data_type |
using aie::sliding_mul_ch_ops< Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::impl_type = detail::sliding_mul_ch<Outputs, Channels, Points, CoeffStep, DataStepX, DataStepY, accum_bits, CoeffType, DataType> |
|
strong |
|
inline static constexpr
Performs a multiply-add with the pattern defined by the class parameters using the input coefficient and data arguments.
acc | Accumulator that is added to the result of the multiplication. |
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inline static constexpr
Performs the multiplication pattern defined by the class parameters using the input coefficient and data arguments.
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inline static constexpr

inline static constexpr
Performs a negation of the multiplication pattern defined by the class parameters using the input coefficient and data arguments.
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
static constexpr

static constexpr

static constexpr

static constexpr

static constexpr

static constexpr
struct aie::sliding_mul_ops |
This type provides a parametrized multiplication that implements the following compute pattern:
Types (coeff x data) | Native accum. | Native lanes | Native points | CoeffStep | DataStepX | DataStepY | coeff_start | data_start | |
---|---|---|---|---|---|---|---|---|---|
8b x 8b | AIE-ML | acc32 | 32 | 8 | 1,2 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed |
16b x 16b | AIE | acc48 | 8 16 | 32/Lanes | 1,2,3,4 | 1 | 1 | Unsigned smaller than 16 | Signed |
AIE-ML | acc32 | 16 | 4 | 1,2 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
acc64 | 16 | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | ||
16b x 32b | AIE | acc48 | 8 16 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2 1 | Unsigned smaller than 16 | Signed |
acc80 | 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2 | Unsigned smaller than 16 | Signed | ||
AIE-ML | acc64 | 16 | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
32b x 16b | AIE | acc48 | 8 16 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2 1 | Unsigned smaller than 16 | Signed |
acc80 | 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2 | Unsigned smaller than 16 | Signed | ||
AIE-ML | acc64 | 16 | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
32b x 32b | AIE | acc80 | 4 8 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
AIE-ML | acc64 | 16 | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
16b x c16b | AIE | cacc48 | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
16b x c32b | AIE | cacc48 | 4 8 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
cacc80 | 4 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed | ||
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
32b x c16b | AIE | cacc48 | 4 8 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
cacc80 | 4 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed | ||
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
32b x c32b | AIE | cacc80 | 2 4 | 4/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c16b x 16b | AIE | cacc48 | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c16b x 32b | AIE | cacc48 | 4 8 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
cacc80 | 4 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed | ||
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c16b x c16b | AIE | cacc48 | 4 8 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c16b x c32b | AIE | cacc48 | 4 | 4/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
cacc80 | 4 | 4/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed | ||
AIE-ML | cacc64 | 8¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c32b x 16b | AIE | cacc48 | 4 8 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
cacc80 | 4 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed | ||
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c32b x 32b | AIE | cacc80 | 2 4 | 4/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
AIE-ML | cacc64 | 16¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c32b x c16b | AIE | cacc48 | 4 | 4/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
cacc80 | 4 | 4/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed | ||
AIE-ML | cacc64 | 8¹ | 4 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
c32b x c32b | AIE | cacc80 | 2 | 2/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
AIE-ML | cacc64 | 8 | 1 | 1 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed |
Types (coeff x data) | Native accum. | Native lanes | Native points | CoeffStep | DataStepX | DataStepY | coeff_start | data_start | |
---|---|---|---|---|---|---|---|---|---|
bfloat16 x bfloat16 | AIE-ML | accfloat | 16 | 1 | 1,2 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed |
float x float | AIE | accfloat | 8 | 1 | 1,2,3,4 | 1,2,3,4 | 1,2 | Unsigned smaller than 16 | Signed |
AIE-ML | accfloat | 16 | 1 | 1,2 | 1,2 | 1,2 (needs to match DataStepX) | Unsigned | Signed | |
float x cfloat | AIE | caccfloat | 4 | 1 | 1,2,3 | 1,2,3,4 | 1,2,3 | Unsigned smaller than 16 | Signed |
cfloat x float | AIE | caccfloat | 4 | 1 | 1,2,3,4 | 1,2,3,4 | 1,2,3 | Unsigned smaller than 16 | Signed |
cfloat x cfloat | AIE | caccfloat | 4 | 1 | 1,2,3 | 1,2,3,4 | 1,2,3 | Unsigned smaller than 16 | Signed |
For Lanes = N * Native lanes, N calls to the underlying intrinsic are made. For Lanes < Native lanes, a single call is made and the requested lanes are extracted.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepX | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
DataStepY | Step used to select elements from the data buffer. This step is applied to element selection across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
Public Types | |
using | accum_type = accum< detail::accum_tag_or_default_t< AccumTag, CoeffType, DataType >, Lanes > |
using | coeff_type = typename impl_type::coeff_type |
using | data_type = typename impl_type::data_type |
using | impl_type = detail::sliding_mul< Lanes, Points, CoeffStep, DataStepX, DataStepY, accum_bits, CoeffType, DataType > |
enum class | MulType { Mul , Acc_Mul , NegMul } |
Static Public Member Functions | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type> && (VecCoeff::bits() <= max_coeff_bits)) | |
static constexpr accum_type | mac (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs a multiply-add with the pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type> && (VecCoeff::bits() <= max_coeff_bits)) | |
static constexpr accum_type | mul (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs the multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<MulType Mul, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, const Acc &...acc) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<typename VecCoeff::value_type, typename VecData::value_type> && (VecCoeff::bits() <= max_coeff_bits)) | |
static constexpr accum_type | negmul (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs a negation of the multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
Static Public Attributes | |
static constexpr unsigned | columns_per_mul = impl_type::columns_per_mul |
static constexpr unsigned | lanes = impl_type::lanes |
static constexpr unsigned | lanes_per_mul = impl_type::lanes_per_mul |
static constexpr unsigned | num_mul = impl_type::num_mul |
static constexpr unsigned | points = impl_type::points |
using aie::sliding_mul_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::accum_type = accum<detail::accum_tag_or_default_t<AccumTag, CoeffType, DataType>, Lanes> |
using aie::sliding_mul_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::coeff_type = typename impl_type::coeff_type |
using aie::sliding_mul_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::data_type = typename impl_type::data_type |
using aie::sliding_mul_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::impl_type = detail::sliding_mul<Lanes, Points, CoeffStep, DataStepX, DataStepY, accum_bits, CoeffType, DataType> |
|
strong |
|
inline static constexpr
Performs a multiply-add with the pattern defined by the class parameters using the input coefficient and data arguments.
acc | Accumulator that is added to the result of the multiplication. |
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inline static constexpr
Performs the multiplication pattern defined by the class parameters using the input coefficient and data arguments.
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inline static constexpr

inline static constexpr
Performs a negation of the multiplication pattern defined by the class parameters using the input coefficient and data arguments.
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
static constexpr

static constexpr

static constexpr

static constexpr

static constexpr
struct aie::sliding_mul_sym_ops |
This type provides a parametrized multiplication that implements the following compute pattern:
Types (coeff x data) | Native lanes | Native points | CoeffStep | DataStepX | DataStepY | coeff_start | data_start |
---|---|---|---|---|---|---|---|
16b x 16b | 8 16 | 64/Lanes | 1,2,3,4 | 1 | 1 | Unsigned smaller than 16 | Signed |
16b x 32b | 8 16 | 32/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2 1 | Unsigned smaller than 16 | Signed |
32b x 16b | 8 16 | 32/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2 1 | Unsigned smaller than 16 | Signed |
16b x c16b | 4 8 | 32/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
c16b x 16b | 4 8 | 32/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
c16b x c16b | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
c16b x 32b | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
32b x c16b | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
c32b x 16b | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
16b x c32b | 4 8 | 16/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
c32b x c16b | 4 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c16b x c32b | 4 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
Types (coeff x data) | Native lanes | Native points | CoeffStep | DataStepX | DataStepY | coeff_start | data_start |
---|---|---|---|---|---|---|---|
32b x 16b | 8 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2 | Unsigned smaller than 16 | Signed |
16b x 32b | 8 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2 | Unsigned smaller than 16 | Signed |
32b x 32b | 4 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 1,2 | Unsigned smaller than 16 | Signed |
32b x c16b | 4 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c16b x 32b | 4 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c32b x 16b | 4 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
16b x c32b | 4 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c32b x c16b | 4 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c16b x c32b | 4 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c32b x 32b | 2 4 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
32b x c32b | 2 4 | 8/Lanes | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c32b x c32b | 2 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
For Lanes = N * Native lanes, N calls to the underlying intrinsic are made. For Lanes < Native lanes, a single call is made and the requested lanes are extracted.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepX | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
DataStepY | Step used to select elements from the data buffer. This step is applied to element selection across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
Public Types | |
using | accum_type = accum< detail::accum_tag_or_default_t< AccumTag, CoeffType, DataType >, Lanes > |
using | coeff_type = typename impl_type::coeff_type |
using | data_type = typename impl_type::data_type |
using | impl_type = detail::sliding_mul_sym< Lanes, Points, CoeffStep, DataStepX, DataStepY, accum_bits, CoeffType, DataType > |
enum class | SymMulType { Sym , Antisym , Acc_Sym , Acc_Antisym } |
Static Public Member Functions | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_antisym (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs the antisymmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments. | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_antisym (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned ldata_start, unsigned rdata_start) |
Performs the antisymmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments. | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_antisym (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start) |
Performs the antisymmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments. | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_sym (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs the symmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments. | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_sym (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned ldata_start, unsigned rdata_start) |
Performs the symmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments. | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_sym (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start) |
Performs the symmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_antisym (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs the antisymmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_antisym (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned ldata_start, unsigned rdata_start) |
Performs the antisymmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_antisym (const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start) |
Performs the antisymmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<SymMulType MulType, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, const Acc &...acc) |
template<SymMulType MulType, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned ldata_start, unsigned rdata_start, const Acc &...acc) |
template<SymMulType MulType, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start, const Acc &...acc) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_sym (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start) |
Performs the symmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_sym (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned ldata_start, unsigned rdata_start) |
Performs the symmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_sym (const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start) |
Performs the symmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments. | |
Static Public Attributes | |
static constexpr unsigned | columns_per_mul = impl_type::columns_per_mul |
static constexpr unsigned | lanes = impl_type::lanes |
static constexpr unsigned | lanes_per_mul = impl_type::lanes_per_mul |
static constexpr unsigned | num_mul = impl_type::num_mul |
static constexpr unsigned | points = impl_type::points |
using aie::sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::accum_type = accum<detail::accum_tag_or_default_t<AccumTag, CoeffType, DataType>, Lanes> |
using aie::sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::coeff_type = typename impl_type::coeff_type |
using aie::sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::data_type = typename impl_type::data_type |
using aie::sliding_mul_sym_ops< Lanes, Points, CoeffStep, DataStepX, DataStepY, CoeffType, DataType, AccumTag >::impl_type = detail::sliding_mul_sym<Lanes, Points, CoeffStep, DataStepX, DataStepY, accum_bits, CoeffType, DataType> |
|
strong |
|
inlinestaticconstexpr |
Performs the antisymmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments.
This variant takes a single start index (data_start) for the data elements.
acc | Accumulator to be added to the result of the multiplication. |
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the antisymmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments.
This variant allows two separate start indices for left/right elements.
acc | Accumulator to be added to the result of the multiplication. |
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the antisymmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments.
This variant uses two input buffers for left/right elements.
acc | Accumulator to be added to the result of the multiplication. |
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
ldata | Vector of left data samples. The size is limited to vectors of up to 512 bits. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata | Vector of right data samples. The size is limited to vectors of up to 512 bits. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the symmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments.
acc | Accumulator to be added to the result of the multiplication. |
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the symmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments.
This variant allows two separate start indices for left/right elements.
acc | Accumulator to be added to the result of the multiplication. |
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the symmetric multiply-add pattern defined by the class parameters using the input coefficient and data arguments.
This variant uses two input buffers for left/right elements.
acc | Accumulator to be added to the result of the multiplication. |
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
ldata | Vector of left data samples. The size is limited to vectors of up to 512 bits. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata | Vector of right data samples. The size is limited to vectors of up to 512 bits. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the antisymmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments.
This variant takes a single start index (data_start) for the data elements.
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the antisymmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments.
This variant allows two separate start indices for left/right elements.
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the antisymmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments.
This variant uses two input buffers for left/right elements.
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
ldata | Vector of left data samples. The size is limited to vectors of up to 512 bits. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata | Vector of right data samples. The size is limited to vectors of up to 512 bits. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
Performs the symmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments.
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
data_start | Index of the first data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the symmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments.
This variant allows two separate start indices for left/right elements.
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
data | Vector of data samples. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
inlinestaticconstexpr |
Performs the symmetric multiplication pattern defined by the class parameters using the input coefficient and data arguments.
This variant uses two input buffers for left/right elements.
coeff | Vector of coefficients. On AIE the size is limited to vectors of up to 256 bits. |
coeff_start | Index of the first coefficient element to be used in the multiplication. |
ldata | Vector of left data samples. The size is limited to vectors of up to 512 bits. |
ldata_start | Index of the first left data element to be used in the multiplication. |
rdata | Vector of right data samples. The size is limited to vectors of up to 512 bits. |
rdata_start | Index of the first right data element to be used in the multiplication. |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
struct aie::sliding_mul_sym_uct_ops |
This type provides a parametrized multiplication across the lower half of its lanes (equivalent to sliding_mul_sym_ops), and upshifts one selected set of data in the upper half of the lanes.
It implements the following compute pattern:
Types (coeff x data) | Native lanes | Native points | CoeffStep | DataStepX | DataStepY | coeff_start | data_start |
---|---|---|---|---|---|---|---|
c16b x c16b | 4 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
32b x c16b | 4 | 4 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
c32b x c16b | 4 | 2 | 1,2,3,4 | 1,2,3,4 | 1,2,3,4 | Unsigned smaller than 16 | Signed |
For Lanes = N * Native lanes, N calls to the underlying intrinsic are made. For Lanes < Native lanes, a single call is made and the requested lanes extracted.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane in the first half of the output Lanes. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStep | Step used to select elements from the data buffer. This step is applied to element selection within a lane and across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
Public Types | |
using | accum_type = accum< detail::accum_tag_or_default_t< AccumTag, CoeffType, DataType >, Lanes > |
using | coeff_type = typename impl_type::coeff_type |
using | data_type = typename impl_type::data_type |
using | impl_type = detail::sliding_mul_sym_uct< Lanes, Points, CoeffStep, DataStep, accum_bits, CoeffType, DataType > |
enum class | SymMulType { Sym , Antisym , Acc_Sym , Acc_Antisym } |
Static Public Member Functions | |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_antisym_uct (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, unsigned uct_shift) |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_antisym_uct (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start, unsigned uct_shift) |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_sym_uct (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, unsigned uct_shift) |
template<AccumOrOp Acc, VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mac_sym_uct (const Acc &acc, const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start, unsigned uct_shift) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_antisym_uct (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, unsigned uct_shift) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_antisym_uct (const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start, unsigned uct_shift) |
template<SymMulType MulType, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, unsigned uct_shift, const Acc &...acc) |
template<SymMulType MulType, VectorOrOp VecCoeff, VectorOrOp VecData, AccumOrOp... Acc> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_common (const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start, unsigned uct_shift, const Acc &...acc) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_sym_uct (const VecCoeff &coeff, unsigned coeff_start, const VecData &data, unsigned data_start, unsigned uct_shift) |
template<VectorOrOp VecCoeff, VectorOrOp VecData> requires (is_valid_mul_op_v<CoeffType, DataType>) | |
static constexpr accum_type | mul_sym_uct (const VecCoeff &coeff, unsigned coeff_start, const VecData &ldata, unsigned ldata_start, const VecData &rdata, unsigned rdata_start, unsigned uct_shift) |
Static Public Attributes | |
static constexpr unsigned | columns_per_mul = impl_type::columns_per_mul |
static constexpr unsigned | lanes = impl_type::lanes |
static constexpr unsigned | lanes_per_mul = impl_type::lanes_per_mul |
static constexpr unsigned | num_mul = impl_type::num_mul |
static constexpr unsigned | points = impl_type::points |
using aie::sliding_mul_sym_uct_ops< Lanes, Points, CoeffStep, DataStep, CoeffType, DataType, AccumTag >::accum_type = accum<detail::accum_tag_or_default_t<AccumTag, CoeffType, DataType>, Lanes> |
using aie::sliding_mul_sym_uct_ops< Lanes, Points, CoeffStep, DataStep, CoeffType, DataType, AccumTag >::coeff_type = typename impl_type::coeff_type |
using aie::sliding_mul_sym_uct_ops< Lanes, Points, CoeffStep, DataStep, CoeffType, DataType, AccumTag >::data_type = typename impl_type::data_type |
using aie::sliding_mul_sym_uct_ops< Lanes, Points, CoeffStep, DataStep, CoeffType, DataType, AccumTag >::impl_type = detail::sliding_mul_sym_uct<Lanes, Points, CoeffStep, DataStep, accum_bits, CoeffType, DataType> |
|
strong |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
inlinestaticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
|
staticconstexpr |
using aie::sliding_mul_ch_x_ops = typedef sliding_mul_ch_ops<Outputs, Channels, Points, CoeffStep, DataStepX, 1, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_ch_ops, but DataStepY is always 1.
Outputs | Number of output samples. |
Channels | Number of channels. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepX | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_ch_xy_ops = typedef sliding_mul_ch_ops<Outputs, Channels, Points, CoeffStep, DataStep, DataStep, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_ch_ops, but DataStepX is equal to DataStepY.
Outputs | Number of output samples. |
Channels | Number of channels. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStep | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_ch_y_ops = typedef sliding_mul_ch_ops<Outputs, Channels, Points, CoeffStep, 1, DataStepY, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_ch_ops, but DataStepX is always 1.
Outputs | Number of output samples. |
Channels | Number of channels. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepY | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_sym_x_ops = typedef sliding_mul_sym_ops<Lanes, Points, CoeffStep, DataStepX, 1, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_sym_ops, but DataStepY is always 1.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepX | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_sym_xy_ops = typedef sliding_mul_sym_ops<Lanes, Points, CoeffStep, DataStepXY, DataStepXY, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_sym_ops, but DataStepX is equal to DataStepY.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepXY | Step used to select elements from the data buffer. This step is applied to element selection within a lane and across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_sym_y_ops = typedef sliding_mul_sym_ops<Lanes, Points, CoeffStep, 1, DataStepY, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_sym_ops, but DataStepX is always 1.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepY | Step used to select elements from the data buffer. This step is applied to element selection across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_x_ops = typedef sliding_mul_ops<Lanes, Points, CoeffStep, DataStepX, 1, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_ops, but DataStepY is always 1.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepX | Step used to select elements from the data buffer. This step is applied to element selection within a lane. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_xy_ops = typedef sliding_mul_ops<Lanes, Points, CoeffStep, DataStepXY, DataStepXY, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_ops, but DataStepX is equal to DataStepY.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepXY | Step used to select elements from the data buffer. This step is applied to element selection within a lane and across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
using aie::sliding_mul_y_ops = typedef sliding_mul_ops<Lanes, Points, CoeffStep, 1, DataStepY, CoeffType, DataType, AccumTag> |
Similar to sliding_mul_ops, but DataStepX is always 1.
Lanes | Number of output elements. |
Points | Number of data elements used to compute each lane. |
CoeffStep | Step used to select elements from the coeff buffer. This step is applied to element selection within a lane. |
DataStepY | Step used to select elements from the data buffer. This step is applied to element selection across lanes. |
CoeffType | Type of the coefficient elements. |
DataType | Type of the data elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
auto aie::accumulate | ( | const Acc & | acc, |
const VecCoeff & | coeff, | ||
unsigned | coeff_start, | ||
const std::array< VecData, N > & | data | ||
) | -> operand_base_type_t<Acc> |
Performs a weighted addition over multiple vectors and accumulates the result into an existing accumulator.
Given a finite number of vectors \( d_1, d_2, ..., d_C \) and a weight vector \( coef = ( \alpha_1, \alpha_2, ..., \alpha_n ) \) for \( n \geq (C + start) \) and an accumulator \( acc \), the result is an accumulator of the form \( out = acc + \sum_{i=1}^{C} \alpha_{i + start} * d_i \).
Each of the data vectors is scaled by its corresponding element of coeff vector.
The operation consists of a parametrized multiplication that implements the following compute pattern:
In the following example, accumulate multiplies data0 by 1, data1 by 2 and data2 by 3 respectively:
Lanes | Number of output elements. |
acc | Accumulator to which the result of the accumulation is added (or subtracted). The type must meet aie::AccumOrOp. |
coeff | Vector of coefficients. Size is limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | First element from the coeff vector to be used. |
data | Array of data vectors. |
auto aie::accumulate | ( | const Acc & | acc, |
const VecCoeff & | coeff, | ||
unsigned | coeff_start, | ||
const VecData & | data, | ||
const NextVecData &... | next_data | ||
) | -> operand_base_type_t<Acc> |
Performs a weighted addition over multiple vectors and accumulates the result into an existing accumulator.
Given a finite number of vectors \( d_1, d_2, ..., d_C \) and a weight vector \( coef = ( \alpha_1, \alpha_2, ..., \alpha_n ) \) for \( n \geq (C + start) \) and an accumulator \( acc \), the result is an accumulator of the form \( out = acc + \sum_{i=1}^{C} \alpha_{i + start} * d_i \).
Each of the data vectors is scaled by its corresponding element of coeff vector.
The operation consists of a parametrized multiplication that implements the following compute pattern:
In the following example, accumulate multiplies data0 by 1, data1 by 2 and data2 by 3 respectively:
Lanes | Number of output elements. |
acc | Accumulator to which the result of the accumulation is added (or subtracted). The type must meet aie::AccumOrOp. |
coeff | Vector of coefficients. Size is limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | First element from the coeff vector to be used. |
data | First vector of data. |
next_data | Remaining data vectors. |
auto aie::accumulate | ( | const VecCoeff & | coeff, |
const VecData & | data, | ||
const NextVecData &... | next_data | ||
) |
auto aie::accumulate | ( | const VecCoeff & | coeff, |
unsigned | coeff_start, | ||
const std::array< VecData, N > & | data | ||
) | -> accum<detail::accum_tag_or_default_t<AccumTag, typename VecCoeff::value_type, typename VecData::value_type>, Lanes> |
Performs a weighted addition over multiple vectors.
Given a finite number of vectors \( d_1, d_2, ..., d_C \) and a weight vector \( coef = ( \alpha_1, \alpha_2, ..., \alpha_n ) \), for \( n \geq (C + start) \), the result is an accumulator of the form \( out = \sum_{i=1}^{C} \alpha_{i + start} * d_i \).
Each of the data vectors is scaled by its corresponding element of coeff vector.
This function provides a parametrized multiplication that implements the following compute pattern:
Lanes | Number of output elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | First element from the coeff vector to be used. |
data | Array of data vectors. |
auto aie::accumulate | ( | const VecCoeff & | coeff, |
unsigned | coeff_start, | ||
const VecData & | data, | ||
const NextVecData &... | next_data | ||
) | -> accum<detail::accum_tag_or_default_t<AccumTag, typename VecCoeff::value_type, typename VecData::value_type>, Lanes> |
Performs a weighted addition over multiple vectors.
Given a finite number of vectors \( d_1, d_2, ..., d_C \) and a weight vector \( coef = ( \alpha_1, \alpha_2, ..., \alpha_n ) \), for \( n \geq (C + start) \), the result is an accumulator of the form \( out = \sum_{i=1}^{C} \alpha_{i + start} * d_i \).
Each of the data vectors is scaled by its corresponding element of coeff vector.
This function provides a parametrized multiplication that implements the following compute pattern:
Lanes | Number of output elements. |
AccumTag | Accumulator tag that specifies the required accumulation bits. The class must be compatible with the result of the multiplication of the coefficient and data types (real/complex). |
coeff | Vector of coefficients. Vectors limited to 256b and 512b on AIE and AIE-ML respectively. |
coeff_start | First element from the coeff vector to be used. |
data | First vector of data. |
next_data | Rest of the data vectors. |