Basic Math Functions for Neural Network Computation
-
void riscv_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)
-
void riscv_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size)
-
q7_t *riscv_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
-
q7_t *riscv_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
-
riscv_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int8_t *row_base, const int8_t *col_base, int32_t *const sum_col, int32_t *const output)
-
int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
-
riscv_status riscv_nn_mat_mult_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max)
-
void riscv_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
-
void riscv_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
-
riscv_status riscv_nn_vec_mat_mult_t_s16(const q15_t *lhs, const q7_t *rhs, const q63_t *bias, q15_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
-
riscv_status riscv_nn_vec_mat_mult_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset)
-
riscv_status riscv_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, const q7_t *rhs, q15_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
- group NNBasicMath
Basic Math Functions for Neural Network Computation.
Functions
-
void riscv_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)
Converts the elements of a q7 vector to q15 and accumulates them into a q15 vector.
The equation used for the conversion process is: pDst[n] += (q15_t) pSrc[n], for 0 <= n < length.
- Description:
- Parameters
*pSrc – [in] points to the q7 input vector
*pDst – [out] points to the q15 output vector
length – [in] length of the input vector
-
void riscv_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size)
Non-saturating addition of elements of a q7 vector.
2^24 samples can be added without saturating the result.
- Description:
The equation used for the summation is: *output = input[0] + input[1] + ... + input[block_size - 1]
- Parameters
*input – [in] Pointer to the q7 input vector
*output – [out] Pointer to the q31 output variable.
block_size – [in] length of the input vector
-
q7_t *riscv_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -input_offset (Range: int8). Dimensions are the same for lhs and rhs.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
Output shift
Output multiplier
Output bias
rhs
- Parameters
lhs – [in] Input left-hand side matrix
rhs – [in] Input right-hand side matrix (transposed)
input_offset – [in] LHS matrix offset (input offset). Range: -127 to 128
num_ch – [in] Number of channels in LHS/RHS
out_shift – [in] Per channel output shift. Length of vector is equal to number of channels
out_mult – [in] Per channel output multiplier. Length of vector is equal to number of channels
out_offset – [in] Offset to be added to the output values. Range: -127 to 128
activation_min – [in] Minimum value to clamp the output to. Range: int8
activation_max – [in] Maximum value to clamp the output to. Range: int8
row_x_col – [in] (row_dimension * col_dimension) of LHS/RHS matrix
output_bias – [in] Per channel output bias. Length of vector is equal to number of channels
out – [out] Output pointer
- Returns
The function returns one of the two
Updated output pointer if an implementation is available
NULL if no implementation is available.
-
q7_t *riscv_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
Output shift
Output multiplier
Output bias
rhs
- Parameters
lhs – [in] Input left-hand side matrix
rhs – [in] Input right-hand side matrix (transposed)
input_offset – [in] LHS matrix offset (input offset). Range: -127 to 128
num_ch – [in] Number of channels in LHS/RHS
out_shift – [in] Per channel output shift. Length of vector is equal to number of channels.
out_mult – [in] Per channel output multiplier. Length of vector is equal to number of channels.
out_offset – [in] Offset to be added to the output values. Range: -127 to 128
activation_min – [in] Minimum value to clamp the output to. Range: int8
activation_max – [in] Maximum value to clamp the output to. Range: int8
row_x_col – [in] (row_dimension * col_dimension) of LHS/RHS matrix
output_bias – [in] Per channel output bias. Length of vector is equal to number of channels.
out – [out] Output pointer
- Returns
The function returns one of the two
Updated output pointer if an implementation is available
NULL if no implementation is available.
-
riscv_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int8_t *row_base, const int8_t *col_base, int32_t *const sum_col, int32_t *const output)
General Matrix-multiplication without requantization for one row & one column.
Pseudo-code:
    *output = 0
    *sum_col = 0
    for (i = 0; i < row_elements; i++)
        *output += row_base[i] * col_base[i]
        *sum_col += col_base[i]
- Parameters
row_elements – [in] number of row elements
row_base – [in] pointer to row operand
col_base – [in] pointer to col operand
sum_col – [out] pointer to store sum of column elements
output – [out] pointer to store result of multiply-accumulate
- Returns
The function returns a riscv_status code. The multiply-accumulated result of the row by column is stored in *output, and the sum of the column elements in *sum_col.
-
int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
Matrix-multiplication with requantization & activation function for four rows and one column.
Compliant with the TFLM int8 specification. MVE implementation only.
- Parameters
row_elements – [in] number of row elements
offset – [in] offset between rows. Can be the same as row_elements, for example in a 1x1 conv scenario with a stride of 1.
row_base – [in] pointer to row operand
col_base_ref – [in] pointer to col operand
out_ch – [in] Number of output channels
conv_params – [in] Pointer to convolution parameters like offsets and activation values
quant_params – [in] Pointer to per-channel quantization parameters
bias – [in] Pointer to per-channel bias
output – [out] Pointer to output where int8 results are stored.
- Returns
The function returns the updated output pointer or NULL if implementation is not available.
-
riscv_status riscv_nn_mat_mult_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max)
General Matrix-multiplication function with per-channel requantization. This function assumes:
LHS input matrix NOT transposed (nt)
RHS input matrix transposed (t)
Note
This operation also performs the broadcast bias addition before the requantization
- Parameters
lhs – [in] Pointer to the LHS input matrix
rhs – [in] Pointer to the RHS input matrix
bias – [in] Pointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)
dst – [out] Pointer to the output matrix with “m” rows and “n” columns
dst_multipliers – [in] Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)
dst_shifts – [in] Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)
lhs_rows – [in] Number of LHS input rows
rhs_rows – [in] Number of RHS input rows
rhs_cols – [in] Number of LHS/RHS input columns
lhs_offset – [in] Offset to be applied to the LHS input value
dst_offset – [in] Offset to be applied to the output result
activation_min – [in] Minimum value to clamp down the output. Range : int8
activation_max – [in] Maximum value to clamp up the output. Range : int8
- Returns
The function returns
RISCV_MATH_SUCCESS
-
void riscv_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
Q15 vector multiplication with variable output shifts.
q15 vector multiplication with variable output shifts
Scaling and Overflow Behavior:
The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
- Parameters
*pSrcA – [in] pointer to the first input vector
*pSrcB – [in] pointer to the second input vector
*pDst – [out] pointer to the output vector
out_shift – [in] amount of right-shift for output
blockSize – [in] number of samples in each vector
-
void riscv_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
Q7 vector multiplication with variable output shifts.
q7 vector multiplication with variable output shifts
Scaling and Overflow Behavior:
The function uses saturating arithmetic. Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
- Parameters
*pSrcA – [in] pointer to the first input vector
*pSrcB – [in] pointer to the second input vector
*pDst – [out] pointer to the output vector
out_shift – [in] amount of right-shift for output
blockSize – [in] number of samples in each vector
-
riscv_status riscv_nn_vec_mat_mult_t_s16(const q15_t *lhs, const q7_t *rhs, const q63_t *bias, q15_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
s16 Vector by Matrix (transposed) multiplication
- Parameters
lhs – [in] Input left-hand side vector
rhs – [in] Input right-hand side matrix (transposed)
bias – [in] Input bias
dst – [out] Output vector
dst_multiplier – [in] Output multiplier
dst_shift – [in] Output shift
rhs_cols – [in] Number of columns in the right-hand side input matrix
rhs_rows – [in] Number of rows in the right-hand side input matrix
activation_min – [in] Minimum value to clamp the output to. Range: int16
activation_max – [in] Maximum value to clamp the output to. Range: int16
- Returns
The function returns
RISCV_MATH_SUCCESS
-
riscv_status riscv_nn_vec_mat_mult_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset)
s8 Vector by Matrix (transposed) multiplication
- Parameters
lhs – [in] Input left-hand side vector
rhs – [in] Input right-hand side matrix (transposed)
bias – [in] Input bias
dst – [out] Output vector
lhs_offset – [in] Offset to be added to the input values of the left-hand side vector. Range: -127 to 128
rhs_offset – [in] Not used
dst_offset – [in] Offset to be added to the output values. Range: -127 to 128
dst_multiplier – [in] Output multiplier
dst_shift – [in] Output shift
rhs_cols – [in] Number of columns in the right-hand side input matrix
rhs_rows – [in] Number of rows in the right-hand side input matrix
activation_min – [in] Minimum value to clamp the output to. Range: int8
activation_max – [in] Maximum value to clamp the output to. Range: int8
address_offset – [in] Memory position offset for dst. First output is stored at ‘dst’, the second at ‘dst + address_offset’ and so on. Default value is typically 1.
- Returns
The function returns
RISCV_MATH_SUCCESS
-
riscv_status riscv_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, const q7_t *rhs, q15_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
s8 Vector by Matrix (transposed) multiplication with s16 output
- Parameters
lhs – [in] Input left-hand side vector
rhs – [in] Input right-hand side matrix (transposed)
dst – [out] Output vector
lhs_offset – [in] Offset to be added to the input values of the left-hand side vector. Range: -127 to 128
rhs_offset – [in] Not used
dst_offset – [in] Address offset for dst (named scatter_offset in some versions of this API). First output is stored at ‘dst’, the second at ‘dst + dst_offset’ and so on.
dst_multiplier – [in] Output multiplier
dst_shift – [in] Output shift
rhs_cols – [in] Number of columns in the right-hand side input matrix
rhs_rows – [in] Number of rows in the right-hand side input matrix
activation_min – [in] Minimum value to clamp the output to. Range: int16
activation_max – [in] Maximum value to clamp the output to. Range: int16
- Returns
The function returns
RISCV_MATH_SUCCESS
-
void riscv_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)