Basic Math Functions for Neural Network Computation

void riscv_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)
void riscv_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size)
q7_t *riscv_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
q7_t *riscv_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
riscv_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int8_t *row_base, const int8_t *col_base, int32_t *const sum_col, int32_t *const output)
int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
riscv_status riscv_nn_mat_mult_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max)
void riscv_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
void riscv_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
riscv_status riscv_nn_vec_mat_mult_t_s16(const q15_t *lhs, const q7_t *rhs, const q63_t *bias, q15_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
riscv_status riscv_nn_vec_mat_mult_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset)
riscv_status riscv_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, const q7_t *rhs, q15_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
group NNBasicMath

Basic Math Functions for Neural Network Computation.

Functions

void riscv_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length)

Converts the elements of a q7 vector and accumulates them into a q15 vector.

The equation used for the conversion process is:

    pDst[n] += (q15_t) pSrc[n];   0 <= n < length.

Description:

Parameters
  • *pSrc[in] points to the q7 input vector

  • *pDst[out] points to the q15 output vector

  • length[in] length of the input vector

void riscv_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size)

Non-saturating addition of elements of a q7 vector.

2^24 samples can be added without saturating the result.

Description:

The equation used for the summation is:

    *output = input[0] + input[1] + ... + input[block_size - 1]

Parameters
  • *input[in] Pointer to the q7 input vector

  • *output[out] Pointer to the q31 output variable.

  • block_size[in] length of the input vector

q7_t *riscv_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)

Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -input_offset (Range: int8). Dimensions are the same for lhs and rhs.

Note

If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following:

  • Output shift

  • Output multiplier

  • Output bias

  • rhs

Parameters
  • lhs[in] Input left-hand side matrix

  • rhs[in] Input right-hand side matrix (transposed)

  • input_offset[in] LHS matrix offset (input offset). Range: -127 to 128

  • num_ch[in] Number of channels in LHS/RHS

  • out_shift[in] Per channel output shift. Length of vector is equal to number of channels

  • out_mult[in] Per channel output multiplier. Length of vector is equal to number of channels

  • out_offset[in] Offset to be added to the output values. Range: -127 to 128

  • activation_min[in] Minimum value to clamp the output to. Range: int8

  • activation_max[in] Maximum value to clamp the output to. Range: int8

  • row_x_col[in] (row_dimension * col_dimension) of LHS/RHS matrix

  • output_bias[in] Per channel output bias. Length of vector is equal to number of channels

  • out[out] Output pointer

Returns

The function returns one of the following:

  • Updated output pointer if an implementation is available

  • NULL if no implementation is available.

q7_t *riscv_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)

Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.

Note

If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for the following:

  • Output shift

  • Output multiplier

  • Output bias

  • rhs

Parameters
  • lhs[in] Input left-hand side matrix

  • rhs[in] Input right-hand side matrix (transposed)

  • input_offset[in] LHS matrix offset (input offset). Range: -127 to 128

  • num_ch[in] Number of channels in LHS/RHS

  • out_shift[in] Per channel output shift. Length of vector is equal to number of channels.

  • out_mult[in] Per channel output multiplier. Length of vector is equal to number of channels.

  • out_offset[in] Offset to be added to the output values. Range: -127 to 128

  • activation_min[in] Minimum value to clamp the output to. Range: int8

  • activation_max[in] Maximum value to clamp the output to. Range: int8

  • row_x_col[in] (row_dimension * col_dimension) of LHS/RHS matrix

  • output_bias[in] Per channel output bias. Length of vector is equal to number of channels.

  • out[out] Output pointer

Returns

The function returns one of the following:

  • Updated output pointer if an implementation is available

  • NULL if no implementation is available.

riscv_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int8_t *row_base, const int8_t *col_base, int32_t *const sum_col, int32_t *const output)

General Matrix-multiplication without requantization for one row & one column.

Pseudo-code:

    *output = 0
    *sum_col = 0
    for (i = 0; i < row_elements; i++)
        *output += row_base[i] * col_base[i]
        *sum_col += col_base[i]

Parameters
  • row_elements[in] number of row elements

  • row_base[in] pointer to row operand

  • col_base[in] pointer to col operand

  • sum_col[out] pointer to store sum of column elements

  • output[out] pointer to store result of multiply-accumulate

Returns

The function returns a riscv_status status code; the multiply-accumulated result of the row by column is written to *output.

int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)

Matrix-multiplication with requantization & activation function for four rows and one column.

Compliant with the TFLM int8 specification. MVE implementation only.

Parameters
  • row_elements[in] number of row elements

  • offset[in] offset between rows. Can be the same as row_elements, for example in a 1x1 convolution with a stride of 1.

  • row_base[in] pointer to row operand

  • col_base_ref[in] pointer to col operand

  • out_ch[in] Number of output channels

  • conv_params[in] Pointer to convolution parameters like offsets and activation values

  • quant_params[in] Pointer to per-channel quantization parameters

  • bias[in] Pointer to per-channel bias

  • output[out] Pointer to output where int8 results are stored.

Returns

The function returns the updated output pointer or NULL if implementation is not available.

riscv_status riscv_nn_mat_mult_nt_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max)

General Matrix-multiplication function with per-channel requantization. This function assumes:

  • LHS input matrix NOT transposed (nt)

  • RHS input matrix transposed (t)

Note

This operation also performs the broadcast bias addition before the requantization

Parameters
  • lhs[in] Pointer to the LHS input matrix

  • rhs[in] Pointer to the RHS input matrix

  • bias[in] Pointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst[out] Pointer to the output matrix with “m” rows and “n” columns

  • dst_multipliers[in] Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • dst_shifts[in] Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)

  • lhs_rows[in] Number of LHS input rows

  • rhs_rows[in] Number of RHS input rows

  • rhs_cols[in] Number of LHS/RHS input columns

  • lhs_offset[in] Offset to be applied to the LHS input value

  • dst_offset[in] Offset to be applied to the output result

  • activation_min[in] Minimum value to clamp down the output. Range : int8

  • activation_max[in] Maximum value to clamp up the output. Range : int8

Returns

The function returns RISCV_MATH_SUCCESS

void riscv_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)

Q15 vector multiplication with variable output shifts.

q15 vector multiplication with variable output shifts

Scaling and Overflow Behavior:

The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.

Parameters
  • *pSrcA[in] pointer to the first input vector

  • *pSrcB[in] pointer to the second input vector

  • *pDst[out] pointer to the output vector

  • out_shift[in] amount of right-shift for output

  • blockSize[in] number of samples in each vector

void riscv_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)

Q7 vector multiplication with variable output shifts.

q7 vector multiplication with variable output shifts

Scaling and Overflow Behavior:

The function uses saturating arithmetic. Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.

Parameters
  • *pSrcA[in] pointer to the first input vector

  • *pSrcB[in] pointer to the second input vector

  • *pDst[out] pointer to the output vector

  • out_shift[in] amount of right-shift for output

  • blockSize[in] number of samples in each vector

riscv_status riscv_nn_vec_mat_mult_t_s16(const q15_t *lhs, const q7_t *rhs, const q63_t *bias, q15_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)

s16 Vector by Matrix (transposed) multiplication

Parameters
  • lhs[in] Input left-hand side vector

  • rhs[in] Input right-hand side matrix (transposed)

  • bias[in] Input bias

  • dst[out] Output vector

  • dst_multiplier[in] Output multiplier

  • dst_shift[in] Output shift

  • rhs_cols[in] Number of columns in the right-hand side input matrix

  • rhs_rows[in] Number of rows in the right-hand side input matrix

  • activation_min[in] Minimum value to clamp the output to. Range: int16

  • activation_max[in] Maximum value to clamp the output to. Range: int16

Returns

The function returns RISCV_MATH_SUCCESS

riscv_status riscv_nn_vec_mat_mult_t_s8(const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset)

s8 Vector by Matrix (transposed) multiplication

Parameters
  • lhs[in] Input left-hand side vector

  • rhs[in] Input right-hand side matrix (transposed)

  • bias[in] Input bias

  • dst[out] Output vector

  • lhs_offset[in] Offset to be added to the input values of the left-hand side vector. Range: -127 to 128

  • rhs_offset[in] Not used

  • dst_offset[in] Offset to be added to the output values. Range: -127 to 128

  • dst_multiplier[in] Output multiplier

  • dst_shift[in] Output shift

  • rhs_cols[in] Number of columns in the right-hand side input matrix

  • rhs_rows[in] Number of rows in the right-hand side input matrix

  • activation_min[in] Minimum value to clamp the output to. Range: int8

  • activation_max[in] Maximum value to clamp the output to. Range: int8

  • address_offset[in] Memory position offset for dst. First output is stored at ‘dst’, the second at ‘dst + address_offset’ and so on. Default value is typically 1.

Returns

The function returns RISCV_MATH_SUCCESS

riscv_status riscv_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, const q7_t *rhs, q15_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)

s8 Vector by Matrix (transposed) multiplication with s16 output

Parameters
  • lhs[in] Input left-hand side vector

  • rhs[in] Input right-hand side matrix (transposed)

  • dst[out] Output vector

  • lhs_offset[in] Offset to be added to the input values of the left-hand side vector. Range: -127 to 128

  • rhs_offset[in] Not used

  • dst_offset[in] Address offset for dst. First output is stored at ‘dst’, the second at ‘dst + dst_offset’ and so on.

  • dst_multiplier[in] Output multiplier

  • dst_shift[in] Output shift

  • rhs_cols[in] Number of columns in the right-hand side input matrix

  • rhs_rows[in] Number of rows in the right-hand side input matrix

  • activation_min[in] Minimum value to clamp the output to. Range: int16

  • activation_max[in] Maximum value to clamp the output to. Range: int16

Returns

The function returns RISCV_MATH_SUCCESS