Convolution
-
riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_padded_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)
-
int16_t *riscv_nn_depthwise_conv_nt_t_s16(const int16_t *lhs, const int8_t *rhs, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int64_t *const output_bias, int16_t *out)
-
riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)
-
riscv_nmsis_nn_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
-
int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
-
int16_t *riscv_nn_mat_mult_kernel_s16(const int8_t *input_a, const int16_t *input_b, const int32_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int16_t activation_min, const int16_t activation_max, const int32_t num_col_a, const int64_t *const output_bias, int16_t *out_0)
-
riscv_nmsis_nn_status riscv_nn_mat_mult_nt_t_s8(const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t lhs_cols_offset)
- group supportConvolution
Support functions for Convolution and DW Convolution.
Functions
-
riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_padded_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)
Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -input_offset (Range: int8). Dimensions are the same for lhs and rhs.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
Output shift
Output multiplier
Output bias
rhs
- Parameters
lhs – [in] Input left-hand side matrix
rhs – [in] Input right-hand side matrix (transposed)
input_offset – [in] LHS matrix offset (input offset). Range: -127 to 128
active_ch – [in] Subset of total_ch processed
total_ch – [in] Number of channels in LHS/RHS
out_shift – [in] Per channel output shift. Length of vector is equal to number of channels
out_mult – [in] Per channel output multiplier. Length of vector is equal to number of channels
out_offset – [in] Offset to be added to the output values. Range: -127 to 128
activation_min – [in] Minimum value to clamp the output to. Range: int8
activation_max – [in] Maximum value to clamp the output to. Range: int8
row_x_col – [in] (row_dimension * col_dimension) of LHS/RHS matrix
output_bias – [in] Per channel output bias. Length of vector is equal to number of channels
out – [out] Output pointer
- Returns
The function returns one of the two status codes
RISCV_NMSIS_NN_SUCCESS if an implementation is available and the operation succeeded
RISCV_NMSIS_NN_NO_IMPL_ERROR if no implementation is available.
-
int16_t *riscv_nn_depthwise_conv_nt_t_s16(const int16_t *lhs, const int8_t *rhs, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int64_t *const output_bias, int16_t *out)
Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
Output shift
Output multiplier
Output bias
rhs
- Parameters
lhs – [in] Input left-hand side matrix
rhs – [in] Input right-hand side matrix (transposed)
num_ch – [in] Number of channels in LHS/RHS
out_shift – [in] Per channel output shift. Length of vector is equal to number of channels.
out_mult – [in] Per channel output multiplier. Length of vector is equal to number of channels.
activation_min – [in] Minimum value to clamp the output to. Range: int16
activation_max – [in] Maximum value to clamp the output to. Range: int16
row_x_col – [in] (row_dimension * col_dimension) of LHS/RHS matrix
output_bias – [in] Per channel output bias. Length of vector is equal to number of channels.
out – [out] Output pointer
- Returns
The function returns one of the two
Updated output pointer if an implementation is available
NULL if no implementation is available.
-
riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)
Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
Output shift
Output multiplier
Output bias
rhs
- Parameters
lhs – [in] Input left-hand side matrix
rhs – [in] Input right-hand side matrix (transposed)
input_offset – [in] LHS matrix offset (input offset). Range: -127 to 128
active_ch – [in] Subset of total_ch processed
total_ch – [in] Number of channels in LHS/RHS
out_shift – [in] Per channel output shift. Length of vector is equal to number of channels.
out_mult – [in] Per channel output multiplier. Length of vector is equal to number of channels.
out_offset – [in] Offset to be added to the output values. Range: -127 to 128
activation_min – [in] Minimum value to clamp the output to. Range: int8
activation_max – [in] Maximum value to clamp the output to. Range: int8
row_x_col – [in] (row_dimension * col_dimension) of LHS/RHS matrix
output_bias – [in] Per channel output bias. Length of vector is equal to number of channels.
out – [out] Output pointer
- Returns
The function returns one of the two status codes
RISCV_NMSIS_NN_SUCCESS if an implementation is available and the operation succeeded
RISCV_NMSIS_NN_NO_IMPL_ERROR if no implementation is available.
-
riscv_nmsis_nn_status riscv_nn_mat_mul_core_1x_s8(int32_t row_elements, const int32_t skipped_row_elements, const int8_t *row_base_ref, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
General Vector by Matrix multiplication with requantization and storage of result.
Pseudo-code
    *output = 0
    sum_col = 0
    for (j = 0; j < out_ch; j++)
    for (i = 0; i < row_elements; i++)
        *output += row_base_ref[i] * col_base_ref[i]
        sum_col += col_base_ref[i]
    scale sum_col using quant_params and bias
    store result in 'output'
- Parameters
row_elements – [in] number of row elements
skipped_row_elements – [in] number of row elements skipped due to padding. row_elements + skipped_row_elements = (kernel_x * kernel_y) * input_ch
row_base_ref – [in] pointer to row operand
col_base_ref – [in] pointer to col operand
out_ch – [in] Number of output channels
conv_params – [in] Pointer to convolution parameters like offsets and activation values
quant_params – [in] Pointer to per-channel quantization parameters
bias – [in] Pointer to optional per-channel bias
output – [out] Pointer to output where int8 results are stored.
- Returns
The function multiplies the row operand (row_base_ref) with the column operand (col_base_ref) and stores the scaled result in memory. The return value is a riscv_nmsis_nn_status code.
-
int8_t *riscv_nn_mat_mul_core_4x_s8(const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
Matrix-multiplication with requantization & activation function for four rows and one column.
Compliant with the TFLM int8 specification. MVE implementation only.
- Parameters
row_elements – [in] number of row elements
offset – [in] offset between rows. Can be the same as row_elements, for example in a 1x1 convolution with a stride of 1.
row_base – [in] pointer to row operand
col_base_ref – [in] pointer to col operand
out_ch – [in] Number of output channels
conv_params – [in] Pointer to convolution parameters like offsets and activation values
quant_params – [in] Pointer to per-channel quantization parameters
bias – [in] Pointer to per-channel bias
output – [out] Pointer to output where int8 results are stored.
- Returns
The function returns the updated output pointer or NULL if implementation is not available.
-
int16_t *riscv_nn_mat_mult_kernel_s16(const int8_t *input_a, const int16_t *input_b, const int32_t output_ch, const int32_t *out_shift, const int32_t *out_mult, const int16_t activation_min, const int16_t activation_max, const int32_t num_col_a, const int64_t *const output_bias, int16_t *out_0)
Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution.
This function multiplies the weight matrix for all output channels with 2 columns from im2col and produces two elements per output channel. The outputs are clamped to the range given by activation_min and activation_max. Supported framework: TensorFlow Lite Micro.
- Parameters
input_a – [in] pointer to operand A
input_b – [in] pointer to operand B, always consists of 2 vectors.
output_ch – [in] number of rows of A
out_shift – [in] pointer to per output channel requantization shift parameter.
out_mult – [in] pointer to per output channel requantization multiplier parameter.
activation_min – [in] minimum value to clamp the output to. Range : int16
activation_max – [in] maximum value to clamp the output to. Range : int16
num_col_a – [in] number of columns of A
output_bias – [in] per output channel bias. Range : int64
out_0 – [inout] pointer to output
- Returns
The function returns one of the two
The incremented output pointer for a successful operation or
NULL if implementation is not available.
-
riscv_nmsis_nn_status riscv_nn_mat_mult_nt_t_s8(const int8_t *lhs, const int8_t *rhs, const int32_t *bias, int8_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max, const int32_t lhs_cols_offset)
General Matrix-multiplication function with per-channel requantization. This function assumes:
LHS input matrix NOT transposed (nt)
RHS input matrix transposed (t)
Note
This operation also performs the broadcast bias addition before the requantization
- Parameters
lhs – [in] Pointer to the LHS input matrix
rhs – [in] Pointer to the RHS input matrix
bias – [in] Pointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)
dst – [out] Pointer to the output matrix with “m” rows and “n” columns
dst_multipliers – [in] Pointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)
dst_shifts – [in] Pointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)
lhs_rows – [in] Number of LHS input rows
rhs_rows – [in] Number of RHS input rows
rhs_cols – [in] Number of LHS/RHS input columns
lhs_offset – [in] Offset to be applied to the LHS input value
dst_offset – [in] Offset to be applied to the output result
activation_min – [in] Minimum value to clamp down the output. Range : int8
activation_max – [in] Maximum value to clamp up the output. Range : int8
lhs_cols_offset – [in] Column offset between subsequent lhs_rows
- Returns
The function returns
RISCV_NMSIS_NN_SUCCESS
-
riscv_nmsis_nn_status riscv_nn_depthwise_conv_nt_t_padded_s8(const int8_t *lhs, const int8_t *rhs, const int32_t input_offset, const int32_t active_ch, const int32_t total_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, int8_t *out)