Convolution Functions
- GetBufferSizeNNConv
- riscv_convolve_s16_get_buffer_size()
- riscv_convolve_wrapper_s16_get_buffer_size()
- riscv_convolve_wrapper_s16_get_buffer_size_dsp()
- riscv_convolve_s4_get_buffer_size()
- riscv_convolve_1_x_n_s4_get_buffer_size()
- riscv_convolve_1x1_s4_fast_get_buffer_size()
- riscv_convolve_wrapper_s4_get_buffer_size()
- riscv_convolve_wrapper_s4_get_buffer_size_dsp()
- riscv_convolve_s8_get_buffer_size()
- riscv_convolve_1_x_n_s8_get_buffer_size()
- riscv_convolve_1x1_s8_fast_get_buffer_size()
- riscv_convolve_wrapper_s8_get_buffer_size()
- riscv_convolve_wrapper_s8_get_buffer_size_dsp()
- riscv_depthwise_conv_fast_s16_get_buffer_size()
- riscv_depthwise_conv_wrapper_s16_get_buffer_size()
- riscv_depthwise_conv_wrapper_s16_get_buffer_size_dsp()
- riscv_depthwise_conv_s4_opt_get_buffer_size()
- riscv_depthwise_conv_wrapper_s4_get_buffer_size()
- riscv_depthwise_conv_wrapper_s4_get_buffer_size_dsp()
- riscv_depthwise_conv_s8_opt_get_buffer_size_dsp()
- riscv_depthwise_conv_s8_opt_get_buffer_size()
- riscv_depthwise_conv_wrapper_s8_get_buffer_size()
- riscv_depthwise_conv_wrapper_s8_get_buffer_size_dsp()
- riscv_transpose_conv_s8_get_buffer_size()
- riscv_transpose_conv_s8_get_reverse_conv_buffer_size()
 
- 
riscv_nmsis_nn_status riscv_convolve_1_x_n_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_1_x_n_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_1x1_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_1x1_s4_fast(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_1x1_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_1x1_s8_fast(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_even_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *packed_filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q15_basic(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q15_fast(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_basic(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_fast(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_RGB(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_convolve_s16(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const nmsis_nn_bias_data *bias_data, const nmsis_nn_dims *output_dims, int16_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *packed_filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *upscale_dims, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_wrapper_s16(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const nmsis_nn_bias_data *bias_data, const nmsis_nn_dims *output_dims, int16_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_wrapper_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_convolve_wrapper_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_3x3_s8(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_fast_s16(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output)
- static void __attribute__ ((unused))
- 
static void depthwise_conv_s16_generic_s16(const int16_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int64_t *bias, int16_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_s16(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output)
- 
static void depthwise_conv_s4_generic(const int8_t *input, const int32_t input_batches, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t dilation_x, const int32_t dilation_y)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_s4(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_s4_opt(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
static void depthwise_conv_s8_mult_4(const int8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max)
- 
static void depthwise_conv_s8_generic(const int8_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_s8(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_s8_opt(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s16(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s4(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s8(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- 
riscv_nmsis_nn_status riscv_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- 
riscv_nmsis_nn_status riscv_transpose_conv_s8(const nmsis_nn_context *ctx, const nmsis_nn_context *output_ctx, const nmsis_nn_transpose_conv_params *transpose_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 
riscv_nmsis_nn_status riscv_transpose_conv_wrapper_s8(const nmsis_nn_context *ctx, const nmsis_nn_context *reverse_conv_ctx, const nmsis_nn_transpose_conv_params *transpose_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- group Convolution Functions
- Collection of convolution and depthwise convolution functions and their variants. - The convolution is implemented in 2 steps: im2col and General Matrix Multiplication (GEMM). - im2col is the process of converting each patch of image data into a column. After im2col, the convolution is computed as a matrix-matrix multiplication. - To reduce the memory footprint, im2col is performed partially: in each iteration, only a few columns (i.e., patches) are generated, followed by GEMM. - Functions - 
riscv_nmsis_nn_status riscv_convolve_1_x_n_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 1xn convolution for s4 weights - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - stride.w * input_dims->c is a multiple of 4 
- Explicit constraints (since it is for 1xN convolution): input_dims->h equals 1; output_dims->h equals 1; filter_dims->h equals 1 - Todo:
- Remove constraint on output_dims->w to make the function generic. 
 
 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_1_x_n_s4_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension 
- filter_data – [in] Filter data pointer. Data type: int8 as packed int4 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_1_x_n_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- 1xn convolution - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - input_dims->n equals 1 
- output_dims->w is a multiple of 4 
- Explicit constraints (since it is for 1xN convolution): input_dims->h equals 1; output_dims->h equals 1; filter_dims->h equals 1 - Todo:
- Remove constraint on output_dims->w to make the function generic. 
 
 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal spatial filter dimension 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- Fast Q7 version of 1x1 convolution (non-square shape) - This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution. - This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2 - [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861 - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in_x – [in] input tensor dimension x 
- dim_im_in_y – [in] input tensor dimension y 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel_x – [in] filter kernel size x 
- dim_kernel_y – [in] filter kernel size y 
- padding_x – [in] padding size x 
- padding_y – [in] padding size y 
- stride_x – [in] convolution stride x 
- stride_y – [in] convolution stride y 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out_x – [in] output tensor dimension x 
- dim_im_out_y – [in] output tensor dimension y 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_convolve_1x1_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- s4 version for 1x1 convolution with support for non-unity stride values - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - conv_params->padding.w = conv_params->padding.h = 0 
 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. None is required by this function. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 
- filter_data – [in] Filter data pointer. Data type: int8 packed with 2x int4 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_1x1_s4_fast(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- Fast s4 version for 1x1 convolution (non-square shape) - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - conv_params->padding.w = conv_params->padding.h = 0 
- conv_params->stride.w = conv_params->stride.h = 1 
 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_1x1_s4_fast_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 
- filter_data – [in] Filter data pointer. Data type: int8 packed with 2x int4 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_1x1_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- s8 version for 1x1 convolution with support for non-unity stride values - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - conv_params->padding.w = conv_params->padding.h = 0 
 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. None is required by this function. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_1x1_s8_fast(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- Fast s8 version for 1x1 convolution (non-square shape) - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - conv_params->padding.w = conv_params->padding.h = 0 
- conv_params->stride.w = conv_params->stride.h = 1 
 
 - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - input_dims->c is a multiple of 4 
- conv_params->padding.w = conv_params->padding.h = 0 
- conv_params->stride.w = conv_params->stride.h = 1 
 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_even_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *packed_filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q15_basic(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- Basic Q15 convolution function. - Buffer size: - bufferA size: ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - This basic version is designed to work for any input tensor and weight dimension. - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in – [in] input tensor dimension 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel – [in] filter kernel size 
- padding – [in] padding sizes 
- stride – [in] convolution stride 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out – [in] output tensor dimension 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q15_fast(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- Fast Q15 convolution function. - Buffer size: - bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - Input dimension constraints: - ch_im_in is multiple of 2 - ch_im_out is multiple of 2 - dim_im_out is a multiple of 2 - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in – [in] input tensor dimension 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel – [in] filter kernel size 
- padding – [in] padding sizes 
- stride – [in] convolution stride 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out – [in] output tensor dimension 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- Fast Q15 convolution function (non-square shape) - Buffer size: - bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - Input dimension constraints: - ch_im_in is multiple of 2 - ch_im_out is multiple of 2 - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in_x – [in] input tensor dimension x 
- dim_im_in_y – [in] input tensor dimension y 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel_x – [in] filter kernel size x 
- dim_kernel_y – [in] filter kernel size y 
- padding_x – [in] padding size x 
- padding_y – [in] padding size y 
- stride_x – [in] convolution stride x 
- stride_y – [in] convolution stride y 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out_x – [in] output tensor dimension x 
- dim_im_out_y – [in] output tensor dimension y 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_basic(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- Basic Q7 convolution function. - Buffer size: - bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - This basic version is designed to work for any input tensor and weight dimension. - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in – [in] input tensor dimension 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel – [in] filter kernel size 
- padding – [in] padding sizes 
- stride – [in] convolution stride 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out – [in] output tensor dimension 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- Basic Q7 convolution function (non-square shape) - Basic Q7 convolution function (non-square shape) - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in_x – [in] input tensor dimension x 
- dim_im_in_y – [in] input tensor dimension y 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel_x – [in] filter kernel size x 
- dim_kernel_y – [in] filter kernel size y 
- padding_x – [in] padding size x 
- padding_y – [in] padding size y 
- stride_x – [in] convolution stride x 
- stride_y – [in] convolution stride y 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out_x – [in] output tensor dimension x 
- dim_im_out_y – [in] output tensor dimension y 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_fast(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- Fast Q7 convolution function. - Buffer size: - bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - Input dimension constraints: - ch_im_in is multiple of 4 (because of the SIMD32 read and swap) - ch_im_out is multiple of 2 (because of the 2x2 mat_mult kernel) - The im2col converts the Q7 tensor input into Q15 column, which is stored in bufferA. There is reordering happening during this im2col process with riscv_q7_to_q15_reordered_no_shift. For every four elements, the second and third elements are swapped. - The computation kernel riscv_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns. - To speed up the determination of the padding condition, we split the computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance. - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in – [in] input tensor dimension 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel – [in] filter kernel size 
- padding – [in] padding sizes 
- stride – [in] convolution stride 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out – [in] output tensor dimension 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- Fast Q7 convolution function (non-square shape) - This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2 - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in_x – [in] input tensor dimension x 
- dim_im_in_y – [in] input tensor dimension y 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel_x – [in] filter kernel size x 
- dim_kernel_y – [in] filter kernel size y 
- padding_x – [in] padding size x 
- padding_y – [in] padding size y 
- stride_x – [in] convolution stride x 
- stride_y – [in] convolution stride y 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out_x – [in] output tensor dimension x 
- dim_im_out_y – [in] output tensor dimension y 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_convolve_HWC_q7_RGB(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- Q7 convolution function for RGB image. - Q7 version of convolution for RGB image. - Buffer size: - bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - Input dimension constraints: - ch_im_in equals 3 - This kernel is written exclusively for convolution with ch_im_in equals 3. This applies on the first layer of CNNs which has input image with RGB format. - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in – [in] input tensor dimension 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel – [in] filter kernel size 
- padding – [in] padding sizes 
- stride – [in] convolution stride 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out – [in] output tensor dimension 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_convolve_s16(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const nmsis_nn_bias_data *bias_data, const nmsis_nn_dims *output_dims, int16_t *output_data)
- Basic s16 convolution function. - Supported framework: TensorFlow Lite micro 
- Additional memory is required for optimization. Refer to argument ‘ctx’ for details. 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). conv_params->input_offset : Not used conv_params->output_offset : Not used 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int16 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Struct with optional bias data pointer. Bias data type can be int64 or int32 depending flag in struct. 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int16 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS if successful, or - RISCV_NMSIS_NN_ARG_ERROR if incorrect arguments, or - RISCV_NMSIS_NN_NO_IMPL_ERROR
 
 - 
riscv_nmsis_nn_status riscv_convolve_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *packed_filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- Basic s4 convolution function. - Supported framework: TensorFlow Lite micro 
- Additional memory is required for optimization. Refer to argument ‘ctx’ for details. 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_s4_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Packed Filter data pointer. Data type: int8 packed with 2x int4 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
riscv_nmsis_nn_status riscv_convolve_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *upscale_dims, const nmsis_nn_dims *output_dims, int8_t *output_data)
- Basic s8 convolution function. - Supported framework: TensorFlow Lite micro 
- Additional memory is required for optimization. Refer to argument ‘ctx’ for details. 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, CK] where HK, WK and CK are the spatial filter dimensions. CK != C_IN is used for grouped convolution, in which case the required conditions are C_IN = N * CK and C_OUT = N * M for N groups of size M. 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- upscale_dims – [in] Inserts zeroes to upscale the input in h/w dimensions if set to 2. This is used for transposed convolution. 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS if successful, or - RISCV_NMSIS_NN_ARG_ERROR if incorrect arguments, or - RISCV_NMSIS_NN_NO_IMPL_ERROR
 
 - 
riscv_nmsis_nn_status riscv_convolve_wrapper_s16(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const nmsis_nn_bias_data *bias_data, const nmsis_nn_dims *output_dims, int16_t *output_data)
- s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution. - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_wrapper_s16_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). conv_params->input_offset : Not used conv_params->output_offset : Not used 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int16 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Struct with optional bias data pointer. Bias data type can be int64 or int32 depending flag in struct. 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int16 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_wrapper_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- s4 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution. - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_wrapper_s4_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Filter data pointer. Data type: int8 packed with 2x int4 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_convolve_wrapper_s8(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in nmsis-nn to perform the convolution. - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of conv_params->input_offset : [-127, 128] Range of conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_3x3_s8(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on the input arguments (documented below). Refer to riscv_depthwise_conv_s8() for function argument details. - Supported framework : TensorFlow Lite Micro 
- The following constraints on the arguments apply - Number of input channel equals number of output channels 
- Filter height and width equals 3 
- Padding along x is either 0 or 1. 
 
 - Returns:
- The function returns one of the following - RISCV_NMSIS_NN_ARG_ERROR - Unsupported dimension of tensors - Unsupported pad size along the x axis - RISCV_NMSIS_NN_SUCCESS - Successful operation
 
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_fast_s16(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output)
- Optimized s16 depthwise convolution function with constraint that in_channel equals out_channel. Refer to riscv_depthwise_conv_s16() for function argument details. - RISCV_NMSIS_NN_SUCCESS - Successful operation - Supported framework: TensorFlow Lite 
- The following constraints on the arguments apply - Number of input channel equals number of output channels or ch_mult equals 1 
 
- Recommended when number of channels is 4 or greater. 
 - Returns:
- The function returns one of the following - RISCV_NMSIS_NN_ARG_ERROR - ctx->buf == NULL and riscv_depthwise_conv_fast_s16_get_buffer_size() > 0, or input channel != output channel, or ch_mult != 1
 
 - static void __attribute__ ((unused))
 - 
static void depthwise_conv_s16_generic_s16(const int16_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int64_t *bias, int16_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y)
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_s16(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output)
- Basic s16 depthwise convolution function that doesn’t have any constraints on the input dimensions. - Supported framework: TensorFlow Lite 
 - Parameters:
- ctx – [inout] Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- dw_conv_params – [in] Depthwise convolution parameters (e.g. strides, dilations, pads,…) dw_conv_params->input_offset : Not used dw_conv_params->output_offset : Not used 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. 
- input – [in] Input (activation) data pointer. Data type: int16 
- filter_dims – [in] Filter tensor dimensions. Format: [1, H, W, C_OUT] 
- kernel – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias – [in] Bias data pointer. Data type: int64 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output – [inout] Output data pointer. Data type: int16 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
static void depthwise_conv_s4_generic(const int8_t *input, const int32_t input_batches, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t dilation_x, const int32_t dilation_y)
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_s4(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Basic s4 depthwise convolution function that doesn’t have any constraints on the input dimensions. - Supported framework: TensorFlow Lite 
 - Parameters:
- ctx – [inout] Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- dw_conv_params – [in] Depthwise convolution parameters (e.g. strides, dilations, pads,…) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. 
- input – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [1, H, W, C_OUT] 
- kernel – [in] Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias – [in] Bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output – [inout] Output data pointer. Data type: int8 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_s4_opt(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Optimized s4 depthwise convolution function with constraint that in_channel equals out_channel. Refer riscv_depthwise_conv_s4() for function argument details. - Supported framework: TensorFlow Lite 
- The following constraints on the arguments apply - Number of input channels equals number of output channels or ch_mult equals 1 
 
- Recommended when number of channels is 4 or greater. 
 - Returns:
- The function returns one of the following - RISCV_NMSIS_NN_ARG_ERROR - input channel != output channel or ch_mult != 1 - RISCV_NMSIS_NN_SUCCESS - Successful operation
 
 - 
static void depthwise_conv_s8_mult_4(const int8_t *input, const int32_t input_x, const int32_t input_y, const int32_t input_ch, const int8_t *kernel, const int32_t output_ch, const int32_t ch_mult, const int32_t kernel_x, const int32_t kernel_y, const int32_t pad_x, const int32_t pad_y, const int32_t stride_x, const int32_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const int32_t output_x, const int32_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max)
 - 
static void depthwise_conv_s8_generic(const int8_t *input, const uint16_t input_batches, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const int8_t *kernel, const uint16_t output_ch, const uint16_t ch_mult, const uint16_t kernel_x, const uint16_t kernel_y, const uint16_t pad_x, const uint16_t pad_y, const uint16_t stride_x, const uint16_t stride_y, const int32_t *bias, int8_t *output, const int32_t *output_shift, const int32_t *output_mult, const uint16_t output_x, const uint16_t output_y, const int32_t output_offset, const int32_t input_offset, const int32_t output_activation_min, const int32_t output_activation_max, const uint16_t dilation_x, const uint16_t dilation_y)
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_s8(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Basic s8 depthwise convolution function that doesn’t have any constraints on the input dimensions. - Supported framework: TensorFlow Lite 
 - Parameters:
- ctx – [inout] Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if an additional buffer is required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- dw_conv_params – [in] Depthwise convolution parameters (e.g. strides, dilations, pads,…) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] Batch argument N is not used. 
- input – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [1, H, W, C_OUT] 
- kernel – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias – [in] Bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output – [inout] Output data pointer. Data type: int8 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_s8_opt(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *kernel, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. Refer riscv_depthwise_conv_s8() for function argument details. - Supported framework: TensorFlow Lite 
- The following constraints on the arguments apply - Number of input channels equals number of output channels or ch_mult equals 1 
 
- Recommended when number of channels is 4 or greater. 
 - Returns:
- The function returns one of the following - RISCV_NMSIS_NN_ARG_ERROR - input channel != output channel or ch_mult != 1 - RISCV_NMSIS_NN_SUCCESS - Successful operation
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s16(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int16_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int64_t *bias, const nmsis_nn_dims *output_dims, int16_t *output)
- Wrapper function to pick the right optimized s16 depthwise convolution function. - Supported framework: TensorFlow Lite 
- Picks one of the following functions - riscv_depthwise_conv_s16() 
- riscv_depthwise_conv_fast_s16() - RISC-V CPUs with DSP extension only 
 
 - Parameters:
- ctx – [inout] Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- dw_conv_params – [in] Depthwise convolution parameters (e.g. strides, dilations, pads,…) dw_conv_params->dilation is not used. dw_conv_params->input_offset : Not used dw_conv_params->output_offset : Not used 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. 
- input_data – [in] Input (activation) data pointer. Data type: int16 
- filter_dims – [in] Filter tensor dimensions. Format: [1, H, W, C_OUT] 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Bias data pointer. Data type: int64 
- output_dims – [in] Output tensor dimensions. Format: [1, H, W, C_OUT] 
- output_data – [inout] Output data pointer. Data type: int16 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS - Successful completion.
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s4(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Wrapper function to pick the right optimized s4 depthwise convolution function. - Supported framework: TensorFlow Lite 
 - Parameters:
- ctx – [inout] Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- dw_conv_params – [in] Depthwise convolution parameters (e.g. strides, dilations, pads,…) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [1, H, W, C_OUT] 
- filter_data – [in] Filter data pointer. Data type: int8_t packed 4-bit weights, e.g four sequential weights [0x1, 0x2, 0x3, 0x4] packed as [0x21, 0x43]. 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [1, H, W, C_OUT] 
- output_data – [inout] Output data pointer. Data type: int8 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS - Successful completion.
 
 - 
riscv_nmsis_nn_status riscv_depthwise_conv_wrapper_s8(const nmsis_nn_context *ctx, const nmsis_nn_dw_conv_params *dw_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input, const nmsis_nn_dims *filter_dims, const int8_t *filter, const nmsis_nn_dims *bias_dims, const int32_t *bias, const nmsis_nn_dims *output_dims, int8_t *output)
- Wrapper function to pick the right optimized s8 depthwise convolution function. - Supported framework: TensorFlow Lite 
- Picks one of the following functions - riscv_depthwise_conv_s8() 
- riscv_depthwise_conv_3x3_s8() - RISC-V CPUs with DSP extension only 
- riscv_depthwise_conv_s8_opt() 
 
- Check details of riscv_depthwise_conv_s8_opt() for potential data that can be accessed outside of the boundary. 
 - Parameters:
- ctx – [inout] Function context (e.g. temporary buffer). Check the function definition file to see if an additional buffer is required. Optional function {API}_get_buffer_size() provides the buffer size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- dw_conv_params – [in] Depthwise convolution parameters (e.g. strides, dilations, pads,…) dw_conv_params->dilation is not used. Range of dw_conv_params->input_offset : [-127, 128] Range of dw_conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each output channel 
- input_dims – [in] Input (activation) tensor dimensions. Format: [H, W, C_IN] Batch argument N is not used and assumed to be 1. 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [1, H, W, C_OUT] 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [1, H, W, C_OUT] 
- output_data – [inout] Output data pointer. Data type: int8 
 
- Returns:
- The function returns - RISCV_NMSIS_NN_SUCCESS - Successful completion.
 
 - 
riscv_nmsis_nn_status riscv_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
- Q7 depthwise separable convolution function. - Buffer size: - bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - bufferB size: 0 - Input dimension constraints: - ch_im_in equals ch_im_out - Implementation: There are 3 nested loops here: Inner loop: calculate each output value with MAC instruction over an accumulator; Mid loop: loop over different output channels; Outer loop: loop over different output (x, y) - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in – [in] input tensor dimension 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel – [in] filter kernel size 
- padding – [in] padding sizes 
- stride – [in] convolution stride 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out – [in] output tensor dimension 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
- Q7 depthwise separable convolution function (non-square shape) - This function is the version with full list of optimization tricks, but with some constraints: ch_im_in is equal to ch_im_out - Parameters:
- Im_in – [in] pointer to input tensor 
- dim_im_in_x – [in] input tensor dimension x 
- dim_im_in_y – [in] input tensor dimension y 
- ch_im_in – [in] number of input tensor channels 
- wt – [in] pointer to kernel weights 
- ch_im_out – [in] number of filters, i.e., output tensor channels 
- dim_kernel_x – [in] filter kernel size x 
- dim_kernel_y – [in] filter kernel size y 
- padding_x – [in] padding sizes x 
- padding_y – [in] padding sizes y 
- stride_x – [in] convolution stride x 
- stride_y – [in] convolution stride y 
- bias – [in] pointer to bias 
- bias_shift – [in] amount of left-shift for bias 
- out_shift – [in] amount of right-shift for output 
- Im_out – [inout] pointer to output tensor 
- dim_im_out_x – [in] output tensor dimension x 
- dim_im_out_y – [in] output tensor dimension y 
- bufferA – [inout] pointer to buffer space for input 
- bufferB – [inout] pointer to buffer space for output 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_SIZE_MISMATCH or - RISCV_NMSIS_NN_SUCCESS based on the outcome of size checking.
 
 - 
riscv_nmsis_nn_status riscv_transpose_conv_s8(const nmsis_nn_context *ctx, const nmsis_nn_context *output_ctx, const nmsis_nn_transpose_conv_params *transpose_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- Basic s8 transpose convolution function. - Supported framework: TensorFlow Lite micro 
- Additional memory is required for optimization. Refer to arguments ‘ctx’ and ‘output_ctx’ for details. 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_transpose_conv_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- output_ctx – [inout] Temporary scratch buffer. The required size is: output width * output height * output channel * 4. The caller is expected to clear the buffer, if applicable, for security reasons. 
- transpose_conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of transpose_conv_params->input_offset : [-127, 128] Range of transpose_conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each out channel. 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 - 
riscv_nmsis_nn_status riscv_transpose_conv_wrapper_s8(const nmsis_nn_context *ctx, const nmsis_nn_context *reverse_conv_ctx, const nmsis_nn_transpose_conv_params *transpose_conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)
- Wrapper to select optimal transposed convolution algorithm depending on parameters. - Supported framework: TensorFlow Lite micro 
- Additional memory is required for optimization. Refer to arguments ‘ctx’ and ‘output_ctx’ for details. 
 - Parameters:
- ctx – [inout] Function context that contains the additional buffer if required by the function. riscv_transpose_conv_s8_get_buffer_size will return the buffer_size if required. The caller is expected to clear the buffer, if applicable, for security reasons. 
- output_ctx – [inout] Temporary scratch buffer. The required size is: output width * output height * output channel * 4. The caller is expected to clear the buffer, if applicable, for security reasons. 
- transpose_conv_params – [in] Convolution parameters (e.g. strides, dilations, pads,…). Range of transpose_conv_params->input_offset : [-127, 128] Range of transpose_conv_params->output_offset : [-128, 127] 
- quant_params – [in] Per-channel quantization info. It contains the multiplier and shift values to be applied to each out channel. 
- input_dims – [in] Input (activation) tensor dimensions. Format: [N, H, W, C_IN] 
- input_data – [in] Input (activation) data pointer. Data type: int8 
- filter_dims – [in] Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial filter dimensions 
- filter_data – [in] Filter data pointer. Data type: int8 
- bias_dims – [in] Bias tensor dimensions. Format: [C_OUT] 
- bias_data – [in] Optional bias data pointer. Data type: int32 
- output_dims – [in] Output tensor dimensions. Format: [N, H, W, C_OUT] 
- output_data – [out] Output data pointer. Data type: int8 
 
- Returns:
- The function returns either - RISCV_NMSIS_NN_ARG_ERROR if argument constraints fail, or - RISCV_NMSIS_NN_SUCCESS on successful completion.
 
 
- 
riscv_nmsis_nn_status riscv_convolve_1_x_n_s4(const nmsis_nn_context *ctx, const nmsis_nn_conv_params *conv_params, const nmsis_nn_per_channel_quant_params *quant_params, const nmsis_nn_dims *input_dims, const int8_t *input_data, const nmsis_nn_dims *filter_dims, const int8_t *filter_data, const nmsis_nn_dims *bias_dims, const int32_t *bias_data, const nmsis_nn_dims *output_dims, int8_t *output_data)