Neural Network Convolution Functions

riscv_status riscv_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q15_basic(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q15_fast(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q7_basic(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q7_fast(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_convolve_HWC_q7_RGB(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_depthwise_conv_u8_basic_ver1(const uint8_t *input, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const uint8_t *kernel, const uint16_t kernel_x, const uint16_t kernel_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t input_offset, const int32_t filter_offset, const int32_t output_offset, uint8_t *output, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t out_shift, const int32_t out_mult)
riscv_status riscv_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)
riscv_status riscv_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)
group NNConv

Perform convolution layer.

The convolution is implemented in 2 steps: im2col and GEMM

im2col is a process of converting each patch of image data into a column. After im2col, the convolution is computed as matrix-matrix multiplication.

To reduce the memory footprint, the im2col is performed partially. In each iteration, only a few columns (i.e., patches) are generated and computed with GEMM kernels similar to the NMSIS-DSP riscv_mat_mult functions.

Functions

riscv_status riscv_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)

Fast Q7 version of 1x1 convolution (non-square shape)

This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise separable convolution.

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in_x: input tensor dimension x

  • [in] dim_im_in_y: input tensor dimension y

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel_x: filter kernel size x

  • [in] dim_kernel_y: filter kernel size y

  • [in] padding_x: padding size x

  • [in] padding_y: padding size y

  • [in] stride_x: convolution stride x

  • [in] stride_y: convolution stride y

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out_x: output tensor dimension x

  • [in] dim_im_out_y: output tensor dimension y

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2.

[1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications https://arxiv.org/abs/1704.04861

riscv_status riscv_convolve_HWC_q15_basic(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)

Basic Q15 convolution function.

Buffer size:

Return

The function returns RISCV_MATH_SUCCESS

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in: input tensor dimension

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel: filter kernel size

  • [in] padding: padding sizes

  • [in] stride: convolution stride

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out: output tensor dimension

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

This basic version is designed to work for any input tensor and weight dimension.

riscv_status riscv_convolve_HWC_q15_fast(const q15_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)

Fast Q15 convolution function.

Buffer size:

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in: input tensor dimension

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel: filter kernel size

  • [in] padding: padding sizes

  • [in] stride: convolution stride

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out: output tensor dimension

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in is multiple of 2

ch_im_out is multiple of 2

riscv_status riscv_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q15_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q15_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q15_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)

Fast Q15 convolution function (non-square shape)

Buffer size:

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in_x: input tensor dimension x

  • [in] dim_im_in_y: input tensor dimension y

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel_x: filter kernel size x

  • [in] dim_kernel_y: filter kernel size y

  • [in] padding_x: padding size x

  • [in] padding_y: padding size y

  • [in] stride_x: convolution stride x

  • [in] stride_y: convolution stride y

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out_x: output tensor dimension x

  • [in] dim_im_out_y: output tensor dimension y

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y

bufferB size: 0

Input dimension constraints:

ch_im_in is multiple of 2

ch_im_out is multiple of 2

riscv_status riscv_convolve_HWC_q7_basic(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)

Basic Q7 convolution function.

Buffer size:

Return

The function returns RISCV_MATH_SUCCESS

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in: input tensor dimension

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel: filter kernel size

  • [in] padding: padding sizes

  • [in] stride: convolution stride

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out: output tensor dimension

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

This basic version is designed to work for any input tensor and weight dimension.

riscv_status riscv_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)

Basic Q7 convolution function (non-square shape)

Return

The function returns RISCV_MATH_SUCCESS

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in_x: input tensor dimension x

  • [in] dim_im_in_y: input tensor dimension y

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel_x: filter kernel size x

  • [in] dim_kernel_y: filter kernel size y

  • [in] padding_x: padding size x

  • [in] padding_y: padding size y

  • [in] stride_x: convolution stride x

  • [in] stride_y: convolution stride y

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out_x: output tensor dimension x

  • [in] dim_im_out_y: output tensor dimension y

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

riscv_status riscv_convolve_HWC_q7_fast(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)

Fast Q7 convolution function.

Buffer size:

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in: input tensor dimension

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel: filter kernel size

  • [in] padding: padding sizes

  • [in] stride: convolution stride

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out: output tensor dimension

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in is multiple of 4 ( because of the SIMD32 read and swap )

ch_im_out is multiple of 2 ( because of the 2x2 mat_mult kernel )

The im2col converts the Q7 tensor input into Q15 columns, which are stored in bufferA. Reordering happens during this im2col process with riscv_q7_to_q15_reordered_no_shift. For every four elements, the second and third elements are swapped.

The computation kernel riscv_nn_mat_mult_kernel_q7_q15_reordered does the GEMM computation with the reordered columns.

To speed-up the determination of the padding condition, we split the computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. This reduces the total number of boundary condition checks and improves the data copying performance.

riscv_status riscv_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)

Fast Q7 convolution function (non-square shape)

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 4; ch_im_out is a multiple of 2.

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in_x: input tensor dimension x

  • [in] dim_im_in_y: input tensor dimension y

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel_x: filter kernel size x

  • [in] dim_kernel_y: filter kernel size y

  • [in] padding_x: padding size x

  • [in] padding_y: padding size y

  • [in] stride_x: convolution stride x

  • [in] stride_y: convolution stride y

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out_x: output tensor dimension x

  • [in] dim_im_out_y: output tensor dimension y

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

riscv_status riscv_convolve_HWC_q7_RGB(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)

Q7 convolution function for RGB image.

Q7 version of convolution for RGB image.

Buffer size:

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in: input tensor dimension

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel: filter kernel size

  • [in] padding: padding sizes

  • [in] stride: convolution stride

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out: output tensor dimension

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in equals 3

This kernel is written exclusively for convolution with ch_im_in equals 3. This applies on the first layer of CNNs which has input image with RGB format.

riscv_status riscv_depthwise_conv_u8_basic_ver1(const uint8_t *input, const uint16_t input_x, const uint16_t input_y, const uint16_t input_ch, const uint8_t *kernel, const uint16_t kernel_x, const uint16_t kernel_y, const int16_t ch_mult, const int16_t pad_x, const int16_t pad_y, const int16_t stride_x, const int16_t stride_y, const int16_t dilation_x, const int16_t dilation_y, const int32_t *bias, const int32_t input_offset, const int32_t filter_offset, const int32_t output_offset, uint8_t *output, const uint16_t output_x, const uint16_t output_y, const int32_t output_activation_min, const int32_t output_activation_max, const int32_t out_shift, const int32_t out_mult)

uint8 depthwise convolution function with asymmetric quantization for even number of channel multiplier and input channels. Unless specified otherwise, arguments are mandatory. Both square and non-square inputs are accepted.

uint8 depthwise convolution function with asymmetric quantization for even number of channel multiplier and input channels. Unless specified otherwise, arguments are mandatory.

Input constraints: ch_mult is a multiple of 2; kernel_x is a multiple of 2.

Return

The function returns one of the following: RISCV_MATH_SIZE_MISMATCH - Unsupported dimension of tensors; RISCV_MATH_SUCCESS - Successful operation; RISCV_MATH_ARGUMENT_ERROR - Implementation not available.

Parameters
  • [in] input: Pointer to input tensor

  • [in] input_x: Width of input tensor

  • [in] input_y: Height of input tensor

  • [in] input_ch: Channels in input tensor

  • [in] kernel: Pointer to kernel weights

  • [in] kernel_x: Width of kernel

  • [in] kernel_y: Height of kernel

  • [in] ch_mult: Number of channel multiplier

  • [in] pad_x: Padding sizes x

  • [in] pad_y: Padding sizes y

  • [in] stride_x: Convolution stride along the width

  • [in] stride_y: Convolution stride along the height

  • [in] dilation_x: Dilation along width. Not used and intended for future enhancement.

  • [in] dilation_y: Dilation along height. Not used and intended for future enhancement.

  • [in] bias: Pointer to optional bias values. If no bias is available, NULL is expected

  • [in] input_offset: Input tensor zero offset

  • [in] filter_offset: Kernel tensor zero offset

  • [in] output_offset: Output tensor zero offset

  • [inout] output: Pointer to output tensor

  • [in] output_x: Width of output tensor

  • [in] output_y: Height of output tensor

  • [in] output_activation_min: Minimum value to clamp the output to. Range : {0, 255}

  • [in] output_activation_max: Maximum value to clamp the output to. Range : {0, 255}

  • [in] out_shift: Amount of right-shift for output

  • [in] out_mult: Output multiplier for requantization

riscv_status riscv_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out, q15_t *bufferA, q7_t *bufferB)

Q7 depthwise separable convolution function.

Buffer size:

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in: input tensor dimension

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel: filter kernel size

  • [in] padding: padding sizes

  • [in] stride: convolution stride

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out: output tensor dimension

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output

bufferA size: 2*ch_im_in*dim_kernel*dim_kernel

bufferB size: 0

Input dimension constraints:

ch_im_in equals ch_im_out

Implementation: There are 3 nested loops here: Inner loop: calculate each output value with MAC instruction over an accumulator; Mid loop: loop over different output channels; Outer loop: loop over different output (x, y).

riscv_status riscv_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, const uint16_t dim_im_in_x, const uint16_t dim_im_in_y, const uint16_t ch_im_in, const q7_t *wt, const uint16_t ch_im_out, const uint16_t dim_kernel_x, const uint16_t dim_kernel_y, const uint16_t padding_x, const uint16_t padding_y, const uint16_t stride_x, const uint16_t stride_y, const q7_t *bias, const uint16_t bias_shift, const uint16_t out_shift, q7_t *Im_out, const uint16_t dim_im_out_x, const uint16_t dim_im_out_y, q15_t *bufferA, q7_t *bufferB)

Q7 depthwise separable convolution function (non-square shape)

This function is the version with the full list of optimization tricks, but with some constraints: ch_im_in is a multiple of 2; ch_im_out is a multiple of 2.

Return

The function returns either RISCV_MATH_SIZE_MISMATCH or RISCV_MATH_SUCCESS based on the outcome of size checking.

Parameters
  • [in] Im_in: pointer to input tensor

  • [in] dim_im_in_x: input tensor dimension x

  • [in] dim_im_in_y: input tensor dimension y

  • [in] ch_im_in: number of input tensor channels

  • [in] wt: pointer to kernel weights

  • [in] ch_im_out: number of filters, i.e., output tensor channels

  • [in] dim_kernel_x: filter kernel size x

  • [in] dim_kernel_y: filter kernel size y

  • [in] padding_x: padding sizes x

  • [in] padding_y: padding sizes y

  • [in] stride_x: convolution stride x

  • [in] stride_y: convolution stride y

  • [in] bias: pointer to bias

  • [in] bias_shift: amount of left-shift for bias

  • [in] out_shift: amount of right-shift for output

  • [inout] Im_out: pointer to output tensor

  • [in] dim_im_out_x: output tensor dimension x

  • [in] dim_im_out_y: output tensor dimension y

  • [inout] bufferA: pointer to buffer space for input

  • [inout] bufferB: pointer to buffer space for output