Nuclei N2 SIMD DSP Additional Instructions

__STATIC_FORCEINLINE unsigned long long __RV_DKHMX8 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKHMX16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKABS32 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32 (unsigned long long a, int b)
__STATIC_FORCEINLINE unsigned long long __RV_DKADD32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKSUB32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRADD16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSUB16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRADD32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSUB32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DMSR16 (unsigned long a, unsigned long b)
__STATIC_FORCEINLINE unsigned long long __RV_DMSR17 (unsigned long a, unsigned long b)
__STATIC_FORCEINLINE unsigned long long __RV_DMSR33 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DMXSR33 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long __RV_DREDAS16 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long __RV_DREDSA16 (unsigned long long a)
__STATIC_FORCEINLINE int16_t __RV_DKCLIP64 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DKMDA (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKMXDA (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMDRS (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMXDS (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMBB32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMBT32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMTT32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKBB32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKBT32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKTT32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKTB32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKTB16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKBB16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKBT16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPKTT16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSRA16 (unsigned long long a, unsigned long b)
__STATIC_FORCEINLINE unsigned long long __RV_DADD16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DADD32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMBB16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMBT16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSMTT16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRSUB16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSTSA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSTAS32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DCRSA32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DCRAS32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DRSUB32 (unsigned long long a, unsigned long long b)
__STATIC_FORCEINLINE unsigned long long __RV_DPACK32 (signed long a, signed long b)
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831 (unsigned long long a)
__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832 (unsigned long long a)
__RV_DSCLIP8(a, b)
__RV_DSCLIP16(a, b)
__RV_DSCLIP32(a, b)
group Nuclei N2 SIMD DSP Additional Instructions

(RV32 only)Nuclei Customized N2 DSP Instructions

This is Nuclei customized DSP N2 instructions only for RV32

Defines

__RV_DSCLIP8(a, b)

DSCLIP8 (8-bit Signed Saturation and Clip)

Type: SIMD

Syntax:

DSCLIP8 Rd, Rs1, imm3u[2:0]
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Limit the 8-bit signed integer elements of a register into a signed range simultaneously.

Description

:

This instruction limits the 8-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm3u and 2^imm3u-1, and writes the limited results to Rd. For example, if imm3u is 3, the 8-bit input values should be saturated between 7 and -8. If saturation is performed, set OV bit to 1.

Operations:

src = Rs1.B[x];
if (src > (2^imm3u)-1) {
  src = (2^imm3u)-1;
  OV = 1;
} else if (src < -2^imm3u) {
  src = -2^imm3u;
  OV = 1;
}
Rd.B[x] = src
x=7...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__RV_DSCLIP16(a, b)

DSCLIP16 (16-bit Signed Saturation and Clip)

Type: SIMD

Syntax:

DSCLIP16 Rd, Rs1, imm4u[3:0]
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Limit the 16-bit signed integer elements of a register into a signed range simultaneously.

Description

:

This instruction limits the 16-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm4u and 2^imm4u-1, and writes the limited results to Rd. For example, if imm4u is 3, the 16-bit input values should be saturated between 7 and -8. If saturation is performed, set OV bit to 1.

Operations:

src = Rs1.H[x];
if (src > (2^imm4u)-1) {
  src = (2^imm4u)-1;
  OV = 1;
} else if (src < -2^imm4u) {
  src = -2^imm4u;
  OV = 1;
}
Rd.H[x] = src
x=3...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__RV_DSCLIP32(a, b)

DSCLIP32 (32-bit Signed Saturation and Clip)

Type: SIMD

Syntax:

DSCLIP32 Rd, Rs1, imm5u[4:0]
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Limit the 32-bit signed integer elements of a register into a signed range simultaneously.

Description

:

This instruction limits the 32-bit signed integer elements stored in Rs1 into a signed integer range between -2^imm5u and 2^imm5u-1, and writes the limited results to Rd. For example, if imm5u is 3, the 32-bit input values should be saturated between 7 and -8. If saturation is performed, set OV bit to 1.

Operations:

src = Rs1.W[x];
if (src > (2^imm5u)-1) {
  src = (2^imm5u)-1;
  OV = 1;
} else if (src < -2^imm5u) {
  src = -2^imm5u;
  OV = 1;
}
Rd.W[x] = src
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

Functions

__STATIC_FORCEINLINE unsigned long long __RV_DKHMX8 (unsigned long long a, unsigned long long b)

DKHMX8 (64-bit SIMD Signed Crossed Saturating Q7 Multiply)

Type: SIMD

Syntax:

DKHMX8 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do Q7xQ7 element crossed multiplications simultaneously. The Q15 results are then reduced to Q7 numbers again.

Description

:

For the DKHMX8 instruction, multiply the top 8-bit Q7 content of 16-bit chunks in Rs1 with the bottom 8-bit Q7 content of 16-bit chunks in Rs2. At the same time, multiply the bottom 8-bit Q7 content of 16-bit chunks in Rs1 with the top 8-bit Q7 content of 16-bit chunks in Rs2.

The Q14 results are then right-shifted 7-bits and saturated into Q7 values. The Q7 results are then written into Rd. When both the two Q7 inputs of a multiplication are 0x80, saturation will happen. The result will be saturated to 0x7F and the overflow flag OV will be set.

Operations:

op1t = Rs1.B[x+1]; op2t = Rs2.B[x]; // top
op1b = Rs1.B[x]; op2b = Rs2.B[x+1]; // bottom
for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  if (0x80 != aop | 0x80 != bop) {
    res = (aop s* bop) >> 7;
  } else {
    res= 0x7F;
    OV = 1;
  }
}
Rd.H[x/2] = concat(rest, resb);
for RV32, x=0,2,4,6

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKHMX16 (unsigned long long a, unsigned long long b)

DKHMX16 (64-bit SIMD Signed Crossed Saturating Q15 Multiply)

Type: SIMD

Syntax:

DKHMX16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do Q15xQ15 element crossed multiplications simultaneously. The Q31 results are then reduced to Q15 numbers again.

Description

:

For the DKHMX16 instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2.

The Q30 results are then right-shifted 15-bits and saturated into Q15 values. The Q15 results are then written into Rd. When both the two Q15 inputs of a multiplication are 0x8000, saturation will happen. The result will be saturated to 0x7FFF and the overflow flag OV will be set.

Operations:

op1t = Rs1.H[x+1]; op2t = Rs2.H[x]; // top
op1b = Rs1.H[x]; op2b = Rs2.H[x+1]; // bottom
for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  if (0x8000 != aop | 0x8000 != bop) {
    res = (aop s* bop) >> 15;
  } else {
    res= 0x7FFF;
    OV = 1;
  }
}
Rd.W[x/2] = concat(rest, resb);
for RV32, x=0,2

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL (unsigned long long a, unsigned long long b)

DSMMUL (64-bit MSW 32x32 Signed Multiply)

Type: SIMD

Syntax:

DSMMUL Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do MSW 32x32 element signed multiplications simultaneously. The results are written into Rd.

Description

:

This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction rounds up the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.

Operations:

op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  res = (aop s* bop)[63:32];
}
Rd = concat(rest, resb);
x=0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMMUL_U (unsigned long long a, unsigned long long b)

DSMMUL.u (64-bit MSW 32x32 Unsigned Multiply)

Type: SIMD

Syntax:

DSMMUL.u Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do MSW 32x32 element unsigned multiplications simultaneously. The results are written into Rd.

Description

:

This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2 and writes the most significant 32-bit multiplication results to the corresponding 32-bit elements of Rd. The 32-bit elements of Rs1 and Rs2 are treated as unsigned integers. The .u form of the instruction rounds up the most significant 32-bit of the 64-bit multiplication results by adding a 1 to bit 31 of the results.

Operations:

op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  res = RUND(aop u* bop)[63:32];
}
Rd = concat(rest, resb);
x=0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL (unsigned long long a, unsigned long long b)

DKWMMUL (64-bit MSW 32x32 Signed Multiply & Double)

Type: SIMD

Syntax:

DKWMMUL Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do MSW 32x32 element signed multiplications simultaneously and double. The results are written into Rd.

Description

:

This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts the multiplication results one bit to the left and takes the most significant 32-bit results. If the shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit 30 before the shift and saturation operations.

Operations:

op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
    res = sat.q31((aop s* bop) << 1)[63:32];
}
Rd = concat(rest, resb);
x=0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKWMMUL_U (unsigned long long a, unsigned long long b)

DKWMMUL.u (64-bit MSW 32x32 Unsigned Multiply & Double)

Type: SIMD

Syntax:

DKWMMUL.u Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do MSW 32x32 element unsigned multiplications simultaneously and double. The results are written into Rd.

Description

:

This instruction multiplies the 32-bit elements of Rs1 with the 32-bit elements of Rs2. It then shifts the multiplication results one bit to the left and takes the most significant 32-bit results. If the shifted result is greater than 2^31-1, it is saturated to 2^31-1 and the OV flag is set to 1. The final element result is written to Rd. The 32-bit elements of Rs1 and Rs2 are treated as signed integers. The .u form of the instruction additionally rounds up the 64-bit multiplication results by adding a 1 to bit 30 before the shift and saturation operations.

Operations:

op1t = Rs1.W[x+1]; op2t = Rs2.W[x+1]; // top
op1b = Rs1.W[x]; op2b = Rs2.W[x]; // bottom
for ((aop,bop,res) in [(op1t,op2t,rest), (op1b,op2b,resb)]) {
  res = sat.q31(RUND(aop u* bop) << 1)[63:32];
}
Rd = concat(rest, resb);
x=0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKABS32 (unsigned long long a)

DKABS32 (64-bit SIMD 32-bit Saturating Absolute)

Type: SIMD

Syntax:

DKABS32 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Get the absolute value of 32-bit signed integer elements simultaneously.

Description

:

This instruction calculates the absolute value of 32-bit signed integer elements stored in Rs1 and writes the element results to Rd. If the input number is 0x8000_0000, this instruction generates 0x7fff_ffff as the output and sets the OV bit to 1.

Operations:

src = Rs1.W[x];
if (src == 0x8000_0000) {
  src = 0x7fff_ffff;
  OV = 1;
} else if (src[31] == 1) {
  src = -src;
}
Rd.W[x] = src;
x=1...0

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKSLRA32 (unsigned long long a, int b)

DKSLRA32 (64-bit SIMD 32-bit Shift Left Logical with Saturation or Shift Right Arithmetic)

Type: SIMD

Syntax:

DKSLRA32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit elements logical left (positive) or arithmetic right (negative) shift operation with Q31 saturation for the left shift.

Description

:

The 32-bit data elements of Rs1 are left-shifted logically or right-shifted arithmetically based on the value of Rs2[5:0]. Rs2[5:0] is in the signed range of [-2^5, 2^5-1]. A positive Rs2[5:0] means logical left shift and a negative Rs2[5:0] means arithmetic right shift. The shift amount is the absolute value of Rs2[5:0]. However, the behavior of Rs2[5:0]==-2^5 (0x20) is defined to be equivalent to the behavior of Rs2[5:0]==-(2^5-1) (0x21).

Operations:

if (Rs2[5:0] < 0) {
  sa = -Rs2[5:0];
  sa = (sa == 32)? 31 : sa;
  Rd.W[x] = SE32(Rs1.W[x][31:sa]);
} else {
  sa = Rs2[4:0];
  res[(31+sa):0] = Rs1.W[x] <<(logic) sa;
  if (res > (2^31)-1) {
    res[31:0] = 0x7fff_ffff; OV = 1;
  } else if (res < -2^31) {
    res[31:0] = 0x8000_0000; OV = 1;
  }
  Rd.W[x] = res[31:0];
}
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] int type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKADD32 (unsigned long long a, unsigned long long b)

DKADD32 (64-bit SIMD 32-bit Signed Saturating Addition)

Type: SIMD

Syntax:

DKADD32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element saturating additions simultaneously.

Description

:

This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.

Operations:

res[x] = Rs1.W[x] + Rs2.W[x];
if (res[x] > (2^31)-1) {
  res[x] = 0x7fff_ffff;
  OV = 1;
} else if (res[x] < -2^31) {
  res[x] = 0x8000_0000;
  OV = 1;
}
Rd.W[x] = res[x];
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKSUB32 (unsigned long long a, unsigned long long b)

DKSUB32 (64-bit SIMD 32-bit Signed Saturating Subtraction)

Type: SIMD

Syntax:

DKSUB32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element saturating subtractions simultaneously.

Description

:

This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to Rd.

Operations:

res[x] = Rs1.W[x] - Rs2.W[x];
if (res[x] > (2^31)-1) {
  res[x] = (2^31)-1;
  OV = 1;
} else if (res[x] < -2^31) {
  res[x] = -2^31;
  OV = 1;
}
Rd.W[x] = res[x];
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRADD16 (unsigned long long a, unsigned long long b)

DRADD16 (64-bit SIMD 16-bit Halving Signed Addition)

Type: SIMD

Syntax:

DRADD16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.

Description

:

This instruction adds the 16-bit signed integer elements in Rs1 with the 16-bit signed integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to Rd.

Operations:

Rd.H[x] = [(Rs1.H[x]) + (Rs2.H[x])] s>> 1;
x=3...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUB16 (unsigned long long a, unsigned long long b)

DSUB16 (64-bit SIMD 16-bit Subtraction)

Type: SIMD

Syntax:

DSUB16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit integer element subtractions simultaneously.

Description

:

This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The results are written to Rd.

Operations:

Rd.H[x] = [(Rs1.H[x]) - (Rs2.H[x])] ;
x=3...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRADD32 (unsigned long long a, unsigned long long b)

DRADD32 (64-bit SIMD 32-bit Halving Signed Addition)

Type: SIMD

Syntax:

DRADD32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element additions simultaneously. The results are halved to avoid overflow or saturation.

Description

:

This instruction adds the 32-bit signed integer elements in Rs1 with the 32-bit signed integer elements in Rs2. The results are first arithmetically right-shifted by 1 bit and then written to Rd.

Operations:

Rd.W[x] = [(Rs1.W[x]) + (Rs2.W[x])] s>> 1;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUB32 (unsigned long long a, unsigned long long b)

DSUB32 (64-bit SIMD 32-bit Subtraction)

Type: SIMD

Syntax:

DSUB32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit integer element subtractions simultaneously.

Description

:

This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The results are written to Rd.

Operations:

Rd.W[x] = [(Rs1.W[x]) - (Rs2.W[x])] ;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DMSR16 (unsigned long a, unsigned long b)

DMSR16 (Signed Multiply Halfs with Right Shift 16-bit and Cross Multiply Halfs with Right Shift 16-bit)

Type: SIMD

Syntax:

DMSR16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; each multiplication performs a right shift operation.

Description

:

For the DMSR16 instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. The Q31 results are then right-shifted 16-bits and clipped to Q15 values. The Q15 results are then written into Rd.

Operations:

Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 16
Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 16
Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 16
Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 16

Parameters:
  • a[in] unsigned long type of value stored in a

  • b[in] unsigned long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DMSR17 (unsigned long a, unsigned long b)

DMSR17 (Signed Multiply Halfs with Right Shift 17-bit and Cross Multiply Halfs with Right Shift 17-bit)

Type: SIMD

Syntax:

DMSR17 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do two signed 16-bit multiplications and cross multiplications from the 16-bit elements of two registers; each multiplication performs a right shift operation.

Description

:

For the DMSR17 instruction, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2, multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in Rs2. At the same time, multiply the top 16-bit Q15 content of 32-bit chunks in Rs1 with the bottom 16-bit Q15 content of 32-bit chunks in Rs2 and multiply the bottom 16-bit Q15 content of 32-bit chunks in Rs1 with the top 16-bit Q15 content of 32-bit chunks in Rs2. The Q31 results are then right-shifted 17-bits and clipped to Q15 values. The Q15 results are then written into Rd.

Operations:

Rd.H[0] = (Rs1.H[0] s* Rs2.H[0]) s>> 17
Rd.H[1] = (Rs1.H[1] s* Rs2.H[1]) s>> 17
Rd.H[2] = (Rs1.H[1] s* Rs2.H[0]) s>> 17
Rd.H[3] = (Rs1.H[0] s* Rs2.H[1]) s>> 17

Parameters:
  • a[in] unsigned long type of value stored in a

  • b[in] unsigned long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DMSR33 (unsigned long long a, unsigned long long b)

DMSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit)

Type: SIMD

Syntax:

DMSR33 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do two signed 32-bit multiplications from the 32-bit elements of two registers, and each multiplication performs a right shift operation.

Description

:

For the DMSR33 instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31 content of 64-bit chunks in Rs2. The Q64 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.

Operations:

Rd.W[0] = (Rs1.W[0] s* Rs2.W[0]) s>> 33
Rd.W[1] = (Rs1.W[1] s* Rs2.W[1]) s>> 33

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DMXSR33 (unsigned long long a, unsigned long long b)

DMXSR33 (Signed Multiply with Right Shift 33-bit and Cross Multiply with Right Shift 33-bit)

Type: SIMD

Syntax:

DMXSR33 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do two signed 32-bit cross multiplications from the 32-bit elements of two registers, and each multiplication performs a right shift operation.

Description

:

For the DMXSR33 instruction, multiply the top 32-bit Q31 content of 64-bit chunks in Rs1 with the bottom 32-bit Q31 content of 64-bit chunks in Rs2. At the same time, multiply the bottom 32-bit Q31 content of 64-bit chunks in Rs1 with the top 32-bit Q31 content of 64-bit chunks in Rs2. The Q63 results are then right-shifted 33-bits and clipped to Q31 values. The Q31 results are then written into Rd.

Operations:

Rd.W[0] = (Rs1.W[0] s* Rs2.W[1]) s>> 33
Rd.W[1] = (Rs1.W[1] s* Rs2.W[0]) s>> 33

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long __RV_DREDAS16 (unsigned long long a)

DREDAS16 (Reduced Addition and Reduced Subtraction)

Type: SIMD

Syntax:

DREDAS16 Rd, Rs1
# Rs1 is an even/odd pair of registers

Purpose

:

Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.

Description

:

For the DREDAS16 instruction, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the bottom 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, add the top 16-bit Q15 element to the bottom 16-bit Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.

Operations:

Rd.H[0] = Rs1.H[0] - Rs1.H[1]
Rd.H[1] = Rs1.H[2] + Rs1.H[3]

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long type

__STATIC_FORCEINLINE unsigned long __RV_DREDSA16 (unsigned long long a)

DREDSA16 (Reduced Subtraction and Reduced Addition)

Type: SIMD

Syntax:

DREDSA16 Rd, Rs1
# Rs1 is an even/odd pair of registers

Purpose

:

Do halfs reduced subtraction and halfs reduced addition from a register. The result is written to Rd.

Description

:

For the DREDSA16 instruction, add the top 16-bit Q15 element to the bottom 16-bit Q15 element of the bottom 32-bit Q31 content of 64-bit chunks in Rs1. At the same time, subtract the top 16-bit Q15 element from the bottom 16-bit Q15 element of the top 32-bit Q31 content of 64-bit chunks in Rs1. The two Q15 results are then written into Rd.

Operations:

Rd.H[0] = Rs1.H[0] + Rs1.H[1]
Rd.H[1] = Rs1.H[2] - Rs1.H[3]

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long type

__STATIC_FORCEINLINE int16_t __RV_DKCLIP64 (unsigned long long a)

DKCLIP64 (64-bit Clipped to 16-bit Saturation Value)

Type: SIMD

Syntax:

DKCLIP64 Rd, Rs1
# Rs1 is an even/odd pair of registers

Purpose

:

Do 15-bit element arithmetic right shift operations and limit result into 32-bit int, then do saturate operation to 16-bit and clip result to 16-bit Q15.

Description

:

For the DKCLIP64 instruction, shift the input 15 bits to the right and convert the result to 32-bit int type, after which the value is saturated to limit the data to between 2^15-1 and -2^15. The result is converted to 16-bit Q15 type. The final results are written to Rd.

Operations:

const int32_t max = (int32_t)((1U << 15U) - 1U);
const int32_t min = -1 - max ;
int32_t val = (int32_t)(Rs s>> 15);
if (val > max) {
  Rd = max;
} else if (val < min) {
  Rd = min;
} else {
  Rd = (int16_t)val;
}

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in int16_t type

__STATIC_FORCEINLINE unsigned long long __RV_DKMDA (unsigned long long a, unsigned long long b)

DKMDA (Signed Multiply Two Halfs and Add)

Type: SIMD

Syntax:

DKMDA Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together. The addition result may be saturated.

Description

:

This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are written to Rd. The 16-bit contents are treated as signed integers.

Operations:

if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
  Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[1]) + (Rs1.W[x].H[0] * Rs2.W[x].H[0]);
} else {
  Rd.W[x] = 0x7fffffff;
  OV = 1;
}
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKMXDA (unsigned long long a, unsigned long long b)

DKMXDA (Signed Crossed Multiply Two Halfs and Add)

Type: SIMD

Syntax:

DKMXDA Rd, Rs1, Rs2

Purpose

:

Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then adds the two 32-bit results together. The addition result may be saturated.

  • DKMXDA: top*bottom + top*bottom (per 32-bit element)

Description

:

This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2 and then adds the result to the result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. The addition result is checked for saturation. If saturation happens, the result is saturated to 2^31-1. The final results are written to Rd. The 16-bit contents are treated as signed integers.

Operations:

if (Rs1.W[x] != 0x80008000) or (Rs2.W[x] != 0x80008000){
Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) + (Rs1.W[x].H[0] * Rs2.W[x].H[1]);
} else {
Rd.W[x] = 0x7fffffff;
OV = 1;
}
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMDRS (unsigned long long a, unsigned long long b)

DSMDRS (Signed Multiply Two Halfs and Reverse Subtract)

Type: SIMD

Syntax:

DSMDRS Rd, Rs1, Rs2

Purpose

:

Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation between the two 32-bit results.

  • DSMDRS: bottom*bottom - top*top (per 32-bit element)

Description

:

This instruction multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of multiplying the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. The subtraction result is written to the corresponding 32-bit element of Rd (The 16-bit contents of multiplication are treated as signed integers).

Operations:

Rd.W[x] = (Rs1.W[x].H[0] * Rs2.W[x].H[0]) - (Rs1.W[x].H[1] * Rs2.W[x].H[1]); x = 1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMXDS (unsigned long long a, unsigned long long b)

DSMXDS (Signed Crossed Multiply Two Halfs and Subtract)

Type: SIMD

Syntax:

DSMXDS Rd, Rs1, Rs2

Purpose

:

Do two signed 16-bit multiplications from the 32-bit elements of two registers; and then perform a subtraction operation between the two 32-bit results.

  • DSMXDS: top*bottom - bottom*top (per 32-bit element)

Description

:

This instruction multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2 and then subtracts the result from the result of multiplying the top 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. The subtraction result is written to the corresponding 32-bit element of Rd. The 16-bit contents of multiplication are treated as signed integers.

Operations:

Rd.W[x] = (Rs1.W[x].H[1] * Rs2.W[x].H[0]) - (Rs1.W[x].H[0] * Rs2.W[x].H[1]); x = 1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE long long __RV_DSMBB32 (unsigned long long a, unsigned long long b)

DSMBB32 (Signed Multiply Bottom Word & Bottom Word)

Type: SIMD

Syntax:

DSMBB32 Rd, Rs1, Rs2

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.

  • DSMBB32: bottom*bottom

Description

:

This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[0] * Rs2.W[0]);
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA14 (unsigned long long a, unsigned long long b)

DSMBB32.sra14 (Signed Multiply Bottom Word & Bottom Word with Right Shift 14)

Type: SIMD

Syntax:

DSMBB32.sra14 Rd, Rs1, Rs2

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift the product by 14 bits, and finally write the 64-bit result to a third register.

  • DSMBB32.sra14: bottom*bottom s>> 14

Description

:

This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[0] * Rs2.W[0]) s>> 14;
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMBB32_SRA32 (unsigned long long a, unsigned long long b)

DSMBB32.sra32 (Signed Multiply Bottom Word & Bottom Word with Right Shift 32)

Type: SIMD

Syntax:

DSMBB32.sra32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift the product by 32 bits, and finally write the 64-bit result to a third register.

  • DSMBB32.sra32: bottom*bottom s>> 32

Description

:

This instruction multiplies the bottom 32-bit element of Rs1 with the bottom 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[0] * Rs2.W[0]) s>> 32;
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMBT32 (unsigned long long a, unsigned long long b)

DSMBT32 (Signed Multiply Bottom Word & Top Word)

Type: SIMD

Syntax:

DSMBT32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.

  • DSMBT32: bottom*top

Description

:

This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[0] * Rs2.W[1]);
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA14 (unsigned long long a, unsigned long long b)

DSMBT32.sra14 (Signed Multiply Bottom Word & Top Word with Right Shift 14)

Type: SIMD

Syntax:

DSMBT32.sra14 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift the product by 14 bits, and finally write the 64-bit result to a third register.

  • DSMBT32.sra14: bottom*top s>> 14

Description

:

This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[0] * Rs2.W[1]) s>> 14;
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMBT32_SRA32 (unsigned long long a, unsigned long long b)

DSMBT32.sra32 (Signed Multiply Bottom Word & Top Word with Right Shift 32)

Type: SIMD

Syntax:

DSMBT32.sra32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift the product by 32 bits, and finally write the 64-bit result to a third register.

  • DSMBT32.sra32: bottom*top s>> 32

Description

:

This instruction multiplies the bottom 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[0] * Rs2.W[1]) s>> 32;
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMTT32 (unsigned long long a, unsigned long long b)

DSMTT32 (Signed Multiply Top Word & Top Word)

Type: SIMD

Syntax:

DSMTT32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register and write the 64-bit result to a third register.

  • DSMTT32: top*top

Description

:

This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication result is written to Rd. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = Rs1.W[1] * Rs2.W[1];
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA14 (unsigned long long a, unsigned long long b)

DSMTT32.sra14 (Signed Multiply Top Word & Top Word with Right Shift 14-bit)

Type: SIMD

Syntax:

DSMTT32.sra14 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift the product by 14 bits, and finally write the 64-bit result to a third register.

  • DSMTT32.sra14: top*top s>> 14

Description

:

This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 14-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[1] * Rs2.W[1]) s>> 14;
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE long long __RV_DSMTT32_SRA32 (unsigned long long a, unsigned long long b)

DSMTT32.sra32 (Signed Multiply Top Word & Top Word with Right Shift 32-bit)

Type: SIMD

Syntax:

DSMTT32.sra32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 32-bit element of a register with the signed 32-bit element of another register, then right shift the product by 32 bits, and finally write the 64-bit result to a third register.

  • DSMTT32.sra32: top*top s>> 32

Description

:

This instruction multiplies the top 32-bit element of Rs1 with the top 32-bit element of Rs2. The 64-bit multiplication result is written to Rd after right shift 32-bit. The 32-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

res = (Rs1.W[1] * Rs2.W[1]) s>> 32;
Rd = res;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKBB32 (unsigned long long a, unsigned long long b)

DPKBB32 (Pack Two 32-bit Data from Both Bottom Half)

Type: SIMD

Syntax:

DPKBB32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 32-bit data from 64-bit chunks in two registers.

  • DPKBB32: bottom.bottom

Description

:

This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].

Operations:

Rd = CONCAT(Rs1.W[0], Rs2.W[0]);

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKBT32 (unsigned long long a, unsigned long long b)

DPKBT32 (Pack Two 32-bit Data from Bottom and Top Half)

Type: SIMD

Syntax:

DPKBT32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 32-bit data from 64-bit chunks in two registers.

  • DPKBT32: bottom.top

Description

:

This instruction moves Rs1.W[0] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].

Operations:

Rd = CONCAT(Rs1.W[0], Rs2.W[1]);

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKTT32 (unsigned long long a, unsigned long long b)

DPKTT32 (Pack Two 32-bit Data from Both Top Half)

Type: SIMD

Syntax:

DPKTT32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 32-bit data from 64-bit chunks in two registers.

  • DPKTT32: top.top

Description

:

This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[1] to Rd.W[0].

Operations:

Rd = CONCAT(Rs1.W[1], Rs2.W[1]);

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKTB32 (unsigned long long a, unsigned long long b)

DPKTB32 (Pack Two 32-bit Data from Top and Bottom Half)

Type: SIMD

Syntax:

DPKTB32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 32-bit data from 64-bit chunks in two registers.

  • DPKTB32: top.bottom

Description

:

This instruction moves Rs1.W[1] to Rd.W[1] and moves Rs2.W[0] to Rd.W[0].

Operations:

Rd = CONCAT(Rs1.W[1], Rs2.W[0]);

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKTB16 (unsigned long long a, unsigned long long b)

DPKTB16 (Pack Two 16-bit Data from Top and Bottom Half)

Type: SIMD

Syntax:

DPKTB16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 16-bit data from 32-bit chunks in two registers.

  • DPKTB16: top.bottom

Description

:

This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].

Operations:

Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][15:0]);
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKBB16 (unsigned long long a, unsigned long long b)

DPKBB16 (Pack Two 16-bit Data from Both Bottom Half)

Type: SIMD

Syntax:

DPKBB16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 16-bit data from 32-bit chunks in two registers.

  • DPKBB16: bottom.bottom

Description

:

This instruction moves Rs1.W[x][15:0] to Rd.W[x][31:16] and moves Rs2.W[x] [15:0] to Rd.W[x] [15:0].

Operations:

Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][15:0]);
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKBT16 (unsigned long long a, unsigned long long b)

DPKBT16 (Pack Two 16-bit Data from Bottom and Top Half)

Type: SIMD

Syntax:

DPKBT16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 16-bit data from 32-bit chunks in two registers.

  • DPKBT16: bottom.top

Description

:

This instruction moves Rs1.W[x] [15:0] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].

Operations:

Rd.W[x][31:0] = CONCAT(Rs1.W[x][15:0], Rs2.W[x][31:16]);
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPKTT16 (unsigned long long a, unsigned long long b)

DPKTT16 (Pack Two 16-bit Data from Both Top Half)

Type: SIMD

Syntax:

DPKTT16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Pack 16-bit data from 32-bit chunks in two registers.

  • DPKTT16: top.top

Description

:

This instruction moves Rs1.W[x] [31:16] to Rd.W[x] [31:16] and moves Rs2.W[x] [31:16] to Rd.W[x] [15:0].

Operations:

Rd.W[x][31:0] = CONCAT(Rs1.W[x][31:16], Rs2.W[x][31:16]);
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSRA16 (unsigned long long a, unsigned long b)

DSRA16 (SIMD 16-bit Shift Right Arithmetic)

Type: SIMD

Syntax:

DSRA16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit element arithmetic right shift operations simultaneously. The shift amount is a variable from a GPR.

Description

:

The 16-bit data elements in Rs1 are right-shifted arithmetically, that is, the shifted out bits are filled with the sign-bit of the data elements. The shift amount is specified by the low-order 4-bits of the value in the Rs2 register. And the results are written to Rd.

Operations:

sa = Rs2[3:0];
if (sa != 0)
{
Rd.H[x] = SE16(Rs1.H[x][15:sa]);
} else {
Rd = Rs1;
}
x=3...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DADD16 (unsigned long long a, unsigned long long b)

DADD16 (16-bit Addition)

Type: SIMD

Syntax:

DADD16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit integer element additions simultaneously.

Description

:

This instruction adds the 16-bit unsigned integer elements in Rs1 with the 16-bit unsigned integer elements in Rs2. And the results are written to Rd.

Operations:

Rd.H[x] = Rs1.H[x] + Rs2.H[x];
x=3...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DADD32 (unsigned long long a, unsigned long long b)

DADD32 (32-bit Addition)

Type: SIMD

Syntax:

DADD32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit integer element additions simultaneously.

Description

:

This instruction adds the 32-bit integer elements in Rs1 with the 32-bit integer elements in Rs2, and then writes the 32-bit element results to Rd.

Operations:

Rd.W[x] = Rs1.W[x] + Rs2.W[x];
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMBB16 (unsigned long long a, unsigned long long b)

DSMBB16 (Signed Multiply Bottom Half & Bottom Half)

Type: SIMD

Syntax:

DSMBB16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements of another register and write the result to a third register.

  • DSMBB16: W[x].bottom*W[x].bottom

Description

:

For the

DSMBB16 instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the bottom 16-bit content of the 32-bit elements of Rs2. The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[0];
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMBT16 (unsigned long long a, unsigned long long b)

DSMBT16 (Signed Multiply Bottom Half & Top Half)

Type: SIMD

Syntax:

DSMBT16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements of another register and write the result to a third register.

  • DSMBT16: W[x].bottom *W[x].top

Description

:

For the

DSMBT16 instruction, it multiplies the bottom 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

Rd.W[x] = Rs1.W[x].H[0] * Rs2.W[x].H[1];
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSMTT16 (unsigned long long a, unsigned long long b)

DSMTT16 (Signed Multiply Top Half & Top Half)

Type: SIMD

Syntax:

DSMTT16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Multiply the signed 16-bit content of the 32-bit elements of a register with the signed 16-bit content of the 32-bit elements of another register and write the result to a third register.

  • DSMTT16: W[x].top * W[x].top

Description

:

For the

DSMTT16 instruction, it multiplies the top 16-bit content of the 32-bit elements of Rs1 with the top 16-bit content of the 32-bit elements of Rs2. The multiplication results are written to Rd. The 16-bit contents of Rs1 and Rs2 are treated as signed integers.

Operations:

Rd.W[x] = Rs1.W[x].H[1] * Rs2.W[x].H[1];
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA16 (unsigned long long a, unsigned long long b)

DRCRSA16 (16-bit Signed Halving Cross Subtraction & Addition)

Type: SIMD

Syntax:

DRCRSA16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element subtraction and 16-bit signed integer element addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.

Description

:

This instruction subtracts the 16-bit signed integer in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1, and adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 to the 16-bit signed integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for addition.

Operations:

Rd.W[x][31:16] = (Rs1.W[x][31:16] - Rs2.W[x][15:0]) s>> 1;
Rd.W[x][15:0] = (Rs1.W[x][15:0] + Rs2.W[x][31:16]) s>> 1;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRCRSA32 (unsigned long long a, unsigned long long b)

DRCRSA32 (32-bit Signed Halving Cross Subtraction & Addition)

Type: SIMD

Syntax:

DRCRSA32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element subtraction and 32-bit signed integer element addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.

Description

:

This instruction subtracts the 32-bit signed integer element in [31:0] of Rs2 from the 32-bit signed integer element in [63:32] of Rs1, and adds the 32-bit signed integer element in [63:32] of Rs2 to the 32-bit signed integer element in [31:0] of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.

Operations:

Rd.W[1] = (Rs1.W[1] - Rs2.W[0]) s>> 1;
Rd.W[0] = (Rs1.W[0] + Rs2.W[1]) s>> 1;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS16 (unsigned long long a, unsigned long long b)

DRCRAS16 (16-bit Signed Halving Cross Addition & Subtraction)

Type: SIMD

Syntax:

DRCRAS16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element addition and 16-bit signed integer element subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks. The results are halved to avoid overflow or saturation.

Description

:

This instruction adds the 16-bit signed integer in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer in [15:0] of 32-bit chunks in Rs2, and subtracts the 16-bit signed integer in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer in [15:0] of 32-bit chunks in Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for subtraction.

Operations:

Rd.W[x][31:16] = (Rs1.W[x][31:16] + Rs2.W[x][15:0]) s>> 1;
Rd.W[x][15:0] = (Rs1.W[x][15:0] - Rs2.W[x][31:16]) s>> 1;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRCRAS32 (unsigned long long a, unsigned long long b)

DRCRAS32 (32-bit Signed Halving Cross Addition & Subtraction)

Type: SIMD

Syntax:

DRCRAS32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element addition and 32-bit signed integer element subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements. The results are halved to avoid overflow or saturation.

Description

:

This instruction adds the 32-bit signed integer element in [63:32] of Rs1 with the 32-bit signed integer element in [31:0] of Rs2, and subtracts the 32-bit signed integer element in [63:32] of Rs2 from the 32-bit signed integer element in [31:0] of Rs1. The element results are first arithmetically right-shifted by 1 bit and then written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.

Operations:

Rd.W[1] = (Rs1.W[1] + Rs2.W[0]) s>> 1;
Rd.W[0] = (Rs1.W[0] - Rs2.W[1]) s>> 1;

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS16 (unsigned long long a, unsigned long long b)

DKCRAS16 (16-bit Signed Saturating Cross Addition & Subtraction)

Type: SIMD

Syntax:

DKCRAS16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.

Description

:

This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for subtraction.

Operations:

res1 = Rs1.W[x][31:16] + Rs2.W[x][15:0];
res2 = Rs1.W[x][15:0] - Rs2.W[x][31:16];
for (res in [res1, res2]) {
  if (res > (2^15)-1) {
    res = (2^15)-1;
    OV = 1;
  } else if (res < -2^15) {
    res = -2^15;
    OV = 1;
  }
}
Rd.W[x][31:16] = res1;
Rd.W[x][15:0] = res2;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA16 (unsigned long long a, unsigned long long b)

DKCRSA16 (16-bit Signed Saturating Cross Subtraction & Addition)

Type: SIMD

Syntax:

DKCRSA16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit chunk simultaneously. Operands are from crossed positions in 32-bit chunks.

Description

:

This instruction subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for addition.

Operations:

res1 = Rs1.W[x][31:16] - Rs2.W[x][15:0];
res2 = Rs1.W[x][15:0] + Rs2.W[x][31:16];
for (res in [res1, res2]) {
  if (res > (2^15)-1) {
    res = (2^15)-1;
    OV = 1;
  } else if (res < -2^15) {
    res = -2^15;
    OV = 1;
  }
}
Rd.W[x][31:16] = res1;
Rd.W[x][15:0] = res2;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRSUB16 (unsigned long long a, unsigned long long b)

DRSUB16 (16-bit Signed Halving Subtraction)

Type: SIMD

Syntax:

DRSUB16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.

Description

:

This instruction subtracts the 16-bit signed integer elements in Rs2 from the 16-bit signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then written to Rd.

Operations:

Rd.H[x] = (Rs1.H[x] - Rs2.H[x]) s>> 1;
x=3...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSTSA32 (unsigned long long a, unsigned long long b)

DSTSA32 (32-bit Straight Subtraction & Addition)

Type: SIMD

Syntax:

DSTSA32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.

Description

:

This instruction subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result to [31:0] of Rd.

Operations:

Rd.W[1] = Rs1.W[1] - Rs2.W[1];
Rd.W[0] = Rs1.W[0] + Rs2.W[0];

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSTAS32 (unsigned long long a, unsigned long long b)

DSTAS32 (SIMD 32-bit Straight Addition & Subtraction)

Type: SIMD

Syntax:

DSTAS32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are from corresponding 32-bit elements.

Description

:

This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.

Operations:

Rd.W[1] = Rs1.W[1] + Rs2.W[1];
Rd.W[0] = Rs1.W[0] - Rs2.W[0];

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKCRSA32 (unsigned long long a, unsigned long long b)

DKCRSA32 (32-bit Signed Saturating Cross Subtraction & Addition)

Type: SIMD

Syntax:

DKCRSA32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element saturating subtraction and 32-bit signed integer element saturating addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.

Description

:

This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for subtraction and [31:0] of Rd for addition.

Operations:

res[1] = Rs1.W[1] - Rs2.W[0];
res[0] = Rs1.W[0] + Rs2.W[1];
if (res[x] > (2^31)-1) {
  res[x] = (2^31)-1;
  OV = 1;
} else if (res[x] < -2^31) {
  res[x] = -2^31;
  OV = 1;
}
Rd.W[1] = res[1];
Rd.W[0] = res[0];

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKCRAS32 (unsigned long long a, unsigned long long b)

DKCRAS32 (32-bit Signed Saturating Cross Addition & Subtraction)

Type: SIMD

Syntax:

DKCRAS32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element saturating addition and 32-bit signed integer element saturating subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.

Description

:

This instruction adds the 32-bit integer element in [31:0] of Rs2 with the 32-bit integer element in [63:32] of Rs1; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1. If any of the results are beyond the Q31 number range (-2^31 <= Q31 <= 2^31-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to [63:32] of Rd for addition and [31:0] of Rd for subtraction.

Operations:

res[1] = Rs1.W[1] + Rs2.W[0];
res[0] = Rs1.W[0] - Rs2.W[1];
if (res[x] > (2^31)-1) {
  res[x] = (2^31)-1;
  OV = 1;
} else if (res[x] < -2^31) {
  res[x] = -2^31;
  OV = 1;
}
Rd.W[1] = res[1];
Rd.W[0] = res[0];

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DCRSA32 (unsigned long long a, unsigned long long b)

DCRSA32 (32-bit Cross Subtraction & Addition)

Type: SIMD

Syntax:

DCRSA32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit integer element subtraction and 32-bit integer element addition in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.

Description

:

This instruction subtracts the 32-bit integer element in [31:0] of Rs2 from the 32-bit integer element in [63:32] of Rs1, and writes the result to [63:32] of Rd; at the same time, it adds the 32-bit integer element in [31:0] of Rs1 with the 32-bit integer element in [63:32] of Rs2, and writes the result to [31:0] of Rd.

Operations:

Rd.W[1] = Rs1.W[1] - Rs2.W[0];
Rd.W[0] = Rs1.W[0] + Rs2.W[1];

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DCRAS32 (unsigned long long a, unsigned long long b)

DCRAS32 (32-bit Cross Addition & Subtraction)

Type: SIMD

Syntax:

DCRAS32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit integer element addition and 32-bit integer element subtraction in a 64-bit chunk simultaneously. Operands are from crossed 32-bit elements.

Description

:

This instruction adds the 32-bit integer element in [63:32] of Rs1 with the 32-bit integer element in [31:0] of Rs2, and writes the result to [63:32] of Rd; at the same time, it subtracts the 32-bit integer element in [63:32] of Rs2 from the 32-bit integer element in [31:0] of Rs1, and writes the result to [31:0] of Rd.

Operations:

Rd.W[1] = Rs1.W[1] + Rs2.W[0];
Rd.W[0] = Rs1.W[0] - Rs2.W[1];

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKSTSA16 (unsigned long long a, unsigned long long b)

DKSTSA16 (16-bit Signed Saturating Straight Subtraction & Addition)

Type: SIMD

Syntax:

DKSTSA16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element saturating subtraction and 16-bit signed integer element saturating addition in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.

Description

:

This instruction subtracts the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2 from the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1; at the same time, it adds the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 with the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for subtraction and [15:0] of 32-bit chunks in Rd for addition.

Operations:

res1 = Rs1.W[x][31:16] - Rs2.W[x][31:16];
res2 = Rs1.W[x][15:0] + Rs2.W[x][15:0];
for (res in [res1, res2]) {
  if (res > (2^15)-1) {
    res = (2^15)-1;
    OV = 1;
  } else if (res < -2^15) {
    res = -2^15;
    OV = 1;
  }
}
Rd.W[x][31:16] = res1;
Rd.W[x][15:0] = res2;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DKSTAS16 (unsigned long long a, unsigned long long b)

DKSTAS16 (16-bit Signed Saturating Straight Addition & Subtraction)

Type: SIMD

Syntax:

DKSTAS16 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 16-bit signed integer element saturating addition and 16-bit signed integer element saturating subtraction in a 32-bit chunk simultaneously. Operands are from corresponding positions in 32-bit chunks.

Description

:

This instruction adds the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs1 with the 16-bit signed integer element in [31:16] of 32-bit chunks in Rs2; at the same time, it subtracts the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs2 from the 16-bit signed integer element in [15:0] of 32-bit chunks in Rs1. If any of the results are beyond the Q15 number range (-2^15 <= Q15 <= 2^15-1), they are saturated to the range and the OV bit is set to 1. The saturated results are written to [31:16] of 32-bit chunks in Rd for addition and [15:0] of 32-bit chunks in Rd for subtraction.

Operations:

res1 = Rs1.W[x][31:16] + Rs2.W[x][31:16];
res2 = Rs1.W[x][15:0] - Rs2.W[x][15:0];
for (res in [res1, res2]) {
  if (res > (2^15)-1) {
    res = (2^15)-1;
    OV = 1;
  } else if (res < -2^15) {
    res = -2^15;
    OV = 1;
  }
}
Rd.W[x][31:16] = res1;
Rd.W[x][15:0] = res2;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DRSUB32 (unsigned long long a, unsigned long long b)

DRSUB32 (32-bit Signed Halving Subtraction)

Type: SIMD

Syntax:

DRSUB32 Rd, Rs1, Rs2
# Rd, Rs1, Rs2 are all even/odd pair of registers

Purpose

:

Do 32-bit signed integer element subtractions simultaneously. The results are halved to avoid overflow or saturation.

Description

:

This instruction subtracts the 32-bit signed integer elements in Rs2 from the 32-bit signed integer elements in Rs1. The results are first arithmetically right-shifted by 1 bit and then written to Rd.

Operations:

Rd.W[x] = (Rs1.W[x] - Rs2.W[x]) s>> 1;
x=1...0

Parameters:
  • a[in] unsigned long long type of value stored in a

  • b[in] unsigned long long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DPACK32 (signed long a, signed long b)

DPACK32 (SIMD Pack Two 32-bit Data To 64-bit)

Type: SIMD

Syntax:

DPACK32 Rd, Rs1, Rs2
# Rd is an even/odd pair of registers

Purpose

:

Pack two 32-bit data values from two registers into one 64-bit value.

Description

:

This instruction moves 32-bit Rs1 to Rd.W[1] and moves 32-bit Rs2 to Rd.W[0].

Operations:

Rd = CONCAT(Rs1.W , Rs2.W);

Parameters:
  • a[in] signed long type of value stored in a

  • b[in] signed long type of value stored in b

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD810 (unsigned long long a)

DSUNPKD810 (Signed Unpacking Bytes 1 & 0)

Type: SIMD

Syntax:

DSUNPKD810 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.

Description

:

For the DSUNPKD810 instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = SE16(Rs1.W[m].B[1])
Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD820 (unsigned long long a)

DSUNPKD820 (Signed Unpacking Bytes 2 & 0)

Type: SIMD

Syntax:

DSUNPKD820 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.

Description

:

For the DSUNPKD820 instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = SE16(Rs1.W[m].B[2])
Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD830 (unsigned long long a)

DSUNPKD830 (Signed Unpacking Bytes 3 & 0)

Type: SIMD

Syntax:

DSUNPKD830 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.

Description

:

For the DSUNPKD830 instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
Rd.W[m].H[0] = SE16(Rs1.W[m].B[0])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD831 (unsigned long long a)

DSUNPKD831 (Signed Unpacking Bytes 3 & 1)

Type: SIMD

Syntax:

DSUNPKD831 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.

Description

:

For the DSUNPKD831 instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
Rd.W[m].H[0] = SE16(Rs1.W[m].B[1])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DSUNPKD832 (unsigned long long a)

DSUNPKD832 (Signed Unpacking Bytes 3 & 2)

Type: SIMD

Syntax:

DSUNPKD832 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit signed halfwords of 32-bit chunks in a register.

Description

:

For the DSUNPKD832 instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit signed halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = SE16(Rs1.W[m].B[3])
Rd.W[m].H[0] = SE16(Rs1.W[m].B[2])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD810 (unsigned long long a)

DZUNPKD810 (Unsigned Unpacking Bytes 1 & 0)

Type: SIMD

Syntax:

DZUNPKD810 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 1 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.

Description

:

For the DZUNPKD810 instruction, it unpacks byte 1 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = ZE16(Rs1.W[m].B[1])
Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD820 (unsigned long long a)

DZUNPKD820 (Unsigned Unpacking Bytes 2 & 0)

Type: SIMD

Syntax:

DZUNPKD820 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 2 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.

Description

:

For the DZUNPKD820 instruction, it unpacks byte 2 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = ZE16(Rs1.W[m].B[2])
Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD830 (unsigned long long a)

DZUNPKD830 (Unsigned Unpacking Bytes 3 & 0)

Type: SIMD

Syntax:

DZUNPKD830 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 3 and byte 0 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.

Description

:

For the DZUNPKD830 instruction, it unpacks byte 3 and byte 0 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
Rd.W[m].H[0] = ZE16(Rs1.W[m].B[0])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD831 (unsigned long long a)

DZUNPKD831 (Unsigned Unpacking Bytes 3 & 1)

Type: SIMD

Syntax:

DZUNPKD831 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 3 and byte 1 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.

Description

:

For the DZUNPKD831 instruction, it unpacks byte 3 and byte 1 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
Rd.W[m].H[0] = ZE16(Rs1.W[m].B[1])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type

__STATIC_FORCEINLINE unsigned long long __RV_DZUNPKD832 (unsigned long long a)

DZUNPKD832 (Unsigned Unpacking Bytes 3 & 2)

Type: SIMD

Syntax:

DZUNPKD832 Rd, Rs1
# Rd, Rs1 are all even/odd pair of registers

Purpose

:

Unpack byte 3 and byte 2 of 32-bit chunks in a register into two 16-bit unsigned halfwords of 32-bit chunks in a register.

Description

:

For the DZUNPKD832 instruction, it unpacks byte 3 and byte 2 of 32-bit chunks in Rs1 into two 16-bit unsigned halfwords and writes the results to the top part and the bottom part of 32-bit chunks in Rd.

Operations:

Rd.W[m].H[1] = ZE16(Rs1.W[m].B[3])
Rd.W[m].H[0] = ZE16(Rs1.W[m].B[2])

Parameters:

a[in] unsigned long long type of value stored in a

Returns:

value stored in unsigned long long type