NMSIS-Core  Version 1.4.0
NMSIS-Core support for Nuclei processor-based devices
nmsis_bench.h
1 /*
2  * Copyright (c) 2019 Nuclei Limited. All rights reserved.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Licensed under the Apache License, Version 2.0 (the License); you may
7  * not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #ifndef __NMSIS_BENCH__
20 #define __NMSIS_BENCH__
21 
27 #ifdef __cplusplus
28  extern "C" {
29 #endif
30 
31 #include "core_feature_base.h"
32 #include <stdio.h>
33 
34 #ifdef BENCH_XLEN_MODE /* Bench_Type: integer type of the cycle/counter variables declared by BENCH_DECLARE_VAR and HPM_DECLARE_VAR */
35 typedef unsigned long Bench_Type; /* used when BENCH_XLEN_MODE is defined */
36 #else
37 typedef uint64_t Bench_Type; /* default: fixed 64-bit unsigned type */
38 #endif
39 
74 {
75 #ifdef __riscv_vector
77 #endif
79 }
80 
81 #ifndef READ_CYCLE /* a READ_CYCLE defined before this point is kept as-is */
90 #ifdef BENCH_XLEN_MODE
92 #define READ_CYCLE __read_cycle_csr /* chosen under the same condition as the unsigned long Bench_Type */
93 #else
95 #define READ_CYCLE __get_rv_cycle /* chosen under the same condition as the uint64_t Bench_Type */
96 #endif /* #ifdef BENCH_XLEN_MODE */
97 #endif /* #ifndef READ_CYCLE */
98 
#ifndef DISABLE_NMSIS_BENCH

/*
 * Cycle-count benchmark helpers (benchmarking enabled).
 *
 * Multi-statement macros are wrapped in do { ... } while (0) so the whole
 * expansion is a single statement: `if (cond) BENCH_SAMPLE(x);` now guards
 * every step instead of only the first assignment.
 * The ';' after `while (0)` is kept on purpose: the previous expansions ended
 * with ';', so call sites written without their own semicolon still compile.
 *
 * Bench_Type, READ_CYCLE and __prepare_bench_env() come from earlier parts of
 * this header.
 */

/* Declare the state used by the BENCH_* macros: start/end/last-delta/summed
 * cycle values, the sample count and the error flag. */
#define BENCH_DECLARE_VAR() static volatile Bench_Type _bc_sttcyc, _bc_endcyc, _bc_usecyc, _bc_sumcyc; \
    static volatile unsigned long _bc_lpcnt, _bc_ercd;

/* Print a banner, call __prepare_bench_env(), clear the error flag and the cycle sum. */
#define BENCH_INIT() do { \
        printf("Benchmark initialized\n"); \
        __prepare_bench_env(); \
        _bc_ercd = 0; _bc_sumcyc = 0; \
    } while (0);

/* Clear summed cycles, last delta, sample count and error flag. */
#define BENCH_RESET(proc) do { \
        _bc_sumcyc = 0; _bc_usecyc = 0; _bc_lpcnt = 0; _bc_ercd = 0; \
    } while (0);

/* Clear the error flag and record the starting cycle value. */
#define BENCH_START(proc) do { \
        _bc_ercd = 0; \
        _bc_sttcyc = READ_CYCLE(); \
    } while (0);

/* Record the ending cycle value, store the delta since BENCH_START, add it to
 * the running sum and count one more sample. */
#define BENCH_SAMPLE(proc) do { \
        _bc_endcyc = READ_CYCLE(); \
        _bc_usecyc = _bc_endcyc - _bc_sttcyc; \
        _bc_sumcyc += _bc_usecyc; _bc_lpcnt += 1; \
    } while (0);

/* Take a sample, then print the last delta (cast to unsigned long) as a CSV line. */
#define BENCH_END(proc) do { \
        BENCH_SAMPLE(proc) \
        printf("CSV, %s, %lu\n", #proc, (unsigned long)_bc_usecyc); \
    } while (0);

/* Print the summed cycles (cast to unsigned long) as a CSV line. */
#define BENCH_STOP(proc) printf("CSV, %s, %lu\n", #proc, (unsigned long)_bc_sumcyc);

/* Print sample count and summed cycles as a STAT line. */
#define BENCH_STAT(proc) printf("STAT, %s, %lu, %lu\n", #proc, (unsigned long)_bc_lpcnt, (unsigned long)_bc_sumcyc);

/* Last measured delta. */
#define BENCH_GET_USECYC() (_bc_usecyc)

/* Sum of all deltas since the last reset. */
#define BENCH_GET_SUMCYC() (_bc_sumcyc)

/* Number of samples since the last reset. */
#define BENCH_GET_LPCNT() (_bc_lpcnt)

/* Set the error flag reported by BENCH_STATUS. */
#define BENCH_ERROR(proc) _bc_ercd = 1;

/* Print "ERROR, <proc>" when the error flag is set, otherwise "SUCCESS, <proc>". */
#define BENCH_STATUS(proc) do { \
        if (_bc_ercd) { \
            printf("ERROR, %s\n", #proc); \
        } else { \
            printf("SUCCESS, %s\n", #proc); \
        } \
    } while (0);
#else
/*
 * Benchmarking disabled: no cycle counter is read and nothing is printed
 * except by BENCH_STATUS. Only the sample count and error flag are kept.
 */
#define BENCH_DECLARE_VAR() static volatile unsigned long _bc_ercd, _bc_lpcnt;
#define BENCH_INIT() do { _bc_ercd = 0; __prepare_bench_env(); } while (0);
/* Reset the same state the enabled variant resets (sample count and error
 * flag), so BENCH_GET_LPCNT()/BENCH_STATUS() behave alike in both variants. */
#define BENCH_RESET(proc) do { _bc_lpcnt = 0; _bc_ercd = 0; } while (0);
#define BENCH_START(proc) _bc_ercd = 0;
#define BENCH_SAMPLE(proc) _bc_lpcnt += 1;
#define BENCH_END(proc)
#define BENCH_STOP(proc)
#define BENCH_STAT(proc)
#define BENCH_GET_USECYC() (0)
#define BENCH_GET_SUMCYC() (0)
#define BENCH_GET_LPCNT() (_bc_lpcnt)
#define BENCH_ERROR(proc) _bc_ercd = 1;
#define BENCH_STATUS(proc) do { \
        if (_bc_ercd) { \
            printf("ERROR, %s\n", #proc); \
        } else { \
            printf("SUCCESS, %s\n", #proc); \
        } \
    } while (0);

#endif
169 
170 // High performance monitor bench helpers
171 #if defined(__HPM_PRESENT) && (__HPM_PRESENT == 1) && (!defined(DISABLE_NMSIS_HPM))
172 
173 /* Event type selectors: the `sel` argument of HPM_EVENT(sel, idx, ena) */
174 #define EVENT_SEL_INSTRUCTION_COMMIT 0 /* same value as EVENT_SEL_TYPE_0 */
175 #define EVENT_SEL_MEMORY_ACCESS 1 /* same value as EVENT_SEL_TYPE_1 */
176 #define EVENT_SEL_TYPE_0 0
177 #define EVENT_SEL_TYPE_1 1
178 /* Event types 2 and 3 were introduced in PMU v2 */
179 #define EVENT_SEL_TYPE_2 2
180 #define EVENT_SEL_TYPE_3 3
181 
182 /* Instruction commit event indices; each value equals the EVENT_TYPE_0_* macro of the same name */
183 #define EVENT_INSTRUCTION_COMMIT_CYCLE_COUNT 1
184 #define EVENT_INSTRUCTION_COMMIT_RETIRED_COUNT 2
185 /* Integer load instruction (includes LR) */
186 #define EVENT_INSTRUCTION_COMMIT_INTEGER_LOAD 3
187 /* Integer store instruction (includes SC) */
188 #define EVENT_INSTRUCTION_COMMIT_INTEGER_STORE 4
189 /* Atomic memory operation (does not include LR and SC) */
190 #define EVENT_INSTRUCTION_COMMIT_ATOMIC_MEMORY_OPERATION 5
191 /* System instruction */
192 #define EVENT_INSTRUCTION_COMMIT_SYSTEM 6
193 /* Integer computational instruction (excluding multiplication/division/remainder) */
194 #define EVENT_INSTRUCTION_COMMIT_INTEGER_COMPUTATIONAL 7
195 #define EVENT_INSTRUCTION_COMMIT_CONDITIONAL_BRANCH 8
196 #define EVENT_INSTRUCTION_COMMIT_TAKEN_CONDITIONAL_BRANCH 9
197 #define EVENT_INSTRUCTION_COMMIT_JAL 10
198 #define EVENT_INSTRUCTION_COMMIT_JALR 11
199 #define EVENT_INSTRUCTION_COMMIT_RETURN 12
200 /* Control transfer instruction (CBR+JAL+JALR) */
201 #define EVENT_INSTRUCTION_COMMIT_CONTROL_TRANSFER 13
202 /* Fence instruction (not including fence.i) */
203 #define EVENT_INSTRUCTION_COMMIT_FENCE_INSTRUCTION 14
204 #define EVENT_INSTRUCTION_COMMIT_INTEGER_MULTIPLICATION 15
205 /* Integer division/remainder instruction */
206 #define EVENT_INSTRUCTION_COMMIT_INTEGER_DIVISION_REMAINDER 16
207 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_LOAD 17
208 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_STORE 18
209 /* Floating-point addition/subtraction */
210 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_ADDITION_SUBTRACTION 19
211 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_MULTIPLICATION 20
212 /* Floating-point fused multiply-add (FMADD, FMSUB, FNMSUB, FNMADD) */
213 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_FUSED_MULTIPLY_ADD_SUB 21
214 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_DIVISION_OR_SQUARE_ROOT 22
215 #define EVENT_INSTRUCTION_COMMIT_OTHER_FLOATING_POINT_INSTRUCTION 23
216 #define EVENT_INSTRUCTION_COMMIT_CONDITIONAL_BRANCH_PREDICTION_FAIL 24
217 /* JAL_PREDICTION_FAIL never existed (it was wrongly documented); JALR_PREDICTION_FAIL is 25, not 26 */
218 #define EVENT_INSTRUCTION_COMMIT_JALR_PREDICTION_FAIL 25
219 #define EVENT_INSTRUCTION_COMMIT_POP_PREDICTION_FAIL 26
220 #define EVENT_INSTRUCTION_COMMIT_FENCEI_INSTRUCTION 27
221 #define EVENT_INSTRUCTION_COMMIT_SFENCE_INSTRUCTION 28
222 #define EVENT_INSTRUCTION_COMMIT_ECALL_INSTRUCTION 29
223 #define EVENT_INSTRUCTION_COMMIT_EXCEPTION_INSTRUCTION 30
224 #define EVENT_INSTRUCTION_COMMIT_INTERRUPT_INSTRUCTION 31
225 
226 /* Memory access event indices; indices 6 and 7 are not defined in this list */
227 #define EVENT_MEMORY_ACCESS_ICACHE_MISS 1
228 #define EVENT_MEMORY_ACCESS_DCACHE_MISS 2
229 #define EVENT_MEMORY_ACCESS_ITLB_MISS 3
230 #define EVENT_MEMORY_ACCESS_DTLB_MISS 4
231 #define EVENT_MEMORY_ACCESS_MAIN_DTLB_MISS 5 /* same index as EVENT_MEMORY_ACCESS_MAIN_TLB_MISS */
232 #define EVENT_MEMORY_ACCESS_MAIN_TLB_MISS 5
233 /* The following events were introduced in PMU v2 */
234 #define EVENT_MEMORY_ACCESS_L2_CACHE_ACCESS 8
235 #define EVENT_MEMORY_ACCESS_L2_CACHE_MISS 9
236 /* For Single Core, the Core memory bus read/write request count
237  * For SMP Core, the cluster memory bus read/write/prefetch request count
238  * is initiated by current Core */
239 #define EVENT_MEMORY_ACCESS_MEMORY_BUS_REQUEST 10
240 #define EVENT_MEMORY_ACCESS_IFU_STALL_CYCLE 11
241 #define EVENT_MEMORY_ACCESS_EXU_STALL_CYCLE 12
242 #define EVENT_MEMORY_ACCESS_TIMER 13
243 
244 /*
245  * Here are the new event macro names for PMU v1 and v2.
246  * Since the events can no longer be summarized under a group name,
247  * we simply use the event type id in the name: TYPE_0, TYPE_1, TYPE_2, TYPE_3
248  */
249 /* Event Type 0 (event sel == 0) event indices; same numbering as the EVENT_INSTRUCTION_COMMIT_* macros */
250 #define EVENT_TYPE_0_CYCLE_COUNT 1
251 #define EVENT_TYPE_0_RETIRED_COUNT 2
252 #define EVENT_TYPE_0_INTEGER_LOAD 3
253 #define EVENT_TYPE_0_INTEGER_STORE 4
254 #define EVENT_TYPE_0_ATOMIC_MEMORY_OPERATION 5
255 #define EVENT_TYPE_0_SYSTEM 6
256 #define EVENT_TYPE_0_INTEGER_COMPUTATIONAL 7
257 #define EVENT_TYPE_0_CONDITIONAL_BRANCH 8
258 #define EVENT_TYPE_0_TAKEN_CONDITIONAL_BRANCH 9
259 #define EVENT_TYPE_0_JAL 10
260 #define EVENT_TYPE_0_JALR 11
261 #define EVENT_TYPE_0_RETURN 12
262 #define EVENT_TYPE_0_CONTROL_TRANSFER 13
263 #define EVENT_TYPE_0_FENCE_INSTRUCTION 14
264 #define EVENT_TYPE_0_INTEGER_MULTIPLICATION 15
265 #define EVENT_TYPE_0_INTEGER_DIVISION_REMAINDER 16
266 #define EVENT_TYPE_0_FLOATING_POINT_LOAD 17
267 #define EVENT_TYPE_0_FLOATING_POINT_STORE 18
268 #define EVENT_TYPE_0_FLOATING_POINT_ADDITION_SUBTRACTION 19
269 #define EVENT_TYPE_0_FLOATING_POINT_MULTIPLICATION 20
270 #define EVENT_TYPE_0_FLOATING_POINT_FUSED_MULTIPLY_ADD_SUB 21
271 #define EVENT_TYPE_0_FLOATING_POINT_DIVISION_OR_SQUARE_ROOT 22
272 #define EVENT_TYPE_0_OTHER_FLOATING_POINT_INSTRUCTION 23
273 #define EVENT_TYPE_0_CONDITIONAL_BRANCH_PREDICTION_FAIL 24
274 #define EVENT_TYPE_0_JALR_PREDICTION_FAIL 25
275 #define EVENT_TYPE_0_POP_PREDICTION_FAIL 26
276 #define EVENT_TYPE_0_FENCEI_INSTRUCTION 27
277 #define EVENT_TYPE_0_SFENCE_INSTRUCTION 28
278 #define EVENT_TYPE_0_ECALL_INSTRUCTION 29
279 #define EVENT_TYPE_0_EXCEPTION_INSTRUCTION 30
280 #define EVENT_TYPE_0_INTERRUPT_INSTRUCTION 31
281 
282 /* Event Type 1 (event sel == 1) event indices; same numbering as the EVENT_MEMORY_ACCESS_* macros (6 and 7 not defined here) */
283 #define EVENT_TYPE_1_ICACHE_READ_MISS 1
284 #define EVENT_TYPE_1_DCACHE_RW_MISS 2
285 #define EVENT_TYPE_1_ITLB_READ_MISS 3
286 #define EVENT_TYPE_1_DTLB_RW_MISS 4
287 #define EVENT_TYPE_1_MAIN_TLB_MISS 5
288 #define EVENT_TYPE_1_L2_CACHE_ACCESS 8
289 #define EVENT_TYPE_1_L2_CACHE_MISS 9
290 #define EVENT_TYPE_1_MEMORY_BUS_REQUEST 10
291 #define EVENT_TYPE_1_IFU_STALL_CYCLE 11
292 #define EVENT_TYPE_1_EXU_STALL_CYCLE 12
293 #define EVENT_TYPE_1_TIMER 13
294 
295 /* Event Type 2 (event sel == 2) event indices; only indices 2 and 3 are defined here */
296 #define EVENT_TYPE_2_BRANCH_INSTRUCTION_COMMIT 2
297 #define EVENT_TYPE_2_BRANCH_PREDICT_FAIL_COMMIT 3
298 
299 /* Event Type 3 (event sel == 3) event indices; indices 7 and 21 are not defined in this list */
300 #define EVENT_TYPE_3_DCACHE_READ 0
301 #define EVENT_TYPE_3_DCACHE_READ_MISS 1
302 #define EVENT_TYPE_3_DCACHE_WRITE 2
303 #define EVENT_TYPE_3_DCACHE_WRITE_MISS 3
304 #define EVENT_TYPE_3_DCACHE_PREFETCH 4
305 #define EVENT_TYPE_3_DCACHE_PREFETCH_MISS 5
306 #define EVENT_TYPE_3_ICACHE_READ 6
307 #define EVENT_TYPE_3_ICACHE_PREFETCH 8
308 #define EVENT_TYPE_3_ICACHE_PREFETCH_MISS 9
309 #define EVENT_TYPE_3_L2_CACHE_READ_HIT 10
310 #define EVENT_TYPE_3_L2_CACHE_READ_MISS 11
311 #define EVENT_TYPE_3_L2_CACHE_WRITE_HIT 12
312 #define EVENT_TYPE_3_L2_CACHE_WRITE_MISS 13
313 #define EVENT_TYPE_3_L2_CACHE_PREFETCH_HIT 14
314 #define EVENT_TYPE_3_L2_CACHE_PREFETCH_MISS 15
315 #define EVENT_TYPE_3_DTLB_READ 16
316 #define EVENT_TYPE_3_DTLB_READ_MISS 17
317 #define EVENT_TYPE_3_DTLB_WRITE 18
318 #define EVENT_TYPE_3_DTLB_WRITE_MISS 19
319 #define EVENT_TYPE_3_ITLB_READ 20
320 #define EVENT_TYPE_3_BTB_READ 22
321 #define EVENT_TYPE_3_BTB_READ_MISS 23
322 #define EVENT_TYPE_3_BTB_WRITE 24
323 #define EVENT_TYPE_3_BTB_WRITE_MISS 25
324 
325 /* Enable the corresponding performance monitor counter increment for events in Machine/Supervisor/User Mode.
     * These values are the `ena` argument of HPM_EVENT, which shifts them left by 28. */
326 #define MSU_EVENT_ENABLE 0x0F /* all four low bits set; NOTE(review): includes 0x04, which has no named macro here */
327 #define MEVENT_EN 0x08
328 #define SEVENT_EN 0x02
329 #define UEVENT_EN 0x01
330 
331 #ifdef BENCH_XLEN_MODE /* READ_HPM_COUNTER: function the HPM_* macros call as READ_HPM_COUNTER(idx) */
335 #define READ_HPM_COUNTER __read_hpm_counter /* chosen under the same condition as the unsigned long Bench_Type */
336 #else
337 #define READ_HPM_COUNTER __get_hpm_counter /* chosen under the same condition as the uint64_t Bench_Type */
338 #endif /* #ifdef BENCH_XLEN_MODE */
339 
341 #define HPM_DECLARE_VAR(idx) static volatile Bench_Type __hpm_sttcyc##idx, __hpm_endcyc##idx, __hpm_usecyc##idx, __hpm_sumcyc##idx; /* start, end, last delta and running sum for counter idx */ \
342  static volatile unsigned long __hpm_lpcnt##idx, __hpm_val##idx; /* sample count, and the event value last stored by HPM_START */
343 
/* Move the mode-enable bits (e.g. SEVENT_EN | UEVENT_EN) up to bit 28.
 * The argument is parenthesized so that an expression such as `a | b` is
 * shifted as a whole instead of only its last operand.
 * NOTE(review): with plain int arguments, enable values that have bit 3 set
 * (MEVENT_EN, MSU_EVENT_ENABLE) shift into bit 31 of a 32-bit int; confirm
 * whether an unsigned type is wanted here. */
#define HPM_SEL_ENABLE(ena) ((ena) << 28)
/* Combine the event type selector with the event index shifted left by 4. */
#define HPM_SEL_EVENT(sel, idx) ((sel) | ((idx) << 4))

/* Build a complete event value from selector, event index and enable bits. */
#define HPM_EVENT(sel, idx, ena) (HPM_SEL_ENABLE(ena) | HPM_SEL_EVENT(sel, idx))
349 
/*
 * Performance-monitor measurement macros. `idx` selects the per-counter
 * variables created by HPM_DECLARE_VAR(idx) and is also passed to the counter
 * access functions.
 *
 * Multi-statement macros are wrapped in do { ... } while (0) so that
 * `if (cond) HPM_SAMPLE(...);` guards the whole expansion. The ';' after
 * `while (0)` is kept on purpose: the previous expansions ended with ';', so
 * call sites written without their own semicolon still compile.
 */

/* Print a banner and call __prepare_bench_env(). */
#define HPM_INIT() do { \
        printf("High performance monitor initialized\n"); \
        __prepare_bench_env(); \
    } while (0);

/* Clear the running sum and the sample count of counter idx. */
#define HPM_RESET(idx, proc, event) do { \
        __hpm_sumcyc##idx = 0; __hpm_lpcnt##idx = 0; \
    } while (0);

/* Store the event value, program it for counter idx, zero the counter and
 * record its starting value. */
#define HPM_START(idx, proc, event) do { \
        __hpm_val##idx = (event); \
        __set_hpm_event(idx, __hpm_val##idx); \
        __set_hpm_counter(idx, 0); \
        __hpm_sttcyc##idx = READ_HPM_COUNTER(idx); \
    } while (0);

/* Read counter idx, store the delta since HPM_START, add it to the running
 * sum and count one more sample. */
#define HPM_SAMPLE(idx, proc, event) do { \
        __hpm_endcyc##idx = READ_HPM_COUNTER(idx); \
        __hpm_usecyc##idx = __hpm_endcyc##idx - __hpm_sttcyc##idx; \
        __hpm_sumcyc##idx += __hpm_usecyc##idx; \
        __hpm_lpcnt##idx += 1; \
    } while (0);

/* Take a sample, then print the last delta. idx and event are cast to the
 * types that %d and %x expect, so a wider `event` expression cannot mismatch
 * the format string. */
#define HPM_END(idx, proc, event) do { \
        HPM_SAMPLE(idx, proc, event) \
        printf("HPM%d:0x%x, %s, %lu\n", (int)(idx), (unsigned int)(event), #proc, (unsigned long)__hpm_usecyc##idx); \
    } while (0);

/* Print the running sum for counter idx. */
#define HPM_STOP(idx, proc, event) \
    printf("HPM%d:0x%x, %s, %lu\n", (int)(idx), (unsigned int)(event), #proc, (unsigned long)__hpm_sumcyc##idx);

/* Print the sample count and the running sum for counter idx. */
#define HPM_STAT(idx, proc, event) \
    printf("STATHPM%d:0x%x, %s, %lu, %lu\n", (int)(idx), (unsigned int)(event), #proc, (unsigned long)__hpm_lpcnt##idx, (unsigned long)__hpm_sumcyc##idx);

/* Last measured delta of counter idx. */
#define HPM_GET_USECYC(idx) (__hpm_usecyc##idx)

/* Running sum of counter idx since the last HPM_RESET. */
#define HPM_GET_SUMCYC(idx) (__hpm_sumcyc##idx)

/* Sample count of counter idx since the last HPM_RESET. */
#define HPM_GET_LPCNT(idx) (__hpm_lpcnt##idx)
392 
393 #else
/*
 * Stub HPM macros, used when the enclosing condition (__HPM_PRESENT == 1 and
 * DISABLE_NMSIS_HPM not defined) is false. Statement macros expand to nothing.
 */
#define HPM_DECLARE_VAR(idx)
/* Evaluates to 0 rather than to nothing, so code that uses the event value in
 * an expression (e.g. assigns it to a variable) still compiles when HPM is
 * disabled. */
#define HPM_EVENT(sel, idx, ena) (0)
#define HPM_INIT()
#define HPM_RESET(idx, proc, event)
#define HPM_START(idx, proc, event)
#define HPM_SAMPLE(idx, proc, event)
#define HPM_END(idx, proc, event)
#define HPM_STOP(idx, proc, event)
#define HPM_STAT(idx, proc, event)
#define HPM_GET_USECYC(idx) (0)
#define HPM_GET_SUMCYC(idx) (0)
/* Evaluates to 1, not 0 - presumably so that averages dividing by the sample
 * count stay defined; TODO confirm. */
#define HPM_GET_LPCNT(idx) (1)
406 #endif
407 
408 // NMSIS Helpers
409 #ifndef DISABLE_NMSIS_HELPER /* helpers print a fixed marker line; defining DISABLE_NMSIS_HELPER turns them into no-ops */
411 #define NMSIS_TEST_PASS() printf("\nNMSIS_TEST_PASS\n"); /* expansion already ends with ';' */
413 #define NMSIS_TEST_FAIL() printf("\nNMSIS_TEST_FAIL\n"); /* expansion already ends with ';' */
414 #else
415 #define NMSIS_TEST_PASS()
416 #define NMSIS_TEST_FAIL()
417 #endif
418  /* End of Doxygen Group NMSIS_Core_Bench_Helpers */
420 #ifdef __cplusplus
421 }
422 #endif
423 #endif /* __NMSIS_BENCH__ */
__STATIC_FORCEINLINE void __prepare_bench_env(void)
Prepare benchmark environment.
Definition: nmsis_bench.h:73
__STATIC_FORCEINLINE void __enable_all_counter(void)
Enable all MCYCLE & MINSTRET & MHPMCOUNTER counter.
#define MSTATUS_VS
#define __RV_CSR_SET(csr, val)
CSR operation Macro for csrs instruction.
#define CSR_MSTATUS
#define __STATIC_FORCEINLINE
Define a static function that should be always inlined by the compiler.
Definition: nmsis_gcc.h:70