NMSIS-Core  Version 1.5.0
NMSIS-Core support for Nuclei processor-based devices
nmsis_bench.h
1 /*
2  * Copyright (c) 2019 Nuclei Limited. All rights reserved.
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  *
6  * Licensed under the Apache License, Version 2.0 (the License); you may
7  * not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #ifndef __NMSIS_BENCH__
20 #define __NMSIS_BENCH__
21 
27 #ifdef __cplusplus
28  extern "C" {
29 #endif
30 
31 #include "core_feature_base.h"
32 #include <stdio.h>
33 
/*
 * Bench_Type is the integer type of the cycle/counter variables declared by
 * BENCH_DECLARE_VAR() and HPM_DECLARE_VAR():
 *  - BENCH_XLEN_MODE defined:                      unsigned long
 *  - otherwise, CPU_SERIES defined and equal 100:  uint32_t
 *  - otherwise:                                    uint64_t
 */
34 #ifdef BENCH_XLEN_MODE
35 typedef unsigned long Bench_Type;
36 #else
37 #if defined(CPU_SERIES) && CPU_SERIES == 100
38 typedef uint32_t Bench_Type;
39 #else
40 typedef uint64_t Bench_Type;
41 #endif
42 #endif
43 
78 {
79 #ifdef __riscv_vector
81 #endif
83 }
84 
/*
 * READ_CYCLE names the routine that BENCH_START()/BENCH_SAMPLE() invoke as
 * READ_CYCLE() to sample the cycle counter. A definition made before this
 * point takes precedence; otherwise it resolves to __read_cycle_csr when
 * BENCH_XLEN_MODE is defined and to __get_rv_cycle when it is not.
 * NOTE(review): both routines are declared outside this header - presumably
 * they differ in the width of the value they return; confirm before relying
 * on it.
 */
85 #ifndef READ_CYCLE
94 #ifdef BENCH_XLEN_MODE
96 #define READ_CYCLE __read_cycle_csr
97 #else
99 #define READ_CYCLE __get_rv_cycle
100 #endif /* #ifdef BENCH_XLEN_MODE */
101 #endif /* #ifndef READ_CYCLE */
102 
103 #ifndef DISABLE_NMSIS_BENCH
104 
/*
 * Cycle-counter benchmark helpers (active when DISABLE_NMSIS_BENCH is not
 * defined). Most of these macros expand to several statements that already
 * end in ';', so they are not safe as the sole body of an unbraced if/else.
 * The `proc` argument is only stringified for the printed label.
 */
/* Declare the state shared by the BENCH_* macros: start/end/used/summed
 * cycle values (Bench_Type) plus the loop counter and error flag. */
106 #define BENCH_DECLARE_VAR() static volatile Bench_Type _bc_sttcyc, _bc_endcyc, _bc_usecyc, _bc_sumcyc; \
107  static volatile unsigned long _bc_lpcnt, _bc_ercd;
108
/* Print a banner, call __prepare_bench_env(), then clear the error flag and
 * the cycle sum. The loop counter is not touched here. */
110 #define BENCH_INIT() printf("Benchmark initialized\n"); \
111  __prepare_bench_env(); \
112  _bc_ercd = 0; _bc_sumcyc = 0;
113
/* Zero the cycle sum, the last measured cycles, the loop counter and the error flag. */
115 #define BENCH_RESET(proc) _bc_sumcyc = 0; _bc_usecyc = 0; _bc_lpcnt = 0; _bc_ercd = 0;
116
/* Clear the error flag and record the starting cycle value. */
118 #define BENCH_START(proc) _bc_ercd = 0; \
119  _bc_sttcyc = READ_CYCLE();
120
/* Record the ending cycle value, store end - start as the used cycles,
 * add it to the running sum and count one more loop. */
122 #define BENCH_SAMPLE(proc) _bc_endcyc = READ_CYCLE(); \
123  _bc_usecyc = _bc_endcyc - _bc_sttcyc; \
124  _bc_sumcyc += _bc_usecyc; _bc_lpcnt += 1;
125
/* Take a sample, then print "CSV, <proc>, <used cycles>". The value is cast
 * to unsigned long for %lu; where unsigned long is narrower than Bench_Type
 * only the low part is printed. */
127 #define BENCH_END(proc) BENCH_SAMPLE(proc); \
128  printf("CSV, %s, %lu\n", #proc, (unsigned long)_bc_usecyc);
129
/* Print "CSV, <proc>, <summed cycles>" without taking a new sample. */
131 #define BENCH_STOP(proc) printf("CSV, %s, %lu\n", #proc, (unsigned long)_bc_sumcyc);
132
/* Print "STAT, <proc>, <loop count>, <summed cycles>". */
134 #define BENCH_STAT(proc) printf("STAT, %s, %lu, %lu\n", #proc, (unsigned long)_bc_lpcnt, (unsigned long)_bc_sumcyc);
135
/* Cycles measured by the most recent BENCH_SAMPLE()/BENCH_END(). */
137 #define BENCH_GET_USECYC() (_bc_usecyc)
138
/* Cycles accumulated over all samples since the sum was last cleared. */
140 #define BENCH_GET_SUMCYC() (_bc_sumcyc)
141
/* Number of samples taken since the loop counter was last cleared. */
143 #define BENCH_GET_LPCNT() (_bc_lpcnt)
144
/* Raise the error flag; it is cleared again by BENCH_INIT/BENCH_RESET/BENCH_START. */
146 #define BENCH_ERROR(proc) _bc_ercd = 1;
/* Print "ERROR, <proc>" when the error flag is set, "SUCCESS, <proc>" otherwise.
 * Expands to a complete if/else statement. */
148 #define BENCH_STATUS(proc) if (_bc_ercd) { \
149  printf("ERROR, %s\n", #proc); \
150  } else { \
151  printf("SUCCESS, %s\n", #proc); \
152  }
153 #else
/*
 * Variants used when DISABLE_NMSIS_BENCH is defined: no cycle counter is read
 * and no CSV/STAT line is printed, but the error flag, the loop counter and
 * the status report keep working so that callers build unchanged.
 */
/* Declare only the two variables these variants touch. */
#define BENCH_DECLARE_VAR() static volatile unsigned long _bc_ercd, _bc_lpcnt;
/* Clear the error flag and call __prepare_bench_env(); nothing is printed. */
#define BENCH_INIT() _bc_ercd = 0; __prepare_bench_env();
/* Clear the loop counter and the error flag, as the enabled BENCH_RESET does,
 * so BENCH_GET_LPCNT() and BENCH_STATUS() behave the same in both builds. */
#define BENCH_RESET(proc) _bc_lpcnt = 0; _bc_ercd = 0;
/* Clear the error flag; no start cycle is recorded. */
#define BENCH_START(proc) _bc_ercd = 0;
/* Count one more loop; no cycles are measured. */
#define BENCH_SAMPLE(proc) _bc_lpcnt += 1;
/* Reporting macros expand to nothing in this build. */
#define BENCH_END(proc)
#define BENCH_STOP(proc)
#define BENCH_STAT(proc)
/* No cycles are measured, so both cycle getters yield 0. */
#define BENCH_GET_USECYC() (0)
#define BENCH_GET_SUMCYC() (0)
/* Number of BENCH_SAMPLE() invocations since the last BENCH_RESET(). */
#define BENCH_GET_LPCNT() (_bc_lpcnt)
/* Raise the error flag. */
#define BENCH_ERROR(proc) _bc_ercd = 1;
/* Print "ERROR, <proc>" when the error flag is set, "SUCCESS, <proc>" otherwise.
 * Expands to a complete if/else statement. */
#define BENCH_STATUS(proc) if (_bc_ercd) { \
        printf("ERROR, %s\n", #proc); \
    } else { \
        printf("SUCCESS, %s\n", #proc); \
    }
171 
172 #endif
173 
174 // High performance monitor bench helpers
175 #if defined(__HPM_PRESENT) && (__HPM_PRESENT == 1) && (!defined(DISABLE_NMSIS_HPM))
176 
177 /* Event type selectors. EVENT_SEL_INSTRUCTION_COMMIT and EVENT_SEL_MEMORY_ACCESS
 * are the group-style names for the same values as EVENT_SEL_TYPE_0 and
 * EVENT_SEL_TYPE_1. Presumably meant as the `sel` argument of HPM_EVENT(). */
178 #define EVENT_SEL_INSTRUCTION_COMMIT 0
179 #define EVENT_SEL_MEMORY_ACCESS 1
180 #define EVENT_SEL_TYPE_0 0
181 #define EVENT_SEL_TYPE_1 1
182 /* Event types 2 and 3 were introduced in PMU v2 */
183 #define EVENT_SEL_TYPE_2 2
184 #define EVENT_SEL_TYPE_3 3
185 
186 /* Instruction commit event index macros (same values as the EVENT_TYPE_0_* names) */
187 #define EVENT_INSTRUCTION_COMMIT_CYCLE_COUNT 1
188 #define EVENT_INSTRUCTION_COMMIT_RETIRED_COUNT 2
189 /* Integer load instruction (includes LR) */
190 #define EVENT_INSTRUCTION_COMMIT_INTEGER_LOAD 3
191 /* Integer store instruction (includes SC) */
192 #define EVENT_INSTRUCTION_COMMIT_INTEGER_STORE 4
193 /* Atomic memory operation (does not include LR and SC) */
194 #define EVENT_INSTRUCTION_COMMIT_ATOMIC_MEMORY_OPERATION 5
195 /* System instruction */
196 #define EVENT_INSTRUCTION_COMMIT_SYSTEM 6
197 /* Integer computational instruction (excluding multiplication/division/remainder) */
198 #define EVENT_INSTRUCTION_COMMIT_INTEGER_COMPUTATIONAL 7
199 #define EVENT_INSTRUCTION_COMMIT_CONDITIONAL_BRANCH 8
200 #define EVENT_INSTRUCTION_COMMIT_TAKEN_CONDITIONAL_BRANCH 9
201 #define EVENT_INSTRUCTION_COMMIT_JAL 10
202 #define EVENT_INSTRUCTION_COMMIT_JALR 11
203 #define EVENT_INSTRUCTION_COMMIT_RETURN 12
204 /* Control transfer instruction (CBR+JAL+JALR) */
205 #define EVENT_INSTRUCTION_COMMIT_CONTROL_TRANSFER 13
206 /* Fence instruction (does not include fence.i) */
207 #define EVENT_INSTRUCTION_COMMIT_FENCE_INSTRUCTION 14
208 #define EVENT_INSTRUCTION_COMMIT_INTEGER_MULTIPLICATION 15
209 /* Integer division/remainder instruction */
210 #define EVENT_INSTRUCTION_COMMIT_INTEGER_DIVISION_REMAINDER 16
211 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_LOAD 17
212 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_STORE 18
213 /* Floating-point addition/subtraction */
214 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_ADDITION_SUBTRACTION 19
215 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_MULTIPLICATION 20
216 /* Floating-point fused multiply-add (FMADD, FMSUB, FNMSUB, FNMADD) */
217 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_FUSED_MULTIPLY_ADD_SUB 21
218 #define EVENT_INSTRUCTION_COMMIT_FLOATING_POINT_DIVISION_OR_SQUARE_ROOT 22
219 #define EVENT_INSTRUCTION_COMMIT_OTHER_FLOATING_POINT_INSTRUCTION 23
220 #define EVENT_INSTRUCTION_COMMIT_CONDITIONAL_BRANCH_PREDICTION_FAIL 24
221 /* A JAL_PREDICTION_FAIL event never existed; it was documented by mistake. JALR_PREDICTION_FAIL is therefore 25, not 26. */
222 #define EVENT_INSTRUCTION_COMMIT_JALR_PREDICTION_FAIL 25
223 #define EVENT_INSTRUCTION_COMMIT_POP_PREDICTION_FAIL 26
224 #define EVENT_INSTRUCTION_COMMIT_FENCEI_INSTRUCTION 27
225 #define EVENT_INSTRUCTION_COMMIT_SFENCE_INSTRUCTION 28
226 #define EVENT_INSTRUCTION_COMMIT_ECALL_INSTRUCTION 29
227 #define EVENT_INSTRUCTION_COMMIT_EXCEPTION_INSTRUCTION 30
228 #define EVENT_INSTRUCTION_COMMIT_INTERRUPT_INSTRUCTION 31
229 
230 /* Memory access event index macros. Indexes 6 and 7 have no name in this list. */
231 #define EVENT_MEMORY_ACCESS_ICACHE_MISS 1
232 #define EVENT_MEMORY_ACCESS_DCACHE_MISS 2
233 #define EVENT_MEMORY_ACCESS_ITLB_MISS 3
234 #define EVENT_MEMORY_ACCESS_DTLB_MISS 4
/* MAIN_DTLB_MISS and MAIN_TLB_MISS are two names for the same index (5). */
235 #define EVENT_MEMORY_ACCESS_MAIN_DTLB_MISS 5
236 #define EVENT_MEMORY_ACCESS_MAIN_TLB_MISS 5
237 /* The following events were introduced in PMU v2 */
238 #define EVENT_MEMORY_ACCESS_L2_CACHE_ACCESS 8
239 #define EVENT_MEMORY_ACCESS_L2_CACHE_MISS 9
240 /* For a single core: the count of core memory bus read/write requests.
241  * For an SMP core: the count of cluster memory bus read/write/prefetch
242  * requests initiated by the current core. */
243 #define EVENT_MEMORY_ACCESS_MEMORY_BUS_REQUEST 10
244 #define EVENT_MEMORY_ACCESS_IFU_STALL_CYCLE 11
245 #define EVENT_MEMORY_ACCESS_EXU_STALL_CYCLE 12
246 #define EVENT_MEMORY_ACCESS_TIMER 13
247 
248 /*
249  * New event macro naming for PMU v1 and v2.
250  * The events of one type can no longer be summarized under a single group name,
251  * so the names simply carry the event type id: TYPE_0, TYPE_1, TYPE_2, TYPE_3.
252  */
253 /* Event type 0 (event sel == 0) event name macros; values match EVENT_INSTRUCTION_COMMIT_* */
254 #define EVENT_TYPE_0_CYCLE_COUNT 1
255 #define EVENT_TYPE_0_RETIRED_COUNT 2
256 #define EVENT_TYPE_0_INTEGER_LOAD 3
257 #define EVENT_TYPE_0_INTEGER_STORE 4
258 #define EVENT_TYPE_0_ATOMIC_MEMORY_OPERATION 5
259 #define EVENT_TYPE_0_SYSTEM 6
260 #define EVENT_TYPE_0_INTEGER_COMPUTATIONAL 7
261 #define EVENT_TYPE_0_CONDITIONAL_BRANCH 8
262 #define EVENT_TYPE_0_TAKEN_CONDITIONAL_BRANCH 9
263 #define EVENT_TYPE_0_JAL 10
264 #define EVENT_TYPE_0_JALR 11
265 #define EVENT_TYPE_0_RETURN 12
266 #define EVENT_TYPE_0_CONTROL_TRANSFER 13
267 #define EVENT_TYPE_0_FENCE_INSTRUCTION 14
268 #define EVENT_TYPE_0_INTEGER_MULTIPLICATION 15
269 #define EVENT_TYPE_0_INTEGER_DIVISION_REMAINDER 16
270 #define EVENT_TYPE_0_FLOATING_POINT_LOAD 17
271 #define EVENT_TYPE_0_FLOATING_POINT_STORE 18
272 #define EVENT_TYPE_0_FLOATING_POINT_ADDITION_SUBTRACTION 19
273 #define EVENT_TYPE_0_FLOATING_POINT_MULTIPLICATION 20
274 #define EVENT_TYPE_0_FLOATING_POINT_FUSED_MULTIPLY_ADD_SUB 21
275 #define EVENT_TYPE_0_FLOATING_POINT_DIVISION_OR_SQUARE_ROOT 22
276 #define EVENT_TYPE_0_OTHER_FLOATING_POINT_INSTRUCTION 23
277 #define EVENT_TYPE_0_CONDITIONAL_BRANCH_PREDICTION_FAIL 24
278 #define EVENT_TYPE_0_JALR_PREDICTION_FAIL 25
279 #define EVENT_TYPE_0_POP_PREDICTION_FAIL 26
280 #define EVENT_TYPE_0_FENCEI_INSTRUCTION 27
281 #define EVENT_TYPE_0_SFENCE_INSTRUCTION 28
282 #define EVENT_TYPE_0_ECALL_INSTRUCTION 29
283 #define EVENT_TYPE_0_EXCEPTION_INSTRUCTION 30
284 #define EVENT_TYPE_0_INTERRUPT_INSTRUCTION 31
285 
286 /* Event type 1 (event sel == 1) event name macros; indexes 6 and 7 have no name here */
287 #define EVENT_TYPE_1_ICACHE_READ_MISS 1
288 #define EVENT_TYPE_1_DCACHE_RW_MISS 2
289 #define EVENT_TYPE_1_ITLB_READ_MISS 3
290 #define EVENT_TYPE_1_DTLB_RW_MISS 4
291 #define EVENT_TYPE_1_MAIN_TLB_MISS 5
292 #define EVENT_TYPE_1_L2_CACHE_ACCESS 8
293 #define EVENT_TYPE_1_L2_CACHE_MISS 9
294 #define EVENT_TYPE_1_MEMORY_BUS_REQUEST 10
295 #define EVENT_TYPE_1_IFU_STALL_CYCLE 11
296 #define EVENT_TYPE_1_EXU_STALL_CYCLE 12
297 #define EVENT_TYPE_1_TIMER 13
298
299 /* Event type 2 (event sel == 2) event name macros; only indexes 2 and 3 are named here */
300 #define EVENT_TYPE_2_BRANCH_INSTRUCTION_COMMIT 2
301 #define EVENT_TYPE_2_BRANCH_PREDICT_FAIL_COMMIT 3
302
303 /* Event type 3 (event sel == 3) event name macros; numbering starts at 0 and
 * indexes 7 and 21 have no name here */
304 #define EVENT_TYPE_3_DCACHE_READ 0
305 #define EVENT_TYPE_3_DCACHE_READ_MISS 1
306 #define EVENT_TYPE_3_DCACHE_WRITE 2
307 #define EVENT_TYPE_3_DCACHE_WRITE_MISS 3
308 #define EVENT_TYPE_3_DCACHE_PREFETCH 4
309 #define EVENT_TYPE_3_DCACHE_PREFETCH_MISS 5
310 #define EVENT_TYPE_3_ICACHE_READ 6
311 #define EVENT_TYPE_3_ICACHE_PREFETCH 8
312 #define EVENT_TYPE_3_ICACHE_PREFETCH_MISS 9
313 #define EVENT_TYPE_3_L2_CACHE_READ 10
314 #define EVENT_TYPE_3_L2_CACHE_READ_MISS 11
315 #define EVENT_TYPE_3_L2_CACHE_WRITE 12
316 #define EVENT_TYPE_3_L2_CACHE_WRITE_MISS 13
317 #define EVENT_TYPE_3_L2_CACHE_PREFETCH_HIT 14
318 #define EVENT_TYPE_3_L2_CACHE_PREFETCH_MISS 15
319 #define EVENT_TYPE_3_DTLB_READ 16
320 #define EVENT_TYPE_3_DTLB_READ_MISS 17
321 #define EVENT_TYPE_3_DTLB_WRITE 18
322 #define EVENT_TYPE_3_DTLB_WRITE_MISS 19
323 #define EVENT_TYPE_3_ITLB_READ 20
324 #define EVENT_TYPE_3_BTB_READ 22
325 #define EVENT_TYPE_3_BTB_READ_MISS 23
326 #define EVENT_TYPE_3_BTB_WRITE 24
327 #define EVENT_TYPE_3_BTB_WRITE_MISS 25
328 
329 /* Enable masks that let a performance monitor counter increment for events in
 * Machine/Supervisor/User mode. HPM_SEL_ENABLE() shifts the chosen mask left
 * by 28 bits.
 * NOTE(review): MEVENT_EN | SEVENT_EN | UEVENT_EN is 0x0B, while
 * MSU_EVENT_ENABLE is 0x0F, so it also sets bit 0x04, which has no named
 * macro in this file - confirm against the PMU documentation. */
330 #define MSU_EVENT_ENABLE 0x0F
331 #define MEVENT_EN 0x08
332 #define SEVENT_EN 0x02
333 #define UEVENT_EN 0x01
334 
/*
 * READ_HPM_COUNTER names the routine that HPM_START()/HPM_SAMPLE() invoke as
 * READ_HPM_COUNTER(idx): __read_hpm_counter when BENCH_XLEN_MODE is defined,
 * __get_hpm_counter otherwise. Unlike READ_CYCLE, it is defined
 * unconditionally here, with no #ifndef guard around it.
 */
335 #ifdef BENCH_XLEN_MODE
339 #define READ_HPM_COUNTER __read_hpm_counter
340 #else
341 #define READ_HPM_COUNTER __get_hpm_counter
342 #endif /* #ifdef BENCH_XLEN_MODE */
343 
/* Declare the per-counter state used by the HPM_* macros. `idx` is pasted onto
 * every variable name, so each counter index gets its own start/end/used/summed
 * values (Bench_Type) plus a loop counter and the last programmed event value. */
345 #define HPM_DECLARE_VAR(idx) static volatile Bench_Type __hpm_sttcyc##idx, __hpm_endcyc##idx, __hpm_usecyc##idx, __hpm_sumcyc##idx; \
346  static volatile unsigned long __hpm_lpcnt##idx, __hpm_val##idx;
347 
/* Move the mode-enable mask (e.g. MEVENT_EN | UEVENT_EN) up to bit 28.
 * The argument is parenthesized so a compound expression is shifted as a
 * whole, and it is converted to unsigned int first: shifting a signed int
 * such as 0x08 by 28 would overflow into the sign bit, which is undefined
 * behaviour and sign-extends when stored in a 64-bit variable. */
#define HPM_SEL_ENABLE(ena) ((unsigned int)(ena) << 28)
/* Combine the event type selector (low bits) with the event index shifted
 * left by 4; both arguments are parenthesized for the same reason. */
#define HPM_SEL_EVENT(sel, idx) ((sel) | ((idx) << 4))

/* Build a complete event value from the event type selector, the event index
 * and the mode-enable mask. */
#define HPM_EVENT(sel, idx, ena) (HPM_SEL_ENABLE(ena) | HPM_SEL_EVENT(sel, idx))
353 
/*
 * High performance monitor helpers. `idx` selects the counter and is pasted
 * onto the variable names declared by HPM_DECLARE_VAR(idx); `proc` is only
 * stringified for the printed label. Most of these macros expand to several
 * statements that already end in ';'.
 */
/* Print a banner and call __prepare_bench_env(). */
355 #define HPM_INIT() printf("High performance monitor initialized\n"); \
356  __prepare_bench_env();
357
/* Zero the summed value and the loop counter for counter `idx`. The last
 * measured value (__hpm_usecyc) is left as is; `proc` and `event` are unused. */
359 #define HPM_RESET(idx, proc, event) __hpm_sumcyc##idx = 0; __hpm_lpcnt##idx = 0;
360
/* Store `event`, program it through __set_hpm_event(), set the counter to 0
 * through __set_hpm_counter(), then record the starting counter value. */
362 #define HPM_START(idx, proc, event) \
363  __hpm_val##idx = (event); \
364  __set_hpm_event(idx, __hpm_val##idx); \
365  __set_hpm_counter(idx, 0); \
366  __hpm_sttcyc##idx = READ_HPM_COUNTER(idx);
367
/* Record the ending counter value, store end - start as the used value,
 * add it to the running sum and count one more loop. `event` is unused. */
369 #define HPM_SAMPLE(idx, proc, event) \
370  __hpm_endcyc##idx = READ_HPM_COUNTER(idx); \
371  __hpm_usecyc##idx = __hpm_endcyc##idx - __hpm_sttcyc##idx; \
372  __hpm_sumcyc##idx += __hpm_usecyc##idx; \
373  __hpm_lpcnt##idx += 1;
374
/* Take a sample, then print "HPM<idx>:0x<event>, <proc>, <used value>".
 * `event` is evaluated again here and passed to %x, and `idx` is passed to %d. */
376 #define HPM_END(idx, proc, event) \
377  HPM_SAMPLE(idx, proc, event); \
378  printf("HPM%d:0x%x, %s, %lu\n", idx, event, #proc, (unsigned long)__hpm_usecyc##idx);
379
/* Print "HPM<idx>:0x<event>, <proc>, <summed value>" without taking a new sample. */
381 #define HPM_STOP(idx, proc, event) \
382  printf("HPM%d:0x%x, %s, %lu\n", idx, event, #proc, (unsigned long)__hpm_sumcyc##idx);
383
/* Print "STATHPM<idx>:0x<event>, <proc>, <loop count>, <summed value>". */
385 #define HPM_STAT(idx, proc, event) \
386  printf("STATHPM%d:0x%x, %s, %lu, %lu\n", idx, event, #proc, (unsigned long)__hpm_lpcnt##idx, (unsigned long)__hpm_sumcyc##idx);
387
/* Value measured by the most recent HPM_SAMPLE()/HPM_END() on counter `idx`. */
389 #define HPM_GET_USECYC(idx) (__hpm_usecyc##idx)
390
/* Value accumulated over all samples on counter `idx` since the last HPM_RESET(). */
392 #define HPM_GET_SUMCYC(idx) (__hpm_sumcyc##idx)
393
/* Number of samples taken on counter `idx` since the last HPM_RESET(). */
395 #define HPM_GET_LPCNT(idx) (__hpm_lpcnt##idx)
396 
397 #else
/*
 * Stubs used when __HPM_PRESENT is not defined as 1 or DISABLE_NMSIS_HPM is
 * defined. Everything expands to nothing except the three getters.
 * HPM_EVENT() also expands to nothing, so in this build it is only usable as
 * an argument to the other (empty) HPM_* macros, not as a value.
 */
398 #define HPM_DECLARE_VAR(idx)
399 #define HPM_EVENT(sel, idx, ena)
400 #define HPM_INIT()
401 #define HPM_RESET(idx, proc, event)
402 #define HPM_START(idx, proc, event)
403 #define HPM_SAMPLE(idx, proc, event)
404 #define HPM_END(idx, proc, event)
405 #define HPM_STOP(idx, proc, event)
406 #define HPM_STAT(idx, proc, event)
407 #define HPM_GET_USECYC(idx) (0)
408 #define HPM_GET_SUMCYC(idx) (0)
/* Yields 1 rather than 0 - presumably so that dividing a sum by the loop
 * count stays well defined; TODO confirm with callers. */
409 #define HPM_GET_LPCNT(idx) (1)
410 #endif
411 
412 // NMSIS Helpers
/*
 * Test result markers. Unless DISABLE_NMSIS_HELPER is defined, each macro
 * prints its marker on a line of its own ("NMSIS_TEST_PASS" or
 * "NMSIS_TEST_FAIL", preceded and followed by a newline); otherwise both
 * expand to nothing. The enabled expansions already end in ';'.
 */
413 #ifndef DISABLE_NMSIS_HELPER
415 #define NMSIS_TEST_PASS() printf("\nNMSIS_TEST_PASS\n");
417 #define NMSIS_TEST_FAIL() printf("\nNMSIS_TEST_FAIL\n");
418 #else
419 #define NMSIS_TEST_PASS()
420 #define NMSIS_TEST_FAIL()
421 #endif
422  /* End of Doxygen Group NMSIS_Core_Bench_Helpers */
424 #ifdef __cplusplus
425 }
426 #endif
427 #endif /* __NMSIS_BENCH__ */
__STATIC_FORCEINLINE void __prepare_bench_env(void)
Prepare benchmark environment.
Definition: nmsis_bench.h:77
__STATIC_FORCEINLINE void __enable_all_counter(void)
Enable all MCYCLE & MINSTRET & MHPMCOUNTER counter.
#define MSTATUS_VS
#define __RV_CSR_SET(csr, val)
CSR operation Macro for csrs instruction.
#define CSR_MSTATUS
#define __STATIC_FORCEINLINE
Define a static function that should be always inlined by the compiler.
Definition: nmsis_gcc.h:70