From 88b4f20c93af4480fc7910d05a02cea854b977a8 Mon Sep 17 00:00:00 2001
From: AlexDenisov
Date: Mon, 25 Nov 2024 21:33:50 +0100
Subject: [PATCH] Collect CPU counters

The counters are collected using https://github.com/lunacookies/simple-kpc

simple_kpc.c is compiled into lightstorm_runtime_main and into every
benchmark executable. Both main() functions now call sk_init(), which exits
with status 1 unless the kperf frameworks load and the counters are
accessible. The lightstorm compiler is additionally invoked with --no-opt.
---
 cmake/lightstorm.cmake                      |   2 +-
 lib/runtime/CMakeLists.txt                  |   5 +-
 lib/runtime/lightstorm_runtime_main.c       |  16 ++
 simple_kpc.c                                | 268 ++++++++++++++++++++
 simple_kpc.h                                |  19 ++
 tests/benchmarks/CMakeLists.txt             |   5 +-
 tests/benchmarks/benchmarks_bytecode_main.c |  16 ++
 7 files changed, 326 insertions(+), 5 deletions(-)
 create mode 100644 simple_kpc.c
 create mode 100644 simple_kpc.h

diff --git a/cmake/lightstorm.cmake b/cmake/lightstorm.cmake
index 5c65d55..dc198e0 100644
--- a/cmake/lightstorm.cmake
+++ b/cmake/lightstorm.cmake
@@ -4,7 +4,7 @@ function(add_lightstorm_executable ruby)
   set(target_name ${ruby}.exe)
   add_custom_command(
     OUTPUT ${out_c}
-    COMMAND $ ${in_ruby} -o ${out_c}
+    COMMAND $ --no-opt ${in_ruby} -o ${out_c}
     DEPENDS ${in_ruby} lightstorm)
   add_executable(${target_name} ${out_c})
   target_compile_options(${target_name} PRIVATE ${LIGHTSTORM_CFLAGS})
diff --git a/lib/runtime/CMakeLists.txt b/lib/runtime/CMakeLists.txt
index 562cbc6..afdca14 100644
--- a/lib/runtime/CMakeLists.txt
+++ b/lib/runtime/CMakeLists.txt
@@ -1,7 +1,8 @@
-add_library(lightstorm_runtime_main STATIC lightstorm_runtime_main.c)
+add_library(lightstorm_runtime_main STATIC lightstorm_runtime_main.c
+                                           ${CMAKE_SOURCE_DIR}/simple_kpc.c)
 target_include_directories(
   lightstorm_runtime_main
-  PUBLIC ${CMAKE_SOURCE_DIR}/include
+  PUBLIC ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}
          ${CMAKE_SOURCE_DIR}/third_party/mruby/include
          ${CMAKE_SOURCE_DIR}/third_party/mruby/build/host/include)
 target_compile_options(lightstorm_runtime_main PRIVATE ${LIGHTSTORM_CFLAGS})
diff --git a/lib/runtime/lightstorm_runtime_main.c b/lib/runtime/lightstorm_runtime_main.c
index 9a44c5a..49869e7 100644
--- a/lib/runtime/lightstorm_runtime_main.c
+++ b/lib/runtime/lightstorm_runtime_main.c
@@ -1,16 +1,32 @@
+#include "simple_kpc.h"
 #include
 #include

 mrb_value lightstorm_top(mrb_state *mrb, mrb_value self);

 int main() {
+  sk_init();
+
+  sk_events *e = sk_events_create();
+  sk_events_push(e, "cycles", "FIXED_CYCLES");
+  sk_events_push(e, "instructions", "FIXED_INSTRUCTIONS");
+  sk_events_push(e, "branches", "INST_BRANCH");
+  sk_events_push(e, "branch misses", "BRANCH_MISPRED_NONSPEC");
+  sk_events_push(e, "load/stores", "INST_LDST");
+  sk_events_push(e, "INST_SIMD_LD", "INST_SIMD_LD");
+  sk_events_push(e, "INST_SIMD_ST", "INST_SIMD_ST");
+  sk_events_push(e, "INST_BRANCH_INDIR", "INST_BRANCH_INDIR");
+
   mrb_state *mrb = mrb_open();
   struct RProc *proc = mrb_proc_new_cfunc(mrb, lightstorm_top);
   MRB_PROC_SET_TARGET_CLASS(proc, mrb->object_class);
   mrb->c->ci->proc = proc;
   mrb_value self = mrb_top_self(mrb);
   mrb->c->ci->stack[0] = self;
+  sk_in_progress_measurement *m = sk_start_measurement(e);
   lightstorm_top(mrb, self);
+  sk_finish_measurement(m);
   mrb_close(mrb);
+  sk_events_destroy(e);
   return 0;
 }
diff --git a/simple_kpc.c b/simple_kpc.c
new file mode 100644
index 0000000..7101797
--- /dev/null
+++ b/simple_kpc.c
@@ -0,0 +1,268 @@
+#include "simple_kpc.h"
+
+#include <dlfcn.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef size_t usize;
+
+#define ARRAY_LENGTH(x) (sizeof(x) / sizeof((x)[0]))
+
+// Capacity of every per-counter array in this file.
+#define KPC_MAX_COUNTERS 32
+
+// Opaque kperfdata types; only pointers to them are handled here.
+typedef struct kpep_db kpep_db;
+typedef struct kpep_config kpep_config;
+typedef struct kpep_event kpep_event;
+
+// kperf.framework entry points, resolved at run time by sk_init().
+static int (*kpc_set_counting)(u32 classes);
+static int (*kpc_set_thread_counting)(u32 classes);
+static int (*kpc_set_config)(u32 classes, u64 *config);
+static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf);
+static int (*kpc_force_all_ctrs_set)(int val);
+static int (*kpc_force_all_ctrs_get)(int *val_out);
+
+// kperfdata.framework entry points, resolved at run time by sk_init().
+static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr);
+static void (*kpep_config_free)(kpep_config *cfg);
+static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr, u32 flag, u32 *err);
+static int (*kpep_config_force_counters)(kpep_config *cfg);
+static int (*kpep_config_kpc)(kpep_config *cfg, u64 *buf, usize buf_size);
+static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr);
+static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size);
+
+static int (*kpep_db_create)(const char *name, kpep_db **db_ptr);
+static void (*kpep_db_free)(kpep_db *db);
+static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr);
+
+// Pairs an exported symbol name with the function pointer that receives it.
+typedef struct {
+  const char *name;
+  void **impl;
+} symbol;
+
+#define SYMBOL(n) { .name = #n, .impl = (void **)&n }
+
+static const symbol KPERF_SYMBOLS[] = {
+  SYMBOL(kpc_set_counting), SYMBOL(kpc_set_thread_counting), SYMBOL(kpc_set_config),
+  SYMBOL(kpc_get_thread_counters), SYMBOL(kpc_force_all_ctrs_set), SYMBOL(kpc_force_all_ctrs_get),
+};
+
+static const symbol KPERFDATA_SYMBOLS[] = {
+  SYMBOL(kpep_config_create), SYMBOL(kpep_config_free),
+  SYMBOL(kpep_config_add_event), SYMBOL(kpep_config_force_counters),
+  SYMBOL(kpep_config_kpc), SYMBOL(kpep_config_kpc_classes),
+  SYMBOL(kpep_config_kpc_map), SYMBOL(kpep_db_create),
+  SYMBOL(kpep_db_free), SYMBOL(kpep_db_event),
+};
+
+#define KPERF_PATH "/System/Library/PrivateFrameworks/kperf.framework/kperf"
+#define KPERFDATA_PATH "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata"
+
+static bool initialized = false;
+
+// Reports a failed kperf/kperfdata call and terminates the process.
+static void sk_check(int rc, const char *what) {
+  if (rc != 0) {
+    fprintf(stderr, "simple_kpc: %s failed (code %d)\n", what, rc);
+    exit(1);
+  }
+}
+
+// calloc() wrapper that aborts instead of returning NULL.
+static void *sk_xcalloc(usize count, usize size) {
+  void *p = calloc(count, size);
+  if (!p) {
+    fprintf(stderr, "simple_kpc: out of memory\n");
+    abort();
+  }
+  return p;
+}
+
+// Resolves every entry of `symbols` from `handle`; exits on the first miss.
+static void sk_load_symbols(void *handle, const symbol *symbols, usize count, const char *library) {
+  for (usize i = 0; i < count; i++) {
+    void *p = dlsym(handle, symbols[i].name);
+    if (!p) {
+      fprintf(stderr, "simple_kpc: failed to load %s function %s\n", library, symbols[i].name);
+      exit(1);
+    }
+    *symbols[i].impl = p;
+  }
+}
+
+// Loads kperf and kperfdata, resolves their entry points and checks that the
+// counters are accessible. Exits with status 1 on any failure. Calling it
+// again after it has succeeded does nothing.
+void sk_init(void) {
+  if (initialized) {
+    return;
+  }
+
+  // Neither handle is ever dlclose()d, so the resolved pointers stay usable.
+  void *kperf = dlopen(KPERF_PATH, RTLD_LAZY);
+  if (!kperf) {
+    fprintf(stderr, "simple_kpc: failed to load kperf.framework, message: %s\n", dlerror());
+    exit(1);
+  }
+
+  void *kperfdata = dlopen(KPERFDATA_PATH, RTLD_LAZY);
+  if (!kperfdata) {
+    fprintf(stderr, "simple_kpc: failed to load kperfdata.framework, message: %s\n", dlerror());
+    exit(1);
+  }
+
+  sk_load_symbols(kperf, KPERF_SYMBOLS, ARRAY_LENGTH(KPERF_SYMBOLS), "kperf");
+  sk_load_symbols(kperfdata, KPERFDATA_SYMBOLS, ARRAY_LENGTH(KPERFDATA_SYMBOLS), "kperfdata");
+
+  // Give the out-parameter real storage instead of passing NULL.
+  int force_all_ctrs = 0;
+  if (kpc_force_all_ctrs_get(&force_all_ctrs) != 0) {
+    fprintf(stderr, "simple_kpc: permission denied, xnu/kpc requires root privileges\n");
+    exit(1);
+  }
+
+  initialized = true;
+}
+
+struct sk_events {
+  const char **human_readable_names;
+  const char **internal_names;
+  usize count;
+};
+
+// Returns an empty event list with room for KPC_MAX_COUNTERS entries.
+sk_events *sk_events_create(void) {
+  sk_events *e = sk_xcalloc(1, sizeof(*e));
+  e->human_readable_names = sk_xcalloc(KPC_MAX_COUNTERS, sizeof(*e->human_readable_names));
+  e->internal_names = sk_xcalloc(KPC_MAX_COUNTERS, sizeof(*e->internal_names));
+  return e;
+}
+
+// Appends one event. Only the two pointers are stored, not copies of the
+// strings. Aborts once the list already holds KPC_MAX_COUNTERS events.
+void sk_events_push(sk_events *e, const char *human_readable_name, const char *internal_name) {
+  if (e->count >= KPC_MAX_COUNTERS) {
+    fprintf(stderr, "Cannot push event anymore\n");
+    abort();
+  }
+  e->human_readable_names[e->count] = human_readable_name;
+  e->internal_names[e->count] = internal_name;
+  e->count++;
+}
+
+// Frees the list and its two pointer arrays; the strings themselves are not
+// freed. Passing NULL does nothing.
+void sk_events_destroy(sk_events *e) {
+  if (!e) {
+    return;
+  }
+  free(e->human_readable_names);
+  free(e->internal_names);
+  free(e);
+}
+
+struct sk_in_progress_measurement {
+  sk_events *events;
+  u32 classes;
+  usize counter_map[KPC_MAX_COUNTERS];
+  u64 regs[KPC_MAX_COUNTERS];
+  u64 counters[KPC_MAX_COUNTERS];
+};
+
+// Configures the counters for the events in `e`, starts counting and records
+// the calling thread's baseline values. `e` is borrowed, not copied, and is
+// read again by sk_finish_measurement(). Exits with status 1 if a step fails.
+sk_in_progress_measurement *sk_start_measurement(sk_events *e) {
+  // Checked at run time: an assert() disappears under NDEBUG, and the calls
+  // below would then go through null function pointers.
+  if (!initialized) {
+    fprintf(stderr, "simple_kpc: sk_init() must be called first\n");
+    abort();
+  }
+
+  sk_in_progress_measurement *m = sk_xcalloc(1, sizeof(*m));
+  m->events = e;
+
+  kpep_db *db = NULL;
+  sk_check(kpep_db_create(NULL, &db), "kpep_db_create");
+
+  kpep_config *config = NULL;
+  sk_check(kpep_config_create(db, &config), "kpep_config_create");
+  sk_check(kpep_config_force_counters(config), "kpep_config_force_counters");
+
+  for (usize i = 0; i < e->count; i++) {
+    const char *internal_name = e->internal_names[i];
+    kpep_event *event = NULL;
+    // As before, a failed lookup is detected through `event` staying NULL.
+    kpep_db_event(db, internal_name, &event);
+
+    if (event == NULL) {
+      const char *human_readable_name = e->human_readable_names[i];
+      fprintf(stderr, "Cannot find event for %s: “%s”.\n", human_readable_name, internal_name);
+      exit(1);
+    }
+
+    sk_check(kpep_config_add_event(config, &event, 0, NULL), "kpep_config_add_event");
+  }
+
+  sk_check(kpep_config_kpc_classes(config, &m->classes), "kpep_config_kpc_classes");
+  sk_check(kpep_config_kpc_map(config, m->counter_map, sizeof(m->counter_map)), "kpep_config_kpc_map");
+  sk_check(kpep_config_kpc(config, m->regs, sizeof(m->regs)), "kpep_config_kpc");
+
+  kpep_config_free(config);
+  kpep_db_free(db);
+
+  sk_check(kpc_force_all_ctrs_set(1), "kpc_force_all_ctrs_set");
+  sk_check(kpc_set_config(m->classes, m->regs), "kpc_set_config");
+
+  // Do not put any library code below these kpc calls! The three statuses
+  // are merged so that a single check follows the baseline read.
+  int rc = kpc_set_counting(m->classes);
+  rc |= kpc_set_thread_counting(m->classes);
+  rc |= kpc_get_thread_counters(0, KPC_MAX_COUNTERS, m->counters);
+  sk_check(rc, "starting the counters");
+  return m;
+}
+
+// Reads the counters, stops counting, prints one line per event to stderr
+// (counter delta and human-readable name) and frees `m`.
+void sk_finish_measurement(sk_in_progress_measurement *m) {
+  u64 counters_after[KPC_MAX_COUNTERS] = { 0 };
+
+  // Do not put any library code above these kpc calls!
+  // We do not want to execute anything until timing has stopped
+  int rc = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_after);
+  kpc_set_counting(0);
+  kpc_set_thread_counting(0);
+  kpc_force_all_ctrs_set(0);
+
+  if (rc != 0) {
+    fprintf(stderr, "simple_kpc: reading the counters failed (code %d)\n", rc);
+    free(m);
+    return;
+  }
+
+  // No setlocale() here: the format below uses no locale-dependent flag, and
+  // changing LC_NUMERIC would leak into the host program.
+  fprintf(stderr, "\033[1m=== simple-kpc report ===\033[m\n\n");
+  for (usize i = 0; i < m->events->count; i++) {
+    const char *name = m->events->human_readable_names[i];
+    usize idx = m->counter_map[i];
+    if (idx >= KPC_MAX_COUNTERS) {
+      fprintf(stderr, "simple_kpc: counter index out of range for %s\n", name);
+      continue;
+    }
+    u64 diff = counters_after[idx] - m->counters[idx];
+    fprintf(stderr, "\033[32m%16llu \033[95m%s\033[m\n", (unsigned long long)diff, name);
+  }
+
+  free(m);
+}
diff --git a/simple_kpc.h b/simple_kpc.h
new file mode 100644
index 0000000..3a801f3
--- /dev/null
+++ b/simple_kpc.h
@@ -0,0 +1,19 @@
+#ifndef SIMPLE_KPC_H
+#define SIMPLE_KPC_H
+
+typedef struct sk_events sk_events;
+typedef struct sk_in_progress_measurement sk_in_progress_measurement;
+
+// Loads the kperf frameworks; exits the process with status 1 on failure.
+void sk_init(void);
+
+// Event list: create, append (display name, kperf event name), destroy.
+sk_events *sk_events_create(void);
+void sk_events_push(sk_events *e, const char *human_readable_name, const char *internal_name);
+void sk_events_destroy(sk_events *e);
+
+// sk_finish_measurement() prints the report to stderr and frees `m`.
+sk_in_progress_measurement *sk_start_measurement(sk_events *e);
+void sk_finish_measurement(sk_in_progress_measurement *m);
+
+#endif // SIMPLE_KPC_H
diff --git a/tests/benchmarks/CMakeLists.txt b/tests/benchmarks/CMakeLists.txt
index 1bd4982..7e78eb3 100644
--- a/tests/benchmarks/CMakeLists.txt
+++ b/tests/benchmarks/CMakeLists.txt
@@ -7,10 +7,11 @@ function(add_bench_executable ruby)
     COMMAND $ -Blightstorm_bench -o ${host_c} ${full_ruby_path}
     DEPENDS ${full_ruby_path})

-  add_executable(${target_name} benchmarks_bytecode_main.c ${host_c})
+  add_executable(${target_name} benchmarks_bytecode_main.c ${host_c}
+                                ${CMAKE_SOURCE_DIR}/simple_kpc.c)
   target_include_directories(
     ${target_name}
-    PRIVATE ${CMAKE_SOURCE_DIR}/third_party/mruby/include
+    PRIVATE ${CMAKE_SOURCE_DIR}/third_party/mruby/include ${CMAKE_SOURCE_DIR}
             ${CMAKE_SOURCE_DIR}/third_party/mruby/build/host/include)
   target_compile_options(${target_name} PRIVATE -g)
   target_link_libraries(${target_name} PRIVATE mruby_static)
diff --git a/tests/benchmarks/benchmarks_bytecode_main.c b/tests/benchmarks/benchmarks_bytecode_main.c
index cf1eac4..32aa590 100644
--- a/tests/benchmarks/benchmarks_bytecode_main.c
+++ b/tests/benchmarks/benchmarks_bytecode_main.c
@@ -1,3 +1,4 @@
+#include "simple_kpc.h"
 #include
 #include
 #include
@@ -5,8 +6,23 @@
 extern const uint8_t lightstorm_bench[];

 int main() {
+  sk_init();
+
+  sk_events *e = sk_events_create();
+  sk_events_push(e, "cycles", "FIXED_CYCLES");
+  sk_events_push(e, "instructions", "FIXED_INSTRUCTIONS");
+  sk_events_push(e, "branches", "INST_BRANCH");
+  sk_events_push(e, "branch misses", "BRANCH_MISPRED_NONSPEC");
+  sk_events_push(e, "load/stores", "INST_LDST");
+  sk_events_push(e, "INST_SIMD_LD", "INST_SIMD_LD");
+  sk_events_push(e, "INST_SIMD_ST", "INST_SIMD_ST");
+  sk_events_push(e, "INST_BRANCH_INDIR", "INST_BRANCH_INDIR");
+
   mrb_state *mrb = mrb_open();
+  sk_in_progress_measurement *m = sk_start_measurement(e);
   mrb_load_irep(mrb, lightstorm_bench);
+  sk_finish_measurement(m);
   mrb_close(mrb);
+  sk_events_destroy(e);
   return 0;
 }