3 * (c) Chang S. Bae, 2013
6 #include <linux/cdev.h>
7 #include <linux/errno.h>
9 #include <asm/msr-index.h>
11 #include <palacios/vmm_types.h>
12 #include <palacios/vmm_util.h>
13 #include <interfaces/vmm_pmu.h>
17 #include "iface-pmu-intel.h"
18 #include "iface-pmu-amd.h"
19 #include "util-queue.h"
20 #include "linux-exts.h"
23 // Number of inits/deinits we have seen (inc on init, dec on deinit)
24 // This is per CPU - init/deinit mean init/deinit PMU
25 // tracking ON THE CURRENT CORE
26 static DEFINE_PER_CPU(u32, pmu_refcount) = 0;
30 * some macros may be commonly used
/*
 * MSR and bit-manipulation helpers.
 *
 * MSR_READ/MSR_WRITE wrap the kernel's rdmsrl/wrmsrl, moving the full
 * 64-bit MSR value through the .q member of the msr union passed in.
 *
 * The bit helpers fully parenthesize every argument and shift an
 * unsigned constant, so expression arguments (e.g. SET_BIT(v, a|b))
 * expand correctly and shifting into bit 31 is not signed-overflow UB.
 * NOTE(review): shift counts >= 32 are still undefined behavior since
 * the shifted operand promotes to 32-bit int; callers must keep the
 * bit index in range.
 */
32 #define MSR_READ(msrs, c) do {rdmsrl((c), (msrs).q);} while (0)
33 #define MSR_WRITE(msrs, c) do {wrmsrl((c), (msrs).q);} while (0)
34 #define SET_BIT(val, i) ((val) |= (1U << (i)))
35 #define CLEAR_BIT(val, u, i) ((val) &= ~(((u) & 1U) << (i)))
36 #define SET_BYTE(val, u, i) ((val) |= (((u) & 255U) << (i)))
37 #define CHECK_BIT(val, i) ((val) & (1U << (i)))
// Execute CPUID with leaf `id` and store EAX/EBX/ECX/EDX into dest[0..3].
// NOTE(review): the asm statement itself is not visible in this chunk;
// only the output-constraint line appears below — confirm against full file.
40 static inline void cpuid_string(u32 id, u32 dest[4]) {
42   :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
// Copy the 12-byte CPUID vendor string into name[] in the canonical
// EBX, EDX, ECX order (leaf 0), leaving room for a terminating NUL in
// the 13-byte buffer. The u32 stores below write the string 4 bytes at
// a time through a cast of the char buffer.
47 static int get_cpu_vendor(char name[13])
54   ((u32*)name)[0]=dest[1];
55   ((u32*)name)[1]=dest[3];
56   ((u32*)name)[2]=dest[2];
// Return nonzero iff the CPUID vendor string is "GenuineIntel".
62 static int is_intel(void)
66   return !strcmp(name,"GenuineIntel");
// Return nonzero iff the CPUID vendor string is "AuthenticAMD".
69 static int is_amd(void)
73   return !strcmp(name,"AuthenticAMD");
79 * AMD and Intel implementations are distinguished by prefix: INTEL or AMD
84 * description: check available slots in pmu
85 * return: -1 if none, else returns index: 0 ... 3
// Scan the Intel programmable counter controls for a free slot: a slot
// whose EN bit is already set is in use (per the header comment above,
// returns a free index 0..3, or -1 when none remain).
88 static int intel_get_slot(void) {
96   for (i=0; i<INTEL_NUM_PMU_CONTROLS; i++) {
97     INTEL_CTRL_READ(control, i);
98     if(control.q & (0x1<<INTEL_EN_BIT)) {
// AMD counterpart of intel_get_slot(): scan the PERF_CTL registers for
// a counter whose EN bit is clear; return its index or -1 if all busy.
109 static int amd_get_slot(void) {
116   for (i=0; i<AMD_NUM_PMU_CONTROLS; i++) {
117     AMD_CTRL_READ(control, i);
118     if(control.q & (0x1<<AMD_EN_BIT)) {
131 * description: find index of pmu register that is available
// Find the programmable counter currently programmed with the given
// (event select, unit mask) pair by decoding the low 32 bits of each
// PERFEVTSEL register; returns the counter index, or (per the header
// comment) a failure value when no counter matches.
133 static int intel_find_idx(uint8_t event, uint8_t mask) {
140   for (i=0; i<INTEL_NUM_PMU_COUNTERS; i++) {
141     INTEL_CTRL_READ(control, i);
142     if((((control.l>>INTEL_EVENT_BIT) & 0xff) == event) &&
143        (((control.l>>INTEL_UMASK_BIT) & 0xff) == mask)) {
153 * following implementations : init, deinit, start_tracking, stop_track and get_value
154 * specifically fit into the pmu interface
// Read the current value of the PMU counter tracking `ctr`.
// Clock and retired-instruction counts come from the fixed-function
// counters; the other events are located by (event, umask) lookup in
// the programmable counters and fail (INTEL_READ_FAILED) if no counter
// is currently programmed for that event.
157 static uint64_t intel_get_value(v3_pmon_ctr_t ctr) {
167   case V3_PMON_CLOCK_COUNT:
168     INTEL_FIXED_CTR_READ(count, INTEL_IDX_CLK_IN_FPMU);
170   case V3_PMON_RETIRED_INST_COUNT:
171     INTEL_FIXED_CTR_READ(count, INTEL_IDX_INST_IN_FPMU);
173   case V3_PMON_MEM_LOAD_COUNT:
174     if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_LOADS)) >= 0) {
175       INTEL_CTR_READ(count, ctr_idx);
177       goto INTEL_READ_FAILED;
180   case V3_PMON_MEM_STORE_COUNT:
181     if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_STORES)) >= 0) {
182       INTEL_CTR_READ(count, ctr_idx);
184       goto INTEL_READ_FAILED;
187   case V3_PMON_CACHE_MISS_COUNT:
188     if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS)) >= 0) {
189       INTEL_CTR_READ(count, ctr_idx);
191       goto INTEL_READ_FAILED;
194   case V3_PMON_TLB_MISS_COUNT:
195     if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS)) >= 0) {
196       INTEL_CTR_READ(count, ctr_idx);
198       goto INTEL_READ_FAILED;
  // Return the raw 64-bit counter value on success.
203   return (uint64_t)count.q;
// Begin counting event `ctr` on the current core.
// Fixed-function events (clock, retired instructions) are enabled via
// the fixed-counter control MSR; all others claim a free programmable
// slot via intel_get_slot() and program it with INTEL_CTRL_START.
// Fails (INTEL_START_FAILED) when no programmable slot is available.
210 static int intel_start_tracking(v3_pmon_ctr_t ctr) {
218  * check if available slot in PMU, except for fixed counters (Intel specific)
222   case V3_PMON_CLOCK_COUNT:
223     INTEL_FIXED_CTRL_READ(msrs);
225     INTEL_FIXED_CTRL_WRITE(msrs);
227   case V3_PMON_RETIRED_INST_COUNT:
228     INTEL_FIXED_CTRL_READ(msrs);
230     INTEL_FIXED_CTRL_WRITE(msrs);
232   case V3_PMON_MEM_LOAD_COUNT:
233     if((ctr_idx = intel_get_slot()) >= 0) {
234       INTEL_CTRL_START(INTEL_MEM_INST_RETIRED, INTEL_LOADS, ctr_idx);
236       goto INTEL_START_FAILED;
239   case V3_PMON_MEM_STORE_COUNT:
240     if((ctr_idx = intel_get_slot()) >= 0) {
241       INTEL_CTRL_START(INTEL_MEM_INST_RETIRED, INTEL_STORES, ctr_idx);
243       goto INTEL_START_FAILED;
246   case V3_PMON_CACHE_MISS_COUNT:
247     if((ctr_idx = intel_get_slot()) >= 0) {
248       INTEL_CTRL_START(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS, ctr_idx);
250       goto INTEL_START_FAILED;
253   case V3_PMON_TLB_MISS_COUNT:
254     if((ctr_idx = intel_get_slot()) >= 0) {
255       INTEL_CTRL_START(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS, ctr_idx);
257       goto INTEL_START_FAILED;
265   ERROR("ERROR: no more slot remains for pmon events\n");
270  * description: disabling pmu event counts
// Stop counting event `ctr` on the current core.
// Fixed-function events are disabled via the fixed-counter control MSR;
// programmable events are located by (event, umask) lookup and stopped
// with INTEL_CTRL_STOP. Fails (INTEL_STOP_FAILED) when the event is not
// currently programmed on any counter.
273 static int intel_stop_tracking(v3_pmon_ctr_t ctr) {
281  * check if available slot in PMU, except
285   case V3_PMON_CLOCK_COUNT:
286     INTEL_FIXED_CTRL_READ(msrs);
288     INTEL_FIXED_CTRL_WRITE(msrs);
290   case V3_PMON_RETIRED_INST_COUNT:
291     INTEL_FIXED_CTRL_READ(msrs);
293     INTEL_FIXED_CTRL_WRITE(msrs);
295   case V3_PMON_MEM_LOAD_COUNT:
296     if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_LOADS)) >= 0) {
297       INTEL_CTRL_STOP(ctr_idx);
299       goto INTEL_STOP_FAILED;
302   case V3_PMON_MEM_STORE_COUNT:
303     if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_STORES)) >= 0) {
304       INTEL_CTRL_STOP(ctr_idx);
306       goto INTEL_STOP_FAILED;
309   case V3_PMON_CACHE_MISS_COUNT:
310     if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS)) >= 0) {
311       INTEL_CTRL_STOP(ctr_idx);
313       goto INTEL_STOP_FAILED;
316   case V3_PMON_TLB_MISS_COUNT:
317     if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS)) >= 0) {
318       INTEL_CTRL_STOP(ctr_idx);
320       goto INTEL_STOP_FAILED;
  // NOTE(review): this error message says "no more slot remains", but the
  // failure mode here is "event not found on any counter" — consider rewording.
328   ERROR("ERROR: no more slot remains for pmon events\n");
// Per-core Intel PMU initialization: bump the per-CPU refcount; only the
// first initializer on a core is supposed to reset the PMU state.
332 static void intel_pmu_init(void) {
  // NOTE(review): the early-return guard tests the PRE-increment value
  // against 1, so a previous count of exactly 1 (second init) falls
  // through and re-clears the PMU. The guard likely should be `> 0`
  // — confirm intended refcount semantics against the full file.
336   if ((get_cpu_var(pmu_refcount)++) > 1) {
337     put_cpu_var(pmu_refcount);
338     // only the first init clears the pmu
341   put_cpu_var(pmu_refcount);
347  * per Intel PMU architecture,
348  * there are two class of counters
349  * fixed ones (3 counters) and programmable ones (4 counters)
350  * events for fixed counters are determined, so enabling or not is the option
351  * whereas, programmable ones are literally programmable.
355  * enable fixed counters in global
  // Bits 0-3 enable PMC0-3; bits 32-34 enable the three fixed counters.
357   MSR_READ(control, INTEL_IA32_PERF_GLOBAL_CTRL);
358   control.q |= 0x70000000f; // enable fix counters (3 for the intel model)
359   MSR_WRITE(control, INTEL_IA32_PERF_GLOBAL_CTRL);
362  * disable in fixed counters control
365   INTEL_FIXED_CTRL_WRITE(control);
368  * clean up programmable counter control
370   for (i=0; i<INTEL_NUM_PMU_CONTROLS; i++) {
371     INTEL_CTRL_WRITE(control, i);
// Per-core Intel PMU teardown: drop the per-CPU refcount.
375 static void intel_pmu_deinit(void) {
  // NOTE(review): the post-decrement executes even when the count is
  // already 0, underflowing the per-CPU u32 before the early return —
  // verify the decrement/compare order against the full file.
376   if ((get_cpu_var(pmu_refcount)--)==0) {
377     put_cpu_var(pmu_refcount);
381   put_cpu_var(pmu_refcount);
// AMD counterpart of intel_find_idx(): locate the counter currently
// programmed with the given (event select, unit mask) pair by decoding
// the low 32 bits of each PERF_CTL register.
388 static int amd_find_idx(uint8_t event, uint8_t mask) {
395   for (i=0; i<AMD_NUM_PMU_COUNTERS; i++) {
396     AMD_CTRL_READ(control, i);
397     if((((control.l>>AMD_EVENT_BIT) & 0xff) == event) &&
398        (((control.l>>AMD_UMASK_BIT) & 0xff) == mask)) {
// Read the current value of the AMD PMU counter tracking `ctr`.
// All events (AMD has no fixed-function counters here) are located by
// (event, umask) lookup; a miss fails via AMD_READ_FAILED.
// NOTE(review): MEM_LOAD and MEM_STORE both look up
// AMD_DATA_CACHE_ACCESSES with umask 0, so the two cases resolve to the
// same counter and cannot be distinguished — confirm whether separate
// load/store events exist for this family.
407 static uint64_t amd_get_value(v3_pmon_ctr_t ctr) {
414   case V3_PMON_CLOCK_COUNT:
415     if((ctr_idx = amd_find_idx(AMD_CLK_NOT_HALTED, 0x0)) >= 0) {
416       AMD_CTR_READ(count, ctr_idx);
418       goto AMD_READ_FAILED;
421   case V3_PMON_RETIRED_INST_COUNT:
422     if((ctr_idx = amd_find_idx(AMD_RETIRED_INSTRUCTIONS, 0x0)) >= 0) {
423       AMD_CTR_READ(count, ctr_idx);
425       goto AMD_READ_FAILED;
428   case V3_PMON_MEM_LOAD_COUNT:
429     if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
430       AMD_CTR_READ(count, ctr_idx);
432       goto AMD_READ_FAILED;
435   case V3_PMON_MEM_STORE_COUNT:
436     if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
437       AMD_CTR_READ(count, ctr_idx);
439       goto AMD_READ_FAILED;
442   case V3_PMON_CACHE_MISS_COUNT:
443     if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_MISSES, 0x0)) >= 0) {
444       AMD_CTR_READ(count, ctr_idx);
446       goto AMD_READ_FAILED;
449   case V3_PMON_TLB_MISS_COUNT:
450     if((ctr_idx = amd_find_idx(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x7)) >= 0) {
451       AMD_CTR_READ(count, ctr_idx);
453       goto AMD_READ_FAILED;
  // Return the raw 64-bit counter value on success.
458   return (uint64_t)count.q;
// Begin counting event `ctr` on the current core (AMD): every event,
// including clock and retired instructions, claims a free programmable
// slot via amd_get_slot() and is programmed with AMD_CTRL_START.
// Fails (AMD_START_FAILED) when no slot is available.
// NOTE(review): MEM_LOAD and MEM_STORE both program
// AMD_DATA_CACHE_ACCESSES/umask 0 — see amd_get_value.
464 static int amd_start_tracking(v3_pmon_ctr_t ctr) {
469   case V3_PMON_CLOCK_COUNT:
470     if((ctr_idx = amd_get_slot()) >= 0) {
471       AMD_CTRL_START(AMD_CLK_NOT_HALTED, 0x0, ctr_idx);
473       goto AMD_START_FAILED;
476   case V3_PMON_RETIRED_INST_COUNT:
477     if((ctr_idx = amd_get_slot()) >= 0) {
478       AMD_CTRL_START(AMD_RETIRED_INSTRUCTIONS, 0x0, ctr_idx);
480       goto AMD_START_FAILED;
483   case V3_PMON_MEM_LOAD_COUNT:
484     if((ctr_idx = amd_get_slot()) >= 0) {
485       AMD_CTRL_START(AMD_DATA_CACHE_ACCESSES, 0x0, ctr_idx);
487       goto AMD_START_FAILED;
490   case V3_PMON_MEM_STORE_COUNT:
491     if((ctr_idx = amd_get_slot()) >= 0) {
492       AMD_CTRL_START(AMD_DATA_CACHE_ACCESSES, 0x0, ctr_idx);
494       goto AMD_START_FAILED;
497   case V3_PMON_CACHE_MISS_COUNT:
498     if((ctr_idx = amd_get_slot()) >= 0) {
499       AMD_CTRL_START(AMD_DATA_CACHE_MISSES, 0x0, ctr_idx);
501       goto AMD_START_FAILED;
504   case V3_PMON_TLB_MISS_COUNT:
505     if((ctr_idx = amd_get_slot()) >= 0) {
506       AMD_CTRL_START(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x7, ctr_idx);
508       goto AMD_START_FAILED;
516   ERROR("ERROR: no more slot remains for pmon events\n");
// Stop counting event `ctr` on the current core (AMD): locate the
// counter via (event, umask) lookup and disable it with AMD_CTRL_STOP.
// Fails (AMD_STOP_FAILED) when the event is not currently programmed.
521 static int amd_stop_tracking(v3_pmon_ctr_t ctr) {
527   case V3_PMON_CLOCK_COUNT:
528     if((ctr_idx = amd_find_idx(AMD_CLK_NOT_HALTED, 0x0)) >= 0) {
529       AMD_CTRL_STOP(ctr_idx);
531       goto AMD_STOP_FAILED;
534   case V3_PMON_RETIRED_INST_COUNT:
535     if((ctr_idx = amd_find_idx(AMD_RETIRED_INSTRUCTIONS, 0x0)) >= 0) {
536       AMD_CTRL_STOP(ctr_idx);
538       goto AMD_STOP_FAILED;
541   case V3_PMON_MEM_LOAD_COUNT:
542     if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
543       AMD_CTRL_STOP(ctr_idx);
545       goto AMD_STOP_FAILED;
548   case V3_PMON_MEM_STORE_COUNT:
549     if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
550       AMD_CTRL_STOP(ctr_idx);
552       goto AMD_STOP_FAILED;
555   case V3_PMON_CACHE_MISS_COUNT:
556     if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_MISSES, 0x0)) >= 0) {
557       AMD_CTRL_STOP(ctr_idx);
559       goto AMD_STOP_FAILED;
562   case V3_PMON_TLB_MISS_COUNT:
563     if((ctr_idx = amd_find_idx(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x7)) >= 0) {
564       AMD_CTRL_STOP(ctr_idx);
566       goto AMD_STOP_FAILED;
  // NOTE(review): error message copied from the start path; the failure
  // here is "event not found", not "no free slot" — consider rewording.
574   ERROR("ERROR: no more slot remains for pmon events\n");
// Per-core AMD PMU initialization: bump the per-CPU refcount; only the
// first initializer on a core is supposed to reset the PMU state.
579 static void amd_pmu_init(void) {
  // NOTE(review): same guard issue as intel_pmu_init — testing the
  // PRE-increment value against 1 lets the second init re-clear the
  // PMU; likely should be `> 0`. Confirm against the full file.
585   if ((get_cpu_var(pmu_refcount)++) > 1) {
586     put_cpu_var(pmu_refcount);
587     // only the first init clears the pmu
590   put_cpu_var(pmu_refcount);
594   // initialize variables
598  * clean up programmable counter control
600   for (i=0; i<AMD_NUM_PMU_CONTROLS; i++) {
601     AMD_CTRL_WRITE(control, i);
// Per-core AMD PMU teardown: drop the per-CPU refcount.
605 static void amd_pmu_deinit(void) {
  // NOTE(review): post-decrement underflows the per-CPU u32 when the
  // count is already 0 (same issue as intel_pmu_deinit) — verify.
606   if ((get_cpu_var(pmu_refcount)--)==0) {
607     put_cpu_var(pmu_refcount);
611   put_cpu_var(pmu_refcount);
// Palacios PMU interface vtable for Intel cores, passed to V3_Init_PMU.
615 static struct v3_pmu_iface palacios_pmu_intel = {
616   .init = intel_pmu_init,
617   .deinit = intel_pmu_deinit,
618   .start_tracking = intel_start_tracking,
619   .stop_tracking = intel_stop_tracking,
620   .get_value = intel_get_value
// Palacios PMU interface vtable for AMD cores, passed to V3_Init_PMU.
623 static struct v3_pmu_iface palacios_pmu_amd = {
624   .init = amd_pmu_init,
625   .deinit = amd_pmu_deinit,
626   .start_tracking = amd_start_tracking,
627   .stop_tracking = amd_stop_tracking,
628   .get_value = amd_get_value
// Extension entry point: detect the CPU vendor and register the matching
// PMU vtable with Palacios; log an error on unknown vendors.
// NOTE(review): the leading `if (is_intel())` line is not visible in
// this chunk but is implied by the `else if` below.
631 static int pmu_init( void ) {
633     INFO("Intel PMU featureset detected\n");
634     V3_Init_PMU(&palacios_pmu_intel);
635   } else if (is_amd()) {
636     INFO("AMD PMU featureset detected\n");
637     V3_Init_PMU(&palacios_pmu_amd);
639     ERROR("This is neither an Intel nor AMD machine - No PMU functionality configured\n");
// Extension teardown hook (body not visible in this chunk).
646 static int pmu_deinit(void)
// Linux extension descriptor wiring pmu_init/pmu_deinit into the
// Palacios Linux extension framework (see linux-exts.h).
653 static struct linux_ext pmu_ext = {
656   .deinit = pmu_deinit,
// Register the PMU extension with the framework (enclosing registration
// function/macro is not visible in this chunk).
661   register_extension(&pmu_ext);