3 * (c) Chang S. Bae, 2013
6 #include <linux/cdev.h>
7 #include <linux/errno.h>
9 #include <asm/msr-index.h>
11 #include <palacios/vmm_types.h>
12 #include <palacios/vmm_util.h>
13 #include <interfaces/vmm_pmu.h>
17 #include "iface-pmu-intel.h"
18 #include "iface-pmu-amd.h"
19 #include "util-queue.h"
20 #include "linux-exts.h"
// Number of inits/deinits we have seen (inc on init, dec on deinit)
// This is per CPU - init/deinit mean init/deinit PMU
// tracking ON THE CURRENT CORE
// (always accessed through get_cpu_var/put_cpu_var so the core is pinned)
static DEFINE_PER_CPU(u32, pmu_refcount) = 0;
30 * some macros may be commonly used
/* MSR access helpers: (msrs) is a struct msr, (c) is the MSR address. */
#define MSR_READ(msrs, c) do {rdmsrl((c), (msrs).q);} while (0)
#define MSR_WRITE(msrs, c) do {wrmsrl((c), (msrs).q);} while (0)
/*
 * Bit/byte helpers. All arguments are parenthesized and the shifted
 * constant is unsigned 64-bit: the old signed "1 << i" was undefined
 * behavior for bit 31 and silently wrong for bits 32-63 of the 64-bit
 * .q MSR fields these macros are used on.
 */
#define SET_BIT(val, i) ((val) |= (1ULL << (i)))
#define CLEAR_BIT(val, u, i) ((val) &= ~(((u) & 1ULL) << (i)))
#define SET_BYTE(val, u, i) ((val) |= (((u) & 255ULL) << (i)))
#define CHECK_BIT(val, i) ((val) & (1ULL << (i)))
// Execute CPUID with leaf `id`, storing EAX/EBX/ECX/EDX into dest[0..3].
static inline void cpuid_string(u32 id, u32 dest[4]) {
   :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
// Fill `name` with the 12-character CPU vendor string (e.g. "GenuineIntel").
// CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order.
static int get_cpu_vendor(char name[13])
   // NOTE(review): u32 stores through a char[] alias - works in practice on
   // x86 but technically violates strict aliasing; memcpy would be cleaner.
   ((u32*)name)[0]=dest[1];  // EBX: chars 0-3
   ((u32*)name)[1]=dest[3];  // EDX: chars 4-7
   ((u32*)name)[2]=dest[2];  // ECX: chars 8-11
// Return nonzero iff the CPU vendor string is "GenuineIntel".
static int is_intel(void)
   return !strcmp(name,"GenuineIntel");
// Return nonzero iff the CPU vendor string is "AuthenticAMD".
static int is_amd(void)
   return !strcmp(name,"AuthenticAMD");
79 * AMD and Intel implementations are distinguished by prefix: INTEL or AMD
84 * description: check available slots in pmu
85 * return: -1 if none, else returns index: 0 ... 3
// Scan the programmable counter control MSRs for a free slot;
// a set EN bit means the counter is already in use.
static int intel_get_slot(void) {
    for (i=0; i<INTEL_NUM_PMU_CONTROLS; i++) {
        INTEL_CTRL_READ(control, i);
        // enabled => occupied, keep scanning
        if(control.q & (0x1<<INTEL_EN_BIT)) {
// AMD variant of intel_get_slot(): scan the performance event select
// registers for one whose EN bit is clear (i.e. a free counter).
static int amd_get_slot(void) {
    for (i=0; i<AMD_NUM_PMU_CONTROLS; i++) {
        AMD_CTRL_READ(control, i);
        // enabled => occupied, keep scanning
        if(control.q & (0x1<<AMD_EN_BIT)) {
132 * description: find index of pmu register that is available
// Locate the programmable counter currently programmed with the given
// (event, umask) pair; returns its index (tail of the function, including
// the not-found return, is not visible in this chunk).
static int intel_find_idx(uint8_t event, uint8_t mask) {
    for (i=0; i<INTEL_NUM_PMU_COUNTERS; i++) {
        INTEL_CTRL_READ(control, i);
        // match both the event-select and unit-mask fields of the control MSR
        if((((control.l>>INTEL_EVENT_BIT) & 0xff) == event) &&
           (((control.l>>INTEL_UMASK_BIT) & 0xff) == mask)) {
154 * following implementations : init, deinit, start_tracking, stop_track and get_value
155 * specifically fit into the pmu interface
/*
 * Read the current value of the given logical counter on this core.
 * Clock and retired-instruction counts come from Intel fixed-function
 * counters; the other events are located by scanning the programmable
 * counters for the matching event/umask pair (intel_find_idx).
 */
static uint64_t intel_get_value(v3_pmon_ctr_t ctr) {
    case V3_PMON_CLOCK_COUNT:
        // fixed counter dedicated to unhalted core cycles
        INTEL_FIXED_CTR_READ(count, INTEL_IDX_CLK_IN_FPMU);
    case V3_PMON_RETIRED_INST_COUNT:
        // fixed counter dedicated to retired instructions
        INTEL_FIXED_CTR_READ(count, INTEL_IDX_INST_IN_FPMU);
    case V3_PMON_MEM_LOAD_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_LOADS)) >= 0) {
            INTEL_CTR_READ(count, ctr_idx);
            // event was never started on this core
            goto INTEL_READ_FAILED;
    case V3_PMON_MEM_STORE_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_STORES)) >= 0) {
            INTEL_CTR_READ(count, ctr_idx);
            goto INTEL_READ_FAILED;
    case V3_PMON_CACHE_MISS_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS)) >= 0) {
            INTEL_CTR_READ(count, ctr_idx);
            goto INTEL_READ_FAILED;
    case V3_PMON_TLB_MISS_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS)) >= 0) {
            INTEL_CTR_READ(count, ctr_idx);
            goto INTEL_READ_FAILED;
    return (uint64_t)count.q;
/*
 * Begin counting the given logical event on this core.
 * Fixed counters (clock, retired instructions) only need their enable
 * bits set in the fixed-counter control MSR; all other events claim a
 * free programmable slot (intel_get_slot) and program it
 * (INTEL_CTRL_START). Fails when no programmable slot is available.
 */
static int intel_start_tracking(v3_pmon_ctr_t ctr) {
 * check if available slot in PMU, except for fixed counters (Intel specific)
    case V3_PMON_CLOCK_COUNT:
        INTEL_FIXED_CTRL_READ(msrs);
        INTEL_FIXED_CTRL_WRITE(msrs);
    case V3_PMON_RETIRED_INST_COUNT:
        INTEL_FIXED_CTRL_READ(msrs);
        INTEL_FIXED_CTRL_WRITE(msrs);
    case V3_PMON_MEM_LOAD_COUNT:
        if((ctr_idx = intel_get_slot()) >= 0) {
            INTEL_CTRL_START(INTEL_MEM_INST_RETIRED, INTEL_LOADS, ctr_idx);
            // no free programmable counter left
            goto INTEL_START_FAILED;
    case V3_PMON_MEM_STORE_COUNT:
        if((ctr_idx = intel_get_slot()) >= 0) {
            INTEL_CTRL_START(INTEL_MEM_INST_RETIRED, INTEL_STORES, ctr_idx);
            goto INTEL_START_FAILED;
    case V3_PMON_CACHE_MISS_COUNT:
        if((ctr_idx = intel_get_slot()) >= 0) {
            INTEL_CTRL_START(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS, ctr_idx);
            goto INTEL_START_FAILED;
    case V3_PMON_TLB_MISS_COUNT:
        if((ctr_idx = intel_get_slot()) >= 0) {
            INTEL_CTRL_START(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS, ctr_idx);
            goto INTEL_START_FAILED;
    ERROR("ERROR: no more slot remains for pmon events\n");
271 * descript: disabling pmu event counts
/*
 * Stop counting the given logical event on this core.
 * Fixed counters are disabled through the fixed-counter control MSR;
 * for programmable events the slot carrying the event is located
 * (intel_find_idx) and stopped (INTEL_CTRL_STOP). Fails if the event
 * was never started on this core.
 */
static int intel_stop_tracking(v3_pmon_ctr_t ctr) {
 * check if available slot in PMU, except
    case V3_PMON_CLOCK_COUNT:
        INTEL_FIXED_CTRL_READ(msrs);
        INTEL_FIXED_CTRL_WRITE(msrs);
    case V3_PMON_RETIRED_INST_COUNT:
        INTEL_FIXED_CTRL_READ(msrs);
        INTEL_FIXED_CTRL_WRITE(msrs);
    case V3_PMON_MEM_LOAD_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_LOADS)) >= 0) {
            INTEL_CTRL_STOP(ctr_idx);
            // event not currently programmed on any counter
            goto INTEL_STOP_FAILED;
    case V3_PMON_MEM_STORE_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_STORES)) >= 0) {
            INTEL_CTRL_STOP(ctr_idx);
            goto INTEL_STOP_FAILED;
    case V3_PMON_CACHE_MISS_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS)) >= 0) {
            INTEL_CTRL_STOP(ctr_idx);
            goto INTEL_STOP_FAILED;
    case V3_PMON_TLB_MISS_COUNT:
        if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS)) >= 0) {
            INTEL_CTRL_STOP(ctr_idx);
            goto INTEL_STOP_FAILED;
    ERROR("ERROR: no more slot remains for pmon events\n");
333 static void intel_pmu_init(void) {
337 if ((get_cpu_var(pmu_refcount)++) > 1) {
338 put_cpu_var(pmu_refcount);
339 // only the first init clears the pmu
342 put_cpu_var(pmu_refcount);
348 * per Intel PMU architecture,
349 * there are two class of counters
350 * fixed ones (3 counters) and programmable ones (4 counters)
351 * events for fixed coutners are determined, so enabling or not is the option
352 * whereas, programmable ones are litterally programmable.
356 * enable fixed counters in global
358 MSR_READ(control, INTEL_IA32_PERF_GLOBAL_CTRL);
359 control.q |= 0x70000000f; // enable fix counters (3 for the intel model)
360 MSR_WRITE(control, INTEL_IA32_PERF_GLOBAL_CTRL);
363 * disable in fixed counters control
366 INTEL_FIXED_CTRL_WRITE(control);
369 * clean up programmable counter control
371 for (i=0; i<INTEL_NUM_PMU_CONTROLS; i++) {
372 INTEL_CTRL_WRITE(control, i);
376 static void intel_pmu_deinit(void) {
377 if ((get_cpu_var(pmu_refcount)--)==0) {
378 put_cpu_var(pmu_refcount);
382 put_cpu_var(pmu_refcount);
// AMD variant of intel_find_idx(): locate the counter programmed with the
// given (event, umask) pair by inspecting each event-select register.
static int amd_find_idx(uint8_t event, uint8_t mask) {
    for (i=0; i<AMD_NUM_PMU_COUNTERS; i++) {
        AMD_CTRL_READ(control, i);
        // match both the event-select and unit-mask fields
        if((((control.l>>AMD_EVENT_BIT) & 0xff) == event) &&
           (((control.l>>AMD_UMASK_BIT) & 0xff) == mask)) {
/*
 * Read the current value of the given logical counter on this core.
 * AMD has no fixed-function counters, so every event (including clock
 * and retired instructions) is located by event/umask via amd_find_idx.
 */
static uint64_t amd_get_value(v3_pmon_ctr_t ctr) {
    case V3_PMON_CLOCK_COUNT:
        if((ctr_idx = amd_find_idx(AMD_CLK_NOT_HALTED, 0x0)) >= 0) {
            AMD_CTR_READ(count, ctr_idx);
            goto AMD_READ_FAILED;
    case V3_PMON_RETIRED_INST_COUNT:
        if((ctr_idx = amd_find_idx(AMD_RETIRED_INSTRUCTIONS, 0x0)) >= 0) {
            AMD_CTR_READ(count, ctr_idx);
            goto AMD_READ_FAILED;
    case V3_PMON_MEM_LOAD_COUNT:
        // NOTE(review): loads and stores both map to AMD_DATA_CACHE_ACCESSES
        // with umask 0, so these two logical counters are indistinguishable
        // here and will resolve to the same physical counter.
        if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
            AMD_CTR_READ(count, ctr_idx);
            goto AMD_READ_FAILED;
    case V3_PMON_MEM_STORE_COUNT:
        if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
            AMD_CTR_READ(count, ctr_idx);
            goto AMD_READ_FAILED;
    case V3_PMON_CACHE_MISS_COUNT:
        if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_MISSES, 0x0)) >= 0) {
            AMD_CTR_READ(count, ctr_idx);
            goto AMD_READ_FAILED;
    case V3_PMON_TLB_MISS_COUNT:
        if((ctr_idx = amd_find_idx(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x7)) >= 0) {
            AMD_CTR_READ(count, ctr_idx);
            goto AMD_READ_FAILED;
    return (uint64_t)count.q;
/*
 * Begin counting the given logical event on this core.
 * Every event claims a free slot (amd_get_slot) and programs it
 * (AMD_CTRL_START); fails when no slot is available.
 */
static int amd_start_tracking(v3_pmon_ctr_t ctr) {
    case V3_PMON_CLOCK_COUNT:
        if((ctr_idx = amd_get_slot()) >= 0) {
            AMD_CTRL_START(AMD_CLK_NOT_HALTED, 0x0, ctr_idx);
            goto AMD_START_FAILED;
    case V3_PMON_RETIRED_INST_COUNT:
        if((ctr_idx = amd_get_slot()) >= 0) {
            AMD_CTRL_START(AMD_RETIRED_INSTRUCTIONS, 0x0, ctr_idx);
            goto AMD_START_FAILED;
    case V3_PMON_MEM_LOAD_COUNT:
        // NOTE(review): load and store tracking program the identical
        // event/umask (AMD_DATA_CACHE_ACCESSES, 0x0); amd_find_idx cannot
        // later tell the two apart.
        if((ctr_idx = amd_get_slot()) >= 0) {
            AMD_CTRL_START(AMD_DATA_CACHE_ACCESSES, 0x0, ctr_idx);
            goto AMD_START_FAILED;
    case V3_PMON_MEM_STORE_COUNT:
        if((ctr_idx = amd_get_slot()) >= 0) {
            AMD_CTRL_START(AMD_DATA_CACHE_ACCESSES, 0x0, ctr_idx);
            goto AMD_START_FAILED;
    case V3_PMON_CACHE_MISS_COUNT:
        if((ctr_idx = amd_get_slot()) >= 0) {
            AMD_CTRL_START(AMD_DATA_CACHE_MISSES, 0x0, ctr_idx);
            goto AMD_START_FAILED;
    case V3_PMON_TLB_MISS_COUNT:
        if((ctr_idx = amd_get_slot()) >= 0) {
            AMD_CTRL_START(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x7, ctr_idx);
            goto AMD_START_FAILED;
    ERROR("ERROR: no more slot remains for pmon events\n");
/*
 * Stop counting the given logical event on this core: find the slot
 * carrying the event (amd_find_idx) and disable it (AMD_CTRL_STOP).
 * Fails if the event was never started on this core.
 */
static int amd_stop_tracking(v3_pmon_ctr_t ctr) {
    case V3_PMON_CLOCK_COUNT:
        if((ctr_idx = amd_find_idx(AMD_CLK_NOT_HALTED, 0x0)) >= 0) {
            AMD_CTRL_STOP(ctr_idx);
            goto AMD_STOP_FAILED;
    case V3_PMON_RETIRED_INST_COUNT:
        if((ctr_idx = amd_find_idx(AMD_RETIRED_INSTRUCTIONS, 0x0)) >= 0) {
            AMD_CTRL_STOP(ctr_idx);
            goto AMD_STOP_FAILED;
    case V3_PMON_MEM_LOAD_COUNT:
        if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
            AMD_CTRL_STOP(ctr_idx);
            goto AMD_STOP_FAILED;
    case V3_PMON_MEM_STORE_COUNT:
        if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_ACCESSES, 0x0)) >= 0) {
            AMD_CTRL_STOP(ctr_idx);
            goto AMD_STOP_FAILED;
    case V3_PMON_CACHE_MISS_COUNT:
        if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_MISSES, 0x0)) >= 0) {
            AMD_CTRL_STOP(ctr_idx);
            goto AMD_STOP_FAILED;
    case V3_PMON_TLB_MISS_COUNT:
        if((ctr_idx = amd_find_idx(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x7)) >= 0) {
            AMD_CTRL_STOP(ctr_idx);
            goto AMD_STOP_FAILED;
    ERROR("ERROR: no more slot remains for pmon events\n");
580 static void amd_pmu_init(void) {
586 if ((get_cpu_var(pmu_refcount)++) > 1) {
587 put_cpu_var(pmu_refcount);
588 // only the first init clears the pmu
591 put_cpu_var(pmu_refcount);
595 // initialize variables
599 * clean up programmable counter control
601 for (i=0; i<AMD_NUM_PMU_CONTROLS; i++) {
602 AMD_CTRL_WRITE(control, i);
606 static void amd_pmu_deinit(void) {
607 if ((get_cpu_var(pmu_refcount)--)==0) {
608 put_cpu_var(pmu_refcount);
612 put_cpu_var(pmu_refcount);
// v3 PMU interface dispatch table for Intel CPUs, handed to V3_Init_PMU().
static struct v3_pmu_iface palacios_pmu_intel = {
    .init = intel_pmu_init,
    .deinit = intel_pmu_deinit,
    .start_tracking = intel_start_tracking,
    .stop_tracking = intel_stop_tracking,
    .get_value = intel_get_value
// v3 PMU interface dispatch table for AMD CPUs, handed to V3_Init_PMU().
static struct v3_pmu_iface palacios_pmu_amd = {
    .init = amd_pmu_init,
    .deinit = amd_pmu_deinit,
    .start_tracking = amd_start_tracking,
    .stop_tracking = amd_stop_tracking,
    .get_value = amd_get_value
// Probe the CPU vendor and register the matching PMU implementation with
// Palacios via V3_Init_PMU(); unknown vendors get no PMU functionality.
static int pmu_init( void ) {
        INFO("Intel PMU featureset detected\n");
        V3_Init_PMU(&palacios_pmu_intel);
    } else if (is_amd()) {
        INFO("AMD PMU featureset detected\n");
        V3_Init_PMU(&palacios_pmu_amd);
        ERROR("This is neither an Intel nor AMD machine - No PMU functionality configured\n");
// Extension-level deinit hook referenced by pmu_ext (body not visible here).
static int pmu_deinit(void)
// Palacios linux-extension descriptor for the PMU interface.
static struct linux_ext pmu_ext = {
    .deinit = pmu_deinit,
    // hook the PMU extension into the Palacios linux-extension framework
    register_extension(&pmu_ext);