--- /dev/null
+// Intel PMU definitions and helper macros
+
+/*
+ * defines
+ */
+
+#define INTEL_IDX_INST_IN_FPMU 0
+#define INTEL_IDX_CLK_IN_FPMU 2
+
+/*
+ * REFERENCE CPU MODEL:
+ * Intel Xeon E5620 (family_cpuid: 06_2CH)
+ * Architectural Performance Monitoring Version: 3
+ * Number of general-purpose performance counters: 4
+ *
+ */
+
+#define INTEL_NUM_PMU_COUNTERS 4
+#define INTEL_NUM_PMU_CONTROLS 4
+#define INTEL_NUM_FIXED_PMU_COUNTERS 3
+#define INTEL_NUM_PMU_GLOBAL 3
+
+/*
+ * NOTICE: there are currently 7 counters in total (IA32_PMC0-3 plus IA32_FIXED_CTR0-2),
+ * but only 4 of them are in use, since the code is for now hard-programmed to
+ * request only the events needed for CPI and the LLC miss rate.
+ *
+ * NUM_USED_COUNTERS is accordingly defined as 4 in vmm_pmu.h
+ */
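+
+/*
+ * For reference, the two hard-programmed metrics above are commonly derived
+ * from these counters as (one common formulation, not dictated by this code;
+ * the miss rate can also be taken per load instead of per instruction):
+ *
+ *   CPI           = unhalted clock cycles / retired instructions
+ *   LLC miss rate = MEM_LOAD_RETIRED.L3_MISS / retired instructions
+ *
+ * i.e. both need the fixed clock/instruction counters plus one programmable
+ * counter for the L3 miss event defined further below.
+ */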
+
+/*
+ * MSR OFFSETS FOR PMU RELATED:
+ */
+
+#define INTEL_IA32_PMC0 0xc1
+#define INTEL_IA32_PMC1 0xc2
+#define INTEL_IA32_PMC2 0xc3
+#define INTEL_IA32_PMC3 0xc4
+
+#define INTEL_IA32_PERFEVTSEL0 0x186
+#define INTEL_IA32_PERFEVTSEL1 0x187
+#define INTEL_IA32_PERFEVTSEL2 0x188
+#define INTEL_IA32_PERFEVTSEL3 0x189
+
+/*
+ * 0x309 INTEL_IA32_FIXED_CTR0: counts Instr_Retired.Any
+ * 0x30A INTEL_IA32_FIXED_CTR1: counts CPU_CLK_Unhalted.Core
+ * 0x30B INTEL_IA32_FIXED_CTR2: counts CPU_CLK_Unhalted.Ref
+ */
+#define INTEL_IA32_FIXED_CTR0 0x309
+#define INTEL_IA32_FIXED_CTR1 0x30a
+#define INTEL_IA32_FIXED_CTR2 0x30b
+
+#define INTEL_IA32_FIXED_CTR_CTRL 0x38d
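+
+/*
+ * IA32_FIXED_CTR_CTRL assigns one 4-bit control field per fixed counter:
+ * bits 0-3 drive FIXED_CTR0, bits 4-7 FIXED_CTR1 and bits 8-11 FIXED_CTR2.
+ * Within a field, bit 0 enables counting in ring 0 (OS), bit 1 in user mode,
+ * bit 2 is AnyThread and bit 3 enables a PMI on overflow, so writing 0x3
+ * into a field turns that counter on for all ring levels.
+ */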
+
+#define INTEL_IA32_PERF_GLOBAL_STATUS 0x38e
+#define INTEL_IA32_PERF_GLOBAL_CTRL 0x38f
+#define INTEL_IA32_PERF_GLOBAL_OVF_CTRL 0x390
+
+#define INTEL_IA32_PERF_GLOBAL_STATUS_ORDER 0
+#define INTEL_IA32_PERF_GLOBAL_CTRL_ORDER 1
+#define INTEL_IA32_PERF_GLOBAL_OVF_CTRL_ORDER 2
+
+// bit positions of the relevant fields in IA32_PERFEVTSELx (performance event select)
+#define INTEL_USR_BIT 16
+#define INTEL_OS_BIT 17
+#define INTEL_EDGE_BIT 18
+#define INTEL_PIN_BIT 19
+#define INT_BIT 20
+#define INTEL_ANY_BIT 21
+#define INTEL_EN_BIT 22
+#define INTEL_INV_BIT 23
+#define INTEL_CMASK_BIT 24
+#define INTEL_UMASK_BIT 8
+#define INTEL_EVENT_BIT 0
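+
+/*
+ * Worked example (illustrative): counting MEM_LOAD_RETIRED.L3_MISS at all
+ * ring levels corresponds to the IA32_PERFEVTSELx value
+ *
+ *   (0xCB << INTEL_EVENT_BIT) | (0x10 << INTEL_UMASK_BIT) |
+ *   (0x1 << INTEL_USR_BIT) | (0x1 << INTEL_OS_BIT) | (0x1 << INTEL_EN_BIT)
+ *   = 0x4310CB
+ *
+ * which is exactly the value the INTEL_CTRL_START() macro below composes.
+ */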
+
+
+
+/*
+ * SOME MACROS
+ */
+
+#define INTEL_MSR_OFFSET_PERF(val) ((val) & 0x3)
+
+#define INTEL_CTR_READ(msrs, c) do {rdmsrl((INTEL_IA32_PMC0 + (c)), (msrs).q);} while (0)
+#define INTEL_CTR_WRITE(msrs, c) do {wrmsrl((INTEL_IA32_PMC0 + (c)), (msrs).q);} while (0)
+
+#define INTEL_FIXED_CTR_READ(msrs, c) do {rdmsrl((INTEL_IA32_FIXED_CTR0 + (c)), (msrs).q);} while (0)
+#define INTEL_FIXED_CTR_WRITE(msrs, c) do {wrmsrl((INTEL_IA32_FIXED_CTR0 + (c)), (msrs).q);} while (0)
+
+#define INTEL_CTRL_READ(msrs, c) do {rdmsrl((INTEL_IA32_PERFEVTSEL0 + (c)), (msrs).q);} while (0)
+#define INTEL_CTRL_WRITE(msrs, c) do {wrmsrl((INTEL_IA32_PERFEVTSEL0 + (c)), (msrs).q);} while (0)
+
+// given an event code and umask, program counter i to track that event at
+// all ring levels (USR and OS) and clear the corresponding counter register
+#define INTEL_CTRL_START(event, mask, i) \
+({ \
+     uint64_t tmp = 0x0; \
+     tmp |= (mask)<<INTEL_UMASK_BIT; \
+     tmp |= (event)<<INTEL_EVENT_BIT; \
+     tmp |= 0x3<<INTEL_USR_BIT; /* count in both USR (bit 16) and OS (bit 17) rings */ \
+     tmp |= 0x1<<INTEL_EN_BIT; \
+     wrmsrl((INTEL_IA32_PERFEVTSEL0 + (i)), tmp); \
+     wrmsrl((INTEL_IA32_PMC0 + (i)), 0x0); \
+})
+
+#define INTEL_CTRL_STOP(i) do { wrmsrl((INTEL_IA32_PERFEVTSEL0 + (i)), 0x0); } while(0)
+
+#define INTEL_FIXED_CTRL_READ(msrs) do {rdmsrl(INTEL_IA32_FIXED_CTR_CTRL, (msrs).q);} while (0)
+#define INTEL_FIXED_CTRL_WRITE(msrs) do {wrmsrl(INTEL_IA32_FIXED_CTR_CTRL, (msrs).q);} while (0)
+
+/*
+ * SELECTED PMU EVENTS AND UMASKS
+ * Intel 64 and IA-32 Architectures Software Developer's Manual, Jan 2013
+ * Chapter 19, Performance-Monitoring Events
+ */
+
+// CLK and INSTRUCTIONS events
+#define INTEL_CLK_NOT_HALTED 0x3C // event
+#define INTEL_RETIRED_INSTRUCTIONS 0xC0 // event
+
+// MEM INST events
+#define INTEL_MEM_INST_RETIRED 0x0B // event
+ #define INTEL_LOADS 0x1 // umask
+ #define INTEL_STORES 0x2 // umask
+
+// MEM LOAD events and umasks
+#define INTEL_MEM_LOAD_RETIRED 0xCB // event
+ #define INTEL_L1D_HIT 0x1 // umask
+ #define INTEL_L2_HIT 0x2 // umask
+ #define INTEL_L3_UNSHARED_HIT 0x4 // umask
+ #define INTEL_OTHER_CORE_L2_HIT_HITM 0x8 // umask
+ #define INTEL_L3_MISS 0x10 // umask
+ #define INTEL_HIT_LFB 0x40 // umask
+ #define INTEL_DTLB_MISS 0x80 // umask
+
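+/*
+ * These event/umask pairs back the generic v3_pmon counters in the
+ * accompanying interface code (intel_start_tracking/intel_get_value):
+ *   V3_PMON_MEM_LOAD_COUNT   <- INTEL_MEM_INST_RETIRED + INTEL_LOADS
+ *   V3_PMON_MEM_STORE_COUNT  <- INTEL_MEM_INST_RETIRED + INTEL_STORES
+ *   V3_PMON_CACHE_MISS_COUNT <- INTEL_MEM_LOAD_RETIRED + INTEL_L3_MISS
+ *   V3_PMON_TLB_MISS_COUNT   <- INTEL_MEM_LOAD_RETIRED + INTEL_DTLB_MISS
+ * while the clock and retired-instruction counts use the fixed counters.
+ */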
+
+
--- /dev/null
+/*
+ * PMU
+ * (c) Chang S. Bae, 2013
+ */
+
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+
+#include <palacios/vmm_types.h>
+#include <palacios/vmm_util.h>
+#include <interfaces/vmm_pmu.h>
+
+#include "vm.h"
+#include "palacios.h"
+#include "iface-pmu-intel.h"
+#include "iface-pmu-amd.h"
+#include "util-queue.h"
+#include "linux-exts.h"
+
+
+// Number of inits/deinits we have seen (inc on init, dec on deinit)
+// This is per CPU - init/deinit mean init/deinit PMU
+// tracking ON THE CURRENT CORE
+static DEFINE_PER_CPU(u32, pmu_refcount) = 0;
+
+
+/*
+ * commonly used helper macros
+ */
+#define MSR_READ(msrs, c) do {rdmsrl((c), (msrs).q);} while (0)
+#define MSR_WRITE(msrs, c) do {wrmsrl((c), (msrs).q);} while (0)
+#define SET_BIT(val, i) ((val) |= (1U << (i)))
+#define CLEAR_BIT(val, u, i) ((val) &= ~(((u) & 1U) << (i)))
+#define SET_BYTE(val, u, i) ((val) |= (((u) & 255U) << (i)))
+#define CHECK_BIT(val, i) ((val) & (1U << (i)))
+
+
+static inline void cpuid_string(u32 id, u32 dest[4]) {
+  asm volatile("cpuid"
+	       :"=a"(dest[0]),"=b"(dest[1]),"=c"(dest[2]),"=d"(dest[3])
+	       :"a"(id));
+}
+
+
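+/*
+ * CPUID leaf 0 returns the maximum supported leaf in EAX and the vendor
+ * string spread across EBX, EDX, ECX in that order ("Genu", "ineI", "ntel"
+ * on Intel parts), hence the dest[1], dest[3], dest[2] copy order below.
+ */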
+static int get_cpu_vendor(char name[13])
+{
+ u32 dest[4];
+ u32 maxid;
+
+ cpuid_string(0,dest);
+ maxid=dest[0];
+ ((u32*)name)[0]=dest[1];
+ ((u32*)name)[1]=dest[3];
+ ((u32*)name)[2]=dest[2];
+ name[12]=0;
+
+ return maxid;
+}
+
+static int is_intel(void)
+{
+ char name[13];
+ get_cpu_vendor(name);
+ return !strcmp(name,"GenuineIntel");
+}
+
+static int is_amd(void)
+{
+ char name[13];
+ get_cpu_vendor(name);
+ return !strcmp(name,"AuthenticAMD");
+}
+
+
+
+/*
+ * AMD and Intel implementations are distinguished by prefix: INTEL or AMD
+ */
+
+/*
+ * name: *_get_slot
+ * description: look for an available (currently disabled) slot in the pmu
+ * return: -1 if none, else the index of a free counter: 0 ... 3
+ */
+
+static int intel_get_slot(void) {
+
+ int i, slot;
+ struct msr control;
+
+ slot = -1;
+ control.q = 0x0;
+
+ for (i=0; i<INTEL_NUM_PMU_CONTROLS; i++) {
+ INTEL_CTRL_READ(control, i);
+ if(control.q & (0x1<<INTEL_EN_BIT)) {
+ continue;
+ } else {
+ slot = i;
+ break;
+ }
+ }
+
+ return slot;
+}
+
+static int amd_get_slot(void) {
+ int i, slot;
+ struct msr control;
+
+ slot = -1;
+ control.q = 0x0;
+
+ for (i=0; i<AMD_NUM_PMU_CONTROLS; i++) {
+ AMD_CTRL_READ(control, i);
+ if(control.q & (0x1<<AMD_EN_BIT)) {
+ continue;
+ } else {
+ slot = i;
+ break;
+ }
+ }
+
+ return slot;
+}
+
+/*
+ * name: *_find_idx
+ * description: find the index of the programmable counter currently
+ * programmed with the given event/umask pair; returns -1 if none matches
+ */
+static int intel_find_idx(uint8_t event, uint8_t mask) {
+ int i;
+
+ struct msr control;
+
+ control.q = 0x0;
+
+ for (i=0; i<INTEL_NUM_PMU_COUNTERS; i++) {
+ INTEL_CTRL_READ(control, i);
+ if((((control.l>>INTEL_EVENT_BIT) & 0xff) == event) &&
+ (((control.l>>INTEL_UMASK_BIT) & 0xff) == mask)) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+
+/*
+ * The following implementations -- init, deinit, start_tracking, stop_tracking
+ * and get_value -- plug into the pmu interface (struct v3_pmu_iface)
+ */
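+
+/*
+ * Illustrative sketch only (the real consumer sits on the Palacios side and
+ * is handed these callbacks through V3_Init_PMU() at the bottom of this
+ * file): per core, a user of the interface would roughly do
+ *
+ *   pmu->init();
+ *   pmu->start_tracking(V3_PMON_RETIRED_INST_COUNT);
+ *   ...
+ *   uint64_t insts = pmu->get_value(V3_PMON_RETIRED_INST_COUNT);
+ *   pmu->stop_tracking(V3_PMON_RETIRED_INST_COUNT);
+ *   pmu->deinit();
+ */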
+
+static uint64_t intel_get_value(v3_pmon_ctr_t ctr) {
+ /*
+ * local variables
+ */
+ int ctr_idx;
+ struct msr count;
+
+ count.q = 0x0;
+
+ switch(ctr) {
+ case V3_PMON_CLOCK_COUNT:
+ INTEL_FIXED_CTR_READ(count, INTEL_IDX_CLK_IN_FPMU);
+ break;
+ case V3_PMON_RETIRED_INST_COUNT:
+ INTEL_FIXED_CTR_READ(count, INTEL_IDX_INST_IN_FPMU);
+ break;
+ case V3_PMON_MEM_LOAD_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_LOADS)) >= 0) {
+ INTEL_CTR_READ(count, ctr_idx);
+ } else {
+ goto INTEL_READ_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_STORE_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_STORES)) >= 0) {
+ INTEL_CTR_READ(count, ctr_idx);
+ } else {
+ goto INTEL_READ_FAILED;
+ }
+ break;
+ case V3_PMON_CACHE_MISS_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS)) >= 0) {
+ INTEL_CTR_READ(count, ctr_idx);
+ } else {
+ goto INTEL_READ_FAILED;
+ }
+ break;
+ case V3_PMON_TLB_MISS_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS)) >= 0) {
+ INTEL_CTR_READ(count, ctr_idx);
+ } else {
+ goto INTEL_READ_FAILED;
+ }
+ break;
+ }
+
+ return (uint64_t)count.q;
+
+ INTEL_READ_FAILED:
+ return 0;
+}
+
+
+static int intel_start_tracking(v3_pmon_ctr_t ctr) {
+ /*
+ * local variables
+ */
+ int ctr_idx;
+ struct msr msrs;
+
+ /*
+     * for the programmable events below, first check for a free slot in the
+     * PMU; the clock count instead enables FIXED_CTR2 (bits 8-9 of
+     * IA32_FIXED_CTR_CTRL) and the instruction count FIXED_CTR0 (bits 0-1)
+ */
+
+ switch(ctr) {
+ case V3_PMON_CLOCK_COUNT:
+ INTEL_FIXED_CTRL_READ(msrs);
+ msrs.l |= 0x3<<8;
+ INTEL_FIXED_CTRL_WRITE(msrs);
+ break;
+ case V3_PMON_RETIRED_INST_COUNT:
+ INTEL_FIXED_CTRL_READ(msrs);
+ msrs.l |= 0x3;
+ INTEL_FIXED_CTRL_WRITE(msrs);
+ break;
+ case V3_PMON_MEM_LOAD_COUNT:
+ if((ctr_idx = intel_get_slot()) >= 0) {
+ INTEL_CTRL_START(INTEL_MEM_INST_RETIRED, INTEL_LOADS, ctr_idx);
+ } else {
+ goto INTEL_START_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_STORE_COUNT:
+ if((ctr_idx = intel_get_slot()) >= 0) {
+ INTEL_CTRL_START(INTEL_MEM_INST_RETIRED, INTEL_STORES, ctr_idx);
+ } else {
+ goto INTEL_START_FAILED;
+ }
+ break;
+ case V3_PMON_CACHE_MISS_COUNT:
+ if((ctr_idx = intel_get_slot()) >= 0) {
+ INTEL_CTRL_START(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS, ctr_idx);
+ } else {
+ goto INTEL_START_FAILED;
+ }
+ break;
+ case V3_PMON_TLB_MISS_COUNT:
+ if((ctr_idx = intel_get_slot()) >= 0) {
+ INTEL_CTRL_START(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS, ctr_idx);
+ } else {
+ goto INTEL_START_FAILED;
+ }
+ break;
+ }
+
+ return 0;
+
+ INTEL_START_FAILED:
+    ERROR("ERROR: no free pmu slot remains for pmon events\n");
+ return -1;
+}
+
+/*
+ * description: disable counting of the given pmu event
+ */
+
+static int intel_stop_tracking(v3_pmon_ctr_t ctr) {
+ /*
+ * local variables
+ */
+ int ctr_idx = -1;
+ struct msr msrs;
+
+ /*
+ * check if available slot in PMU, except
+     * for the programmable events, find the counter currently tracking the
+     * requested event and disable it; the fixed counters are simply turned
+     * off in IA32_FIXED_CTR_CTRL
+
+ switch(ctr) {
+ case V3_PMON_CLOCK_COUNT:
+ INTEL_FIXED_CTRL_READ(msrs);
+ msrs.l &= ~(0xf<<8);
+ INTEL_FIXED_CTRL_WRITE(msrs);
+ break;
+ case V3_PMON_RETIRED_INST_COUNT:
+ INTEL_FIXED_CTRL_READ(msrs);
+ msrs.l &= ~(0xf);
+ INTEL_FIXED_CTRL_WRITE(msrs);
+ break;
+ case V3_PMON_MEM_LOAD_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_LOADS)) >= 0) {
+ INTEL_CTRL_STOP(ctr_idx);
+ } else {
+ goto INTEL_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_STORE_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_INST_RETIRED, INTEL_STORES)) >= 0) {
+ INTEL_CTRL_STOP(ctr_idx);
+ } else {
+ goto INTEL_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_CACHE_MISS_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_L3_MISS)) >= 0) {
+ INTEL_CTRL_STOP(ctr_idx);
+ } else {
+ goto INTEL_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_TLB_MISS_COUNT:
+ if((ctr_idx = intel_find_idx(INTEL_MEM_LOAD_RETIRED, INTEL_DTLB_MISS)) >= 0) {
+ INTEL_CTRL_STOP(ctr_idx);
+ } else {
+ goto INTEL_STOP_FAILED;
+ }
+ break;
+ }
+
+ return 0;
+
+ INTEL_STOP_FAILED:
+    ERROR("ERROR: no pmu counter found tracking the requested event\n");
+ return -1;
+}
+
+static void intel_pmu_init(void) {
+ int i;
+ struct msr control;
+
+    if ((get_cpu_var(pmu_refcount)++) > 0) {
+ put_cpu_var(pmu_refcount);
+ // only the first init clears the pmu
+ return;
+ }
+ put_cpu_var(pmu_refcount);
+
+
+ control.q=0x0;
+
+  /*
+   * per the Intel PMU architecture there are two classes of counters:
+   * fixed ones (3 counters) and programmable ones (4 counters).
+   * the events for the fixed counters are predetermined, so the only choice
+   * is whether to enable them, whereas the programmable ones are, literally,
+   * programmable.
+   */
+
+  /*
+   * globally enable all counters: bits 0-3 of IA32_PERF_GLOBAL_CTRL enable
+   * PMC0-3, bits 32-34 enable FIXED_CTR0-2
+   */
+  MSR_READ(control, INTEL_IA32_PERF_GLOBAL_CTRL);
+  control.q |= 0x70000000fULL;
+  MSR_WRITE(control, INTEL_IA32_PERF_GLOBAL_CTRL);
+
+  /*
+   * disable everything in the fixed counter control register
+   * (and leave control.q at 0 for the programmable control cleanup below)
+   */
+  control.q = 0x0;
+  INTEL_FIXED_CTRL_WRITE(control);
+
+ /*
+ * clean up programmable counter control
+ */
+ for (i=0; i<INTEL_NUM_PMU_CONTROLS; i++) {
+ INTEL_CTRL_WRITE(control, i);
+ }
+}
+
+static void intel_pmu_deinit(void) {
+    if ((get_cpu_var(pmu_refcount)--) == 1) {
+        // last deinit on this core - any actual PMU teardown would go here
+    }
+    put_cpu_var(pmu_refcount);
+}
+
+
+
+
+
+static int amd_find_idx(uint8_t event, uint8_t mask) {
+ int i;
+
+ struct msr control;
+
+ control.q = 0x0;
+
+ for (i=0; i<AMD_NUM_PMU_COUNTERS; i++) {
+ AMD_CTRL_READ(control, i);
+ if((((control.l>>AMD_EVENT_BIT) & 0xff) == event) &&
+ (((control.l>>AMD_UMASK_BIT) & 0xff) == mask)) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+
+static uint64_t amd_get_value(v3_pmon_ctr_t ctr) {
+ int ctr_idx;
+ struct msr count;
+
+ count.q = 0x0;
+
+ switch(ctr) {
+ case V3_PMON_CLOCK_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_CLK_NOT_HALTED, 0x0)) >= 0) {
+ AMD_CTR_READ(count, ctr_idx);
+ } else {
+ goto AMD_READ_FAILED;
+ }
+ break;
+ case V3_PMON_RETIRED_INST_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_RETIRED_INSTRUCTIONS, 0x0)) >= 0) {
+ AMD_CTR_READ(count, ctr_idx);
+ } else {
+ goto AMD_READ_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_LOAD_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_PREFETCH_INST_DISPATCHED, AMD_LOAD)) >= 0) {
+ AMD_CTR_READ(count, ctr_idx);
+ } else {
+ goto AMD_READ_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_STORE_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_PREFETCH_INST_DISPATCHED, AMD_STORE)) >= 0) {
+ AMD_CTR_READ(count, ctr_idx);
+ } else {
+ goto AMD_READ_FAILED;
+ }
+ break;
+ case V3_PMON_CACHE_MISS_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_MISSES, 0x0)) >= 0) {
+ AMD_CTR_READ(count, ctr_idx);
+ } else {
+ goto AMD_READ_FAILED;
+ }
+ break;
+ case V3_PMON_TLB_MISS_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x0)) >= 0) {
+ AMD_CTR_READ(count, ctr_idx);
+ } else {
+ goto AMD_READ_FAILED;
+ }
+ break;
+ }
+
+ return (uint64_t)count.q;
+
+ AMD_READ_FAILED:
+ return 0;
+}
+
+static int amd_start_tracking(v3_pmon_ctr_t ctr) {
+
+ int ctr_idx;
+
+ switch(ctr) {
+ case V3_PMON_CLOCK_COUNT:
+ if((ctr_idx = amd_get_slot()) >= 0) {
+ AMD_CTRL_START(AMD_CLK_NOT_HALTED, 0x0, ctr_idx);
+ } else {
+ goto AMD_START_FAILED;
+ }
+ break;
+ case V3_PMON_RETIRED_INST_COUNT:
+ if((ctr_idx = amd_get_slot()) >= 0) {
+ AMD_CTRL_START(AMD_RETIRED_INSTRUCTIONS, 0x0, ctr_idx);
+ } else {
+ goto AMD_START_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_LOAD_COUNT:
+ if((ctr_idx = amd_get_slot()) >= 0) {
+ AMD_CTRL_START(AMD_PREFETCH_INST_DISPATCHED, AMD_LOAD, ctr_idx);
+ } else {
+ goto AMD_START_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_STORE_COUNT:
+ if((ctr_idx = amd_get_slot()) >= 0) {
+ AMD_CTRL_START(AMD_PREFETCH_INST_DISPATCHED, AMD_STORE, ctr_idx);
+ } else {
+ goto AMD_START_FAILED;
+ }
+ break;
+ case V3_PMON_CACHE_MISS_COUNT:
+ if((ctr_idx = amd_get_slot()) >= 0) {
+ AMD_CTRL_START(AMD_DATA_CACHE_MISSES, 0x0, ctr_idx);
+ } else {
+ goto AMD_START_FAILED;
+ }
+ break;
+ case V3_PMON_TLB_MISS_COUNT:
+ if((ctr_idx = amd_get_slot()) >= 0) {
+ AMD_CTRL_START(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x0, ctr_idx);
+ } else {
+ goto AMD_START_FAILED;
+ }
+ break;
+ }
+
+ return 0;
+
+ AMD_START_FAILED:
+    ERROR("ERROR: no free pmu slot remains for pmon events\n");
+ return -1;
+}
+
+
+static int amd_stop_tracking(v3_pmon_ctr_t ctr) {
+
+ int ctr_idx = -1;
+
+
+ switch(ctr) {
+ case V3_PMON_CLOCK_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_CLK_NOT_HALTED, 0x0)) >= 0) {
+ AMD_CTRL_STOP(ctr_idx);
+ } else {
+ goto AMD_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_RETIRED_INST_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_RETIRED_INSTRUCTIONS, 0x0)) >= 0) {
+ AMD_CTRL_STOP(ctr_idx);
+ } else {
+ goto AMD_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_LOAD_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_PREFETCH_INST_DISPATCHED, AMD_LOAD)) >= 0) {
+ AMD_CTRL_STOP(ctr_idx);
+ } else {
+ goto AMD_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_MEM_STORE_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_PREFETCH_INST_DISPATCHED, AMD_STORE)) >= 0) {
+ AMD_CTRL_STOP(ctr_idx);
+ } else {
+ goto AMD_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_CACHE_MISS_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_DATA_CACHE_MISSES, 0x0)) >= 0) {
+ AMD_CTRL_STOP(ctr_idx);
+ } else {
+ goto AMD_STOP_FAILED;
+ }
+ break;
+ case V3_PMON_TLB_MISS_COUNT:
+ if((ctr_idx = amd_find_idx(AMD_L1_DTLB_AND_L2_DTLB_MISS, 0x0)) >= 0) {
+ AMD_CTRL_STOP(ctr_idx);
+ } else {
+ goto AMD_STOP_FAILED;
+ }
+ break;
+ }
+
+ return 0;
+
+ AMD_STOP_FAILED:
+    ERROR("ERROR: no pmu counter found tracking the requested event\n");
+ return -1;
+}
+
+
+static void amd_pmu_init(void) {
+
+ int i;
+ struct msr control;
+
+
+    if ((get_cpu_var(pmu_refcount)++) > 0) {
+ put_cpu_var(pmu_refcount);
+ // only the first init clears the pmu
+ return;
+ }
+ put_cpu_var(pmu_refcount);
+
+
+
+ // initialize variables
+ control.q=0x0;
+
+ /*
+ * clean up programmable counter control
+ */
+ for (i=0; i<AMD_NUM_PMU_CONTROLS; i++) {
+ AMD_CTRL_WRITE(control, i);
+ }
+}
+
+static void amd_pmu_deinit(void) {
+    if ((get_cpu_var(pmu_refcount)--) == 1) {
+        // last deinit on this core - any actual PMU teardown would go here
+    }
+    put_cpu_var(pmu_refcount);
+}
+
+
+static struct v3_pmu_iface palacios_pmu_intel = {
+ .init = intel_pmu_init,
+ .deinit = intel_pmu_deinit,
+ .start_tracking = intel_start_tracking,
+ .stop_tracking = intel_stop_tracking,
+ .get_value = intel_get_value
+};
+
+static struct v3_pmu_iface palacios_pmu_amd = {
+ .init = amd_pmu_init,
+ .deinit = amd_pmu_deinit,
+ .start_tracking = amd_start_tracking,
+ .stop_tracking = amd_stop_tracking,
+ .get_value = amd_get_value
+};
+
+static int pmu_init( void ) {
+ if (is_intel()) {
+ INFO("Intel PMU featureset detected\n");
+ V3_Init_PMU(&palacios_pmu_intel);
+ } else if (is_amd()) {
+ INFO("AMD PMU featureset detected\n");
+ V3_Init_PMU(&palacios_pmu_amd);
+ } else {
+ ERROR("This is neither an Intel nor AMD machine - No PMU functionality configured\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+
+static struct linux_ext pmu_ext = {
+ .name = "PMU",
+ .init = pmu_init,
+ .deinit = NULL,
+ .guest_init = NULL,
+ .guest_deinit = NULL
+};
+
+register_extension(&pmu_ext);
+
+
+
+