Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


20bf3ec59ac3fc51f80301f62091245dcf380ad5
[palacios.git] / palacios / src / palacios / vmm_pmu_telemetry.c
1 /*
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National
4  * Science Foundation and the Department of Energy.
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
11  * All rights reserved.
12  *
13  * Author: Chang S. Bae <chang.bae@eecs.northwestern.edu>
14  *         Peter Dinda <pdinda@northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19 #include <palacios/vm_guest.h>
20 #include <palacios/vmm_telemetry.h>
21 #include <palacios/vmm_pmu_telemetry.h>
22 #include <palacios/vmm_sprintf.h>
23
24 /*
25   We will try to track:
26
27   V3_PMON_RETIRED_INST_COUNT,
28   V3_PMON_CLOCK_COUNT,
29   V3_PMON_MEM_LOAD_COUNT,
30   V3_PMON_MEM_STORE_COUNT,
31   V3_PMON_CACHE_MISS_COUNT,
32   V3_PMON_TLB_MISS_COUNT
33
34   and to derive:
35
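36   CPI (reported as UCPI: cycles per instruction, scaled by 1e6)
37   cache misses per load (reported as UMPL: misses per load, scaled by 1e6)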
38 */
39
/*
 * HAVE(WHAT) - nonzero when counter WHAT is marked active for this core.
 * Relies on a variable named `info` being in scope at the expansion site.
 */
#define HAVE(WHAT) (info->pmu_telem.active_counters[WHAT])

/*
 * GUEST(WHAT) / HOST(WHAT) - when counter WHAT is active, print its
 * accumulated guest-side / host-side count in the form
 *   <hdr>GUEST:<vcpu>:<pcpu>:<counter name> = <count>
 * Both rely on variables named `info` and `hdr` being in scope at the
 * expansion site.
 */
#define GUEST(WHAT)                                                       \
  do {                                                                    \
    if (HAVE(WHAT)) {                                                     \
      V3_Print(info->vm_info, info, "%sGUEST:%u:%u:%s = %llu\n",          \
               hdr, info->vcpu_id, info->pcpu_id, #WHAT,                  \
               info->pmu_telem.guest_counts[WHAT]);                       \
    }                                                                     \
  } while (0)

#define HOST(WHAT)                                                        \
  do {                                                                    \
    if (HAVE(WHAT)) {                                                     \
      V3_Print(info->vm_info, info, "%sHOST:%u:%u:%s = %llu\n",           \
               hdr, info->vcpu_id, info->pcpu_id, #WHAT,                  \
               info->pmu_telem.host_counts[WHAT]);                        \
    }                                                                     \
  } while (0)
44
45
46 static int print_pmu_data(struct guest_info *info, char * hdr) 
47 {
48   GUEST(V3_PMON_RETIRED_INST_COUNT);
49   GUEST(V3_PMON_CLOCK_COUNT);
50   GUEST(V3_PMON_MEM_LOAD_COUNT);
51   GUEST(V3_PMON_MEM_STORE_COUNT);
52   GUEST(V3_PMON_CACHE_MISS_COUNT);
53   GUEST(V3_PMON_TLB_MISS_COUNT);
54   V3_Print(info->vm_info, info, "%sGUEST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UCPI",info->pmu_telem.guest_ucpi_estimate);
55   V3_Print(info->vm_info, info, "%sGUEST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UMPL", info->pmu_telem.guest_umpl_estimate);
56
57   HOST(V3_PMON_RETIRED_INST_COUNT);
58   HOST(V3_PMON_CLOCK_COUNT);
59   HOST(V3_PMON_MEM_LOAD_COUNT);
60   HOST(V3_PMON_MEM_STORE_COUNT);
61   HOST(V3_PMON_CACHE_MISS_COUNT);
62   HOST(V3_PMON_TLB_MISS_COUNT);
63   V3_Print(info->vm_info, info, "%sHOST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UCPI",info->pmu_telem.host_ucpi_estimate);
64   V3_Print(info->vm_info, info, "%sHOST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UMPL", info->pmu_telem.host_umpl_estimate);
65
66   return 0;
67 }
68
69   
70 static void telemetry_pmu(struct v3_vm_info * vm, void * private_data, char * hdr) 
71 {
72   int i;
73   struct guest_info *core = NULL;
74   
75   /*
76    * work through each pcore (vcore for now per excluding oversubscription) and gathering info
77    */
78   for(i=0; i<vm->num_cores; i++) {
79     core = &(vm->cores[i]);
80     if((core->core_run_state != CORE_RUNNING)) continue;
81     print_pmu_data(core, hdr);
82   }
83 }
84
85
/*
 * START(WHAT) - ask the PMU layer to begin tracking counter WHAT and record
 * in active_counters[] whether that succeeded.  A failure (-1) is logged and
 * leaves the counter marked inactive.
 * Relies on a variable named `info` being in scope at the expansion site.
 */
#define START(WHAT)                                                       \
  do {                                                                    \
    if (v3_pmu_start_tracking(WHAT) != -1) {                              \
      info->pmu_telem.active_counters[WHAT]=1;                            \
    } else {                                                              \
      PrintError(info->vm_info, info, "Failed to start tracking of %s\n", #WHAT); \
      info->pmu_telem.active_counters[WHAT]=0;                            \
    }                                                                     \
  } while (0)

/*
 * STOP(WHAT) - if counter WHAT is marked active, ask the PMU layer to stop
 * tracking it (logging a failure) and mark it inactive either way.
 * Counters that are not active are left untouched.
 * Relies on a variable named `info` being in scope at the expansion site.
 */
#define STOP(WHAT)                                                        \
  do {                                                                    \
    if (info->pmu_telem.active_counters[WHAT]) {                          \
      if (v3_pmu_stop_tracking(WHAT) == -1) {                             \
        PrintError(info->vm_info, info, "Failed to stop tracking of %s\n", #WHAT); \
      }                                                                   \
      info->pmu_telem.active_counters[WHAT]=0;                            \
    }                                                                     \
  } while (0)
105
106
107 void v3_pmu_telemetry_start(struct guest_info *info)
108 {
109   if (!info->vm_info->enable_telemetry) {
110     return;
111   }
112
113   memset(&(info->pmu_telem),0,sizeof(struct v3_core_pmu_telemetry));
114
115   v3_pmu_init();
116   
117   START(V3_PMON_RETIRED_INST_COUNT);
118   START(V3_PMON_CLOCK_COUNT);
119   START(V3_PMON_MEM_LOAD_COUNT);
120   START(V3_PMON_MEM_STORE_COUNT);
121   START(V3_PMON_CACHE_MISS_COUNT);
122   START(V3_PMON_TLB_MISS_COUNT);
123
124
125   info->pmu_telem.state=AWAIT_FIRST_ENTRY;
126
127
128   if (info->vcpu_id==0) { 
129     v3_add_telemetry_cb(info->vm_info, telemetry_pmu, NULL);
130   }
131
132 }
133
134 static void inline snapshot(uint64_t vals[]) {
135   vals[V3_PMON_RETIRED_INST_COUNT] = v3_pmu_get_value(V3_PMON_RETIRED_INST_COUNT); 
136   vals[V3_PMON_CLOCK_COUNT] = v3_pmu_get_value(V3_PMON_CLOCK_COUNT);
137   vals[V3_PMON_MEM_LOAD_COUNT] = v3_pmu_get_value(V3_PMON_MEM_LOAD_COUNT);
138   vals[V3_PMON_MEM_STORE_COUNT] = v3_pmu_get_value(V3_PMON_MEM_STORE_COUNT);
139   vals[V3_PMON_CACHE_MISS_COUNT] = v3_pmu_get_value(V3_PMON_CACHE_MISS_COUNT);
140   vals[V3_PMON_TLB_MISS_COUNT] = v3_pmu_get_value(V3_PMON_TLB_MISS_COUNT);
141 }  
142
143
144 #define ALPHA_DENOM         8  // we are counting in 8ths
145 #define ALPHA_NUM           1  // 1/8 to new value
146 #define OM_ALPHA_NUM        7  // 7/8 to estimate
147
148 static inline void update_ucpi_estimate(uint64_t *estimate, uint64_t counts[], uint64_t last[])
149 {
150   // 1e6 times the number of cycles since last
151   uint64_t ucycles = 1000000 * (counts[V3_PMON_CLOCK_COUNT] - last[V3_PMON_CLOCK_COUNT]);
152   uint64_t insts = counts[V3_PMON_RETIRED_INST_COUNT] - last[V3_PMON_RETIRED_INST_COUNT];
153
154   if (insts==0) { 
155     return;
156   }
157  
158   *estimate = ((ALPHA_NUM * (*estimate)) + (OM_ALPHA_NUM * ((ucycles/insts)))) / ALPHA_DENOM;
159                   
160 }
161
162 static inline void update_umpl_estimate(uint64_t *estimate, uint64_t counts[], uint64_t last[])
163 {
164   // 1e6 times the number of misses since the last time
165   uint64_t umisses = 1000000 * (counts[V3_PMON_CACHE_MISS_COUNT] - last[V3_PMON_CACHE_MISS_COUNT]);
166   uint64_t loads = counts[V3_PMON_MEM_LOAD_COUNT] - last[V3_PMON_MEM_LOAD_COUNT];
167
168   if (loads==0) {
169     return;
170   }
171  
172   *estimate = ((ALPHA_NUM * (*estimate)) + (OM_ALPHA_NUM * ((umisses/loads)))) / ALPHA_DENOM;
173                   
174 }
175
176 void v3_pmu_telemetry_enter(struct guest_info *info)
177 {
178   if (!info->vm_info->enable_telemetry) {
179     return;
180   }
181
182   switch (info->pmu_telem.state) { 
183   case AWAIT_FIRST_ENTRY:
184     snapshot(info->pmu_telem.last_snapshot);
185     info->pmu_telem.state=AWAIT_EXIT;
186     break;
187   
188   case AWAIT_ENTRY: {
189     // AWAIT_ENTRY - the snapshot in the struct is from the last exit
190     uint64_t snap[PMU_NUM_COUNTERS];
191     int i;
192
193     snapshot(snap);
194
195     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
196       info->pmu_telem.host_counts[i] += snap[i] - info->pmu_telem.last_snapshot[i];
197     }
198
199     if (HAVE(V3_PMON_CLOCK_COUNT) && HAVE(V3_PMON_RETIRED_INST_COUNT)) { 
200       update_ucpi_estimate(&(info->pmu_telem.host_ucpi_estimate), info->pmu_telem.host_counts, info->pmu_telem.last_snapshot);
201     }
202     if (HAVE(V3_PMON_CACHE_MISS_COUNT) && HAVE(V3_PMON_MEM_LOAD_COUNT)) { 
203       update_umpl_estimate(&(info->pmu_telem.host_umpl_estimate), info->pmu_telem.host_counts, info->pmu_telem.last_snapshot);
204     }
205
206     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
207       info->pmu_telem.last_snapshot[i] = snap[i];
208     }
209
210     info->pmu_telem.state = AWAIT_EXIT;
211   }
212     break;
213
214   default:
215     PrintError(info->vm_info, info, "Impossible state in on pmu telemetry entry\n");
216     break;
217   }
218   
219 }
220
221
222
223 void v3_pmu_telemetry_exit(struct guest_info *info)
224 {
225   if (!info->vm_info->enable_telemetry) {
226     return;
227   }
228
229   switch (info->pmu_telem.state) { 
230   case AWAIT_EXIT: {
231     // AWAIT_EXIT - the snapshot in the struct is from the last entryx
232     uint64_t snap[PMU_NUM_COUNTERS];
233     int i;
234
235     snapshot(snap);
236
237     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
238       info->pmu_telem.guest_counts[i] += snap[i] - info->pmu_telem.last_snapshot[i];
239     }
240
241     if (HAVE(V3_PMON_CLOCK_COUNT) && HAVE(V3_PMON_RETIRED_INST_COUNT)) { 
242       update_ucpi_estimate(&(info->pmu_telem.guest_ucpi_estimate), info->pmu_telem.guest_counts, info->pmu_telem.last_snapshot);
243     }
244     if (HAVE(V3_PMON_CACHE_MISS_COUNT) && HAVE(V3_PMON_MEM_LOAD_COUNT)) { 
245       update_umpl_estimate(&(info->pmu_telem.guest_umpl_estimate), info->pmu_telem.guest_counts, info->pmu_telem.last_snapshot);
246     }
247
248     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
249       info->pmu_telem.last_snapshot[i] = snap[i];
250     }
251
252     info->pmu_telem.state = AWAIT_ENTRY;
253   }
254     break;
255   default:
256     PrintError(info->vm_info, info, "Impossible state in on pmu telemetry exit\n");
257     break;
258   }
259   
260 }
261
262 void v3_pmu_telemetry_end(struct guest_info *info)
263 {
264   if (!info->vm_info->enable_telemetry) {
265     return;
266   }
267
268   STOP(V3_PMON_RETIRED_INST_COUNT);
269   STOP(V3_PMON_CLOCK_COUNT);
270   STOP(V3_PMON_MEM_LOAD_COUNT);
271   STOP(V3_PMON_MEM_STORE_COUNT);
272   STOP(V3_PMON_CACHE_MISS_COUNT);
273   STOP(V3_PMON_TLB_MISS_COUNT);
274
275   v3_pmu_deinit();
276   
277   info->pmu_telem.state=AWAIT_FIRST_ENTRY;
278
279   // Umm.... there is no v3_remove_telemtry_cb ?  WTF? 
280 }