Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Cleanup and sanity-checking of before/after null-check and copy+paste errors (Coverit...
[palacios.git] / palacios / src / palacios / vmm_pmu_telemetry.c
1 /*
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National
4  * Science Foundation and the Department of Energy.
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
11  * All rights reserved.
12  *
13  * Author: Chang S. Bae <chang.bae@eecs.northwestern.edu>
14  *         Peter Dinda <pdinda@northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19 #include <palacios/vm_guest.h>
20 #include <palacios/vmm_telemetry.h>
21 #include <palacios/vmm_pmu_telemetry.h>
22 #include <palacios/vmm_sprintf.h>
23
24 /*
25   We will try to track:
26
27   V3_PMON_RETIRED_INST_COUNT,
28   V3_PMON_CLOCK_COUNT,
29   V3_PMON_MEM_LOAD_COUNT,
30   V3_PMON_MEM_STORE_COUNT,
31   V3_PMON_CACHE_MISS_COUNT,
32   V3_PMON_TLB_MISS_COUNT
33
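34   and to derive (as exponentially weighted running estimates):
35
36   UCPI - cycles per retired instruction, scaled by 1e6
37   UMPL - cache misses per memory load, scaled by 1e6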
38 */
39
/*
 * Helper macros for the telemetry printout.  Each one expands in a scope
 * that must provide "info" (the struct guest_info pointer of the core);
 * GUEST and HOST additionally need "hdr" (the line prefix string).
 */

/* Nonzero when counter WHAT is marked active for this core */
#define HAVE(WHAT) (info->pmu_telem.active_counters[WHAT])

/* Print the accumulated guest-side count of WHAT, only if WHAT is active */
#define GUEST(WHAT)                                                     \
  do {                                                                  \
    if (HAVE(WHAT)) {                                                   \
      V3_Print(info->vm_info, info, "%sGUEST:%u:%u:%s = %llu\n",        \
               hdr, info->vcpu_id, info->pcpu_id, #WHAT,                \
               info->pmu_telem.guest_counts[WHAT]);                     \
    }                                                                   \
  } while (0)

/* Print the accumulated host-side count of WHAT, only if WHAT is active */
#define HOST(WHAT)                                                      \
  do {                                                                  \
    if (HAVE(WHAT)) {                                                   \
      V3_Print(info->vm_info, info, "%sHOST:%u:%u:%s = %llu\n",         \
               hdr, info->vcpu_id, info->pcpu_id, #WHAT,                \
               info->pmu_telem.host_counts[WHAT]);                      \
    }                                                                   \
  } while (0)
44
45
46 static int print_pmu_data(struct guest_info *info, char * hdr) 
47 {
48   GUEST(V3_PMON_RETIRED_INST_COUNT);
49   GUEST(V3_PMON_CLOCK_COUNT);
50   GUEST(V3_PMON_MEM_LOAD_COUNT);
51   GUEST(V3_PMON_MEM_STORE_COUNT);
52   GUEST(V3_PMON_CACHE_MISS_COUNT);
53   GUEST(V3_PMON_TLB_MISS_COUNT);
54   V3_Print(info->vm_info, info, "%sGUEST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UCPI",info->pmu_telem.guest_ucpi_estimate);
55   V3_Print(info->vm_info, info, "%sGUEST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UMPL", info->pmu_telem.guest_umpl_estimate);
56
57   HOST(V3_PMON_RETIRED_INST_COUNT);
58   HOST(V3_PMON_CLOCK_COUNT);
59   HOST(V3_PMON_MEM_LOAD_COUNT);
60   HOST(V3_PMON_MEM_STORE_COUNT);
61   HOST(V3_PMON_CACHE_MISS_COUNT);
62   HOST(V3_PMON_TLB_MISS_COUNT);
63   V3_Print(info->vm_info, info, "%sHOST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UCPI",info->pmu_telem.host_ucpi_estimate);
64   V3_Print(info->vm_info, info, "%sHOST:%u:%u:%s = %llu\n", hdr, info->vcpu_id, info->pcpu_id, "UMPL", info->pmu_telem.host_umpl_estimate);
65
66   return 0;
67 }
68
69   
70 static void telemetry_pmu(struct v3_vm_info * vm, void * private_data, char * hdr) 
71 {
72   int i;
73   struct guest_info *core = NULL;
74   
75   /*
76    * work through each pcore (vcore for now per excluding oversubscription) and gathering info
77    */
78   for(i=0; i<vm->num_cores; i++) {
79     core = &(vm->cores[i]);
80     if((core->core_run_state != CORE_RUNNING)) continue;
81     print_pmu_data(core, hdr);
82   }
83 }
84
85
/*
 * Try to start tracking counter WHAT and record in active_counters[WHAT]
 * whether that worked (1) or not (0).  A failure is reported via PrintError.
 * Expands in a scope that must provide "info".
 */
#define START(WHAT)                                                     \
  do {                                                                  \
    if (v3_pmu_start_tracking(WHAT) != -1) {                            \
      info->pmu_telem.active_counters[WHAT]=1;                          \
    } else {                                                            \
      PrintError(info->vm_info, info, "Failed to start tracking of %s\n", #WHAT); \
      info->pmu_telem.active_counters[WHAT]=0;                          \
    }                                                                   \
  } while (0)

/*
 * Stop tracking counter WHAT if it is currently marked active.  The
 * counter is marked inactive afterwards even when stopping reports an
 * error.  Expands in a scope that must provide "info".
 */
#define STOP(WHAT)                                                      \
  do {                                                                  \
    if (!info->pmu_telem.active_counters[WHAT]) {                       \
      break; /* leaves only this macro's own do/while */                \
    }                                                                   \
    if (v3_pmu_stop_tracking(WHAT) == -1) {                             \
      PrintError(info->vm_info, info, "Failed to stop tracking of %s\n", #WHAT); \
    }                                                                   \
    info->pmu_telem.active_counters[WHAT]=0;                            \
  } while (0)
105
106
107 void v3_pmu_telemetry_start(struct guest_info *info)
108 {
109   if (!info->vm_info->enable_telemetry) {
110     return;
111   }
112
113   memset(&(info->pmu_telem),0,sizeof(struct v3_core_pmu_telemetry));
114
115   v3_pmu_init();
116   
117   START(V3_PMON_RETIRED_INST_COUNT);
118   START(V3_PMON_CLOCK_COUNT);
119   START(V3_PMON_MEM_LOAD_COUNT);
120   START(V3_PMON_MEM_STORE_COUNT);
121   START(V3_PMON_CACHE_MISS_COUNT);
122   START(V3_PMON_TLB_MISS_COUNT);
123
124
125   info->pmu_telem.state=AWAIT_FIRST_ENTRY;
126
127
128   if (info->vcpu_id==0) { 
129     v3_add_telemetry_cb(info->vm_info, telemetry_pmu, NULL);
130   }
131
132 }
133
134 static void inline snapshot(uint64_t vals[]) {
135   vals[V3_PMON_RETIRED_INST_COUNT] = v3_pmu_get_value(V3_PMON_RETIRED_INST_COUNT); 
136   vals[V3_PMON_CLOCK_COUNT] = v3_pmu_get_value(V3_PMON_CLOCK_COUNT);
137   vals[V3_PMON_MEM_LOAD_COUNT] = v3_pmu_get_value(V3_PMON_MEM_LOAD_COUNT);
138   vals[V3_PMON_MEM_STORE_COUNT] = v3_pmu_get_value(V3_PMON_MEM_STORE_COUNT);
139   vals[V3_PMON_CACHE_MISS_COUNT] = v3_pmu_get_value(V3_PMON_CACHE_MISS_COUNT);
140   vals[V3_PMON_TLB_MISS_COUNT] = v3_pmu_get_value(V3_PMON_TLB_MISS_COUNT);
141 }  
142
143
/*
 * Weights of the running estimates, expressed in eighths:
 *   new estimate = (ALPHA_NUM * old estimate + OM_ALPHA_NUM * new sample)
 *                  / ALPHA_DENOM
 */
enum {
  ALPHA_DENOM  = 8,  /* we are counting in 8ths          */
  ALPHA_NUM    = 1,  /* 1/8 goes to the previous estimate */
  OM_ALPHA_NUM = 7   /* 7/8 goes to the new sample        */
};
147
148 static inline void update_ucpi_estimate(uint64_t *estimate, uint64_t cur[], uint64_t last[])
149 {
150   // 1e6 times the number of cycles since last
151   uint64_t ucycles = 1000000 * (cur[V3_PMON_CLOCK_COUNT] - last[V3_PMON_CLOCK_COUNT]);
152   uint64_t insts = cur[V3_PMON_RETIRED_INST_COUNT] - last[V3_PMON_RETIRED_INST_COUNT];
153
154   if (insts==0) { 
155     return;
156   }
157
158
159   *estimate = ((ALPHA_NUM * (*estimate)) + (OM_ALPHA_NUM * ((ucycles/insts)))) / ALPHA_DENOM;
160                   
161 }
162
163 static inline void update_umpl_estimate(uint64_t *estimate, uint64_t cur[], uint64_t last[])
164 {
165   // 1e6 times the number of misses since the last time
166   uint64_t umisses = 1000000 * (cur[V3_PMON_CACHE_MISS_COUNT] - last[V3_PMON_CACHE_MISS_COUNT]);
167   uint64_t loads = cur[V3_PMON_MEM_LOAD_COUNT] - last[V3_PMON_MEM_LOAD_COUNT];
168
169   if (loads==0) {
170     return;
171   }
172  
173   *estimate = ((ALPHA_NUM * (*estimate)) + (OM_ALPHA_NUM * ((umisses/loads)))) / ALPHA_DENOM;
174                   
175 }
176
177 void v3_pmu_telemetry_enter(struct guest_info *info)
178 {
179   if (!info->vm_info->enable_telemetry) {
180     return;
181   }
182
183   switch (info->pmu_telem.state) { 
184   case AWAIT_FIRST_ENTRY:
185     snapshot(info->pmu_telem.last_snapshot);
186     info->pmu_telem.state=AWAIT_EXIT;
187     break;
188   
189   case AWAIT_ENTRY: {
190     // AWAIT_ENTRY - the snapshot in the struct is from the last exit
191     uint64_t snap[PMU_NUM_COUNTERS];
192     int i;
193
194     snapshot(snap);
195
196     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
197       info->pmu_telem.host_counts[i] += snap[i] - info->pmu_telem.last_snapshot[i];
198     }
199
200     if (HAVE(V3_PMON_CLOCK_COUNT) && HAVE(V3_PMON_RETIRED_INST_COUNT)) { 
201       update_ucpi_estimate(&(info->pmu_telem.host_ucpi_estimate), snap, info->pmu_telem.last_snapshot);
202     }
203     if (HAVE(V3_PMON_CACHE_MISS_COUNT) && HAVE(V3_PMON_MEM_LOAD_COUNT)) { 
204       update_umpl_estimate(&(info->pmu_telem.host_umpl_estimate), snap, info->pmu_telem.last_snapshot);
205     }
206
207     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
208       info->pmu_telem.last_snapshot[i] = snap[i];
209     }
210
211     info->pmu_telem.state = AWAIT_EXIT;
212   }
213     break;
214
215   default:
216     PrintError(info->vm_info, info, "Impossible state in on pmu telemetry entry\n");
217     break;
218   }
219   
220 }
221
222
223
224 void v3_pmu_telemetry_exit(struct guest_info *info)
225 {
226   if (!info->vm_info->enable_telemetry) {
227     return;
228   }
229
230   switch (info->pmu_telem.state) { 
231   case AWAIT_EXIT: {
232     // AWAIT_EXIT - the snapshot in the struct is from the last entryx
233     uint64_t snap[PMU_NUM_COUNTERS];
234     int i;
235
236     snapshot(snap);
237
238     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
239       info->pmu_telem.guest_counts[i] += snap[i] - info->pmu_telem.last_snapshot[i];
240     }
241
242     if (HAVE(V3_PMON_CLOCK_COUNT) && HAVE(V3_PMON_RETIRED_INST_COUNT)) { 
243       update_ucpi_estimate(&(info->pmu_telem.guest_ucpi_estimate), snap, info->pmu_telem.last_snapshot);
244     } 
245
246     if (HAVE(V3_PMON_CACHE_MISS_COUNT) && HAVE(V3_PMON_MEM_LOAD_COUNT)) { 
247       update_umpl_estimate(&(info->pmu_telem.guest_umpl_estimate), snap, info->pmu_telem.last_snapshot);
248     }
249
250     for (i=0;i<PMU_NUM_COUNTERS;i++) { 
251       info->pmu_telem.last_snapshot[i] = snap[i];
252     }
253
254     info->pmu_telem.state = AWAIT_ENTRY;
255   }
256     break;
257   default:
258     PrintError(info->vm_info, info, "Impossible state in on pmu telemetry exit\n");
259     break;
260   }
261   
262 }
263
264 void v3_pmu_telemetry_end(struct guest_info *info)
265 {
266   if (!info->vm_info->enable_telemetry) {
267     return;
268   }
269
270   STOP(V3_PMON_RETIRED_INST_COUNT);
271   STOP(V3_PMON_CLOCK_COUNT);
272   STOP(V3_PMON_MEM_LOAD_COUNT);
273   STOP(V3_PMON_MEM_STORE_COUNT);
274   STOP(V3_PMON_CACHE_MISS_COUNT);
275   STOP(V3_PMON_TLB_MISS_COUNT);
276
277   v3_pmu_deinit();
278   
279   info->pmu_telem.state=AWAIT_FIRST_ENTRY;
280
281   // Umm.... there is no v3_remove_telemtry_cb ?  WTF? 
282 }