Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Better support for dynamic changes to page event callbacks
[palacios.git] / palacios / src / palacios / vmm_shadow_paging.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20
21 #include <palacios/vmm_shadow_paging.h>
22
23
24 #include <palacios/vmm.h>
25 #include <palacios/vm_guest_mem.h>
26 #include <palacios/vmm_decoder.h>
27 #include <palacios/vmm_ctrl_regs.h>
28
29 #include <palacios/vmm_hashtable.h>
30
31 #include <palacios/vmm_direct_paging.h>
32
33
34
35
36 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
37 #include <palacios/vmm_telemetry.h>
38 #endif
39
40 #ifdef V3_CONFIG_SYMBIOTIC_SWAP
41 #include <palacios/vmm_sym_swap.h>
42 #endif
43
44 #ifndef V3_CONFIG_DEBUG_SHADOW_PAGING
45 #undef PrintDebug
46 #define PrintDebug(fmt, args...)
47 #endif
48
49
50 static const char default_strategy[] = "VTLB";
51
52
53 static struct hashtable * master_shdw_pg_table = NULL;
54
55
56 struct event_callback {
57     int (*callback)(struct guest_info *core, struct v3_shdw_pg_event *event, void *priv_data);
58     void *priv_data;
59
60     struct list_head node;
61 };
62
63 static uint_t shdw_pg_hash_fn(addr_t key) {
64     char * name = (char *)key;
65     return v3_hash_buffer((uint8_t *)name, strlen(name));
66 }
67
68 static int shdw_pg_eq_fn(addr_t key1, addr_t key2) {
69     char * name1 = (char *)key1;
70     char * name2 = (char *)key2;
71
72     return (strcmp(name1, name2) == 0);
73 }
74
75 static int have_callbacks(struct guest_info *core)
76 {
77     // lock acquistion unnecessary
78     // caller will acquire the lock before *iterating* through the list
79     // so any race will be resolved then
80     return !list_empty(&(core->vm_info->shdw_impl.event_callback_list));
81 }
82
83 static void dispatch_event(struct guest_info *core, struct v3_shdw_pg_event *event)
84 {
85     struct event_callback *cb,*temp;
86
87     v3_read_lock(&(core->vm_info->shdw_impl.event_callback_lock));
88
89     list_for_each_entry_safe(cb,
90                              temp,
91                              &(core->vm_info->shdw_impl.event_callback_list),
92                              node) {
93         cb->callback(core,event,cb->priv_data);
94     }
95
96     v3_read_unlock(&(core->vm_info->shdw_impl.event_callback_lock));
97 }
98
99
100 int V3_init_shdw_paging() {
101     extern struct v3_shdw_pg_impl * __start__v3_shdw_pg_impls[];
102     extern struct v3_shdw_pg_impl * __stop__v3_shdw_pg_impls[];
103     struct v3_shdw_pg_impl ** tmp_impl = __start__v3_shdw_pg_impls;
104     int i = 0;
105
106     master_shdw_pg_table = v3_create_htable(0, shdw_pg_hash_fn, shdw_pg_eq_fn);
107
108
109     while (tmp_impl != __stop__v3_shdw_pg_impls) {
110         V3_Print(VM_NONE, VCORE_NONE, "Registering Shadow Paging Impl (%s)\n", (*tmp_impl)->name);
111
112         if (v3_htable_search(master_shdw_pg_table, (addr_t)((*tmp_impl)->name))) {
113             PrintError(VM_NONE, VCORE_NONE, "Multiple instances of shadow paging impl (%s)\n", (*tmp_impl)->name);
114             return -1;
115         }
116
117         if (v3_htable_insert(master_shdw_pg_table, 
118                              (addr_t)((*tmp_impl)->name),
119                              (addr_t)(*tmp_impl)) == 0) {
120             PrintError(VM_NONE, VCORE_NONE, "Could not register shadow paging impl (%s)\n", (*tmp_impl)->name);
121             return -1;
122         }
123
124         tmp_impl = &(__start__v3_shdw_pg_impls[++i]);
125     }
126
127     return 0;
128 }
129
130 int V3_deinit_shdw_paging() {
131     v3_free_htable(master_shdw_pg_table, 0, 0);
132     return 0;
133 }
134
135
136
137 /*** 
138  ***  There be dragons
139  ***/
140
141
142 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
143 static void telemetry_cb(struct v3_vm_info * vm, void * private_data, char * hdr) {
144     int i = 0;
145     for (i = 0; i < vm->num_cores; i++) {
146         struct guest_info * core = &(vm->cores[i]);
147
148         V3_Print(vm, core, "%s Guest Page faults: %d\n", hdr, core->shdw_pg_state.guest_faults);
149     }
150 }
151 #endif
152
153
154
155 int v3_init_shdw_pg_state(struct guest_info * core) {
156     struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
157     struct v3_shdw_pg_impl * impl = core->vm_info->shdw_impl.current_impl;
158   
159
160     state->guest_cr3 = 0;
161     state->guest_cr0 = 0;
162     state->guest_efer.value = 0x0LL;
163
164     if (impl->local_init(core) == -1) {
165         PrintError(core->vm_info, core, "Error in Shadow paging local initialization (%s)\n", impl->name);
166         return -1;
167     }
168
169
170 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
171     v3_add_telemetry_cb(core->vm_info, telemetry_cb, NULL);
172 #endif
173   
174
175     return 0;
176 }
177
178
179 int v3_deinit_shdw_pg_state(struct guest_info * core) {
180     struct v3_shdw_pg_impl * impl = NULL;
181
182     if (!core || !core->vm_info) {
183         return -1;
184     }
185
186     impl = core->vm_info->shdw_impl.current_impl;
187
188     if (impl && impl->local_deinit(core) == -1) {
189         PrintError(core->vm_info, core, "Error deinitializing shadow paging state\n");
190         return -1;
191     }
192
193
194     return 0;
195 }
196
197
198
199 int v3_init_shdw_impl(struct v3_vm_info * vm) {
200     struct v3_shdw_impl_state * impl_state = &(vm->shdw_impl);
201     v3_cfg_tree_t * pg_cfg = v3_cfg_subtree(vm->cfg_data->cfg, "paging");
202     char * pg_mode = v3_cfg_val(pg_cfg, "mode");
203     char * pg_strat = v3_cfg_val(pg_cfg, "strategy");
204     struct v3_shdw_pg_impl * impl = NULL;
205    
206     PrintDebug(vm, VCORE_NONE, "Checking if shadow paging requested.\n");
207     if (pg_mode == NULL) { 
208         V3_Print(vm, VCORE_NONE, "No paging mode specified, assuming shadow with defaults\n");
209         pg_mode = "shadow";
210     } else {
211         if (strcasecmp(pg_mode, "nested") == 0) {
212             // this check is repeated here (compare to vmm_config's determine paging mode) since
213             // shadow paging initialization *precedes* per-core pre-config.
214             extern v3_cpu_arch_t v3_mach_type;
215             if ((v3_mach_type == V3_SVM_REV3_CPU) || 
216                 (v3_mach_type == V3_VMX_EPT_CPU) ||
217                 (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
218                 PrintDebug(vm, VCORE_NONE, "Nested paging specified on machine that supports it - not initializing shadow paging\n");
219                 return 0;
220             } else {
221                 V3_Print(vm, VCORE_NONE, "Nested paging specified but machine does not support it - falling back to shadow paging with defaults\n");
222                 pg_mode = "shadow";
223             }
224         } else if (strcasecmp(pg_mode, "shadow") != 0) { 
225             V3_Print(vm, VCORE_NONE, "Unknown paging mode '%s' specified - falling back to shadow paging with defaults\n",pg_mode);
226             pg_mode = "shadow";
227         }
228     }
229
230     if (pg_strat == NULL) {
231         pg_strat = (char *)default_strategy;
232     }
233         
234     V3_Print(vm, VCORE_NONE,"Initialization of Shadow Paging implementation\n");
235
236     impl = (struct v3_shdw_pg_impl *)v3_htable_search(master_shdw_pg_table, (addr_t)pg_strat);
237
238     if (impl == NULL) {
239         PrintError(vm, VCORE_NONE, "Could not find shadow paging impl (%s)\n", pg_strat);
240         return -1;
241     }
242
243     INIT_LIST_HEAD(&(impl_state->event_callback_list));
244     v3_rw_lock_init(&(impl_state->event_callback_lock));
245    
246     impl_state->current_impl = impl;
247
248     if (impl->init(vm, pg_cfg) == -1) {
249         PrintError(vm, VCORE_NONE, "Could not initialize Shadow paging implemenation (%s)\n", impl->name);
250         return -1;
251     }
252
253
254
255     return 0;
256 }
257
258 int v3_deinit_shdw_impl(struct v3_vm_info * vm) {
259     struct v3_shdw_pg_impl * impl = vm->shdw_impl.current_impl;
260     struct event_callback *cb,*temp;
261     addr_t flags;
262
263     if (impl == NULL) {
264         // Shadow paging not implemented
265         return 0;
266     }
267
268     if (impl->deinit(vm) == -1) {
269         PrintError(vm, VCORE_NONE,"Error deinitializing shadow paging implementation\n");
270         return -1;
271     }
272
273     flags=v3_write_lock_irqsave(&(vm->shdw_impl.event_callback_lock));
274
275     list_for_each_entry_safe(cb,
276                              temp,
277                              &(vm->shdw_impl.event_callback_list),
278                              node) {
279         list_del(&(cb->node));
280         V3_Free(cb);
281     }
282
283     v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
284
285     v3_rw_lock_deinit(&(vm->shdw_impl.event_callback_lock));
286
287     return 0;
288 }
289
290
291 // Reads the guest CR3 register
292 // creates new shadow page tables
293 // updates the shadow CR3 register to point to the new pts
294 int v3_activate_shadow_pt(struct guest_info * core) {
295     struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
296     struct v3_shdw_pg_impl * impl = state->current_impl;
297     
298     if (!have_callbacks(core)) { 
299         return impl->activate_shdw_pt(core);
300     } else {
301         int rc;
302         struct v3_shdw_pg_event event_pre={SHADOW_ACTIVATE,SHADOW_PREIMPL,0,{0,0,0,0,0,0}};
303         struct v3_shdw_pg_event event_post={SHADOW_ACTIVATE,SHADOW_POSTIMPL,0,{0,0,0,0,0,0}};
304         
305         dispatch_event(core,&event_pre);
306
307         rc =impl->activate_shdw_pt(core);
308
309         dispatch_event(core,&event_post);
310         
311         return rc;
312     }
313 }
314
315
316
317 // This must flush any caches
318 // and reset the cr3 value to the correct value
319 int v3_invalidate_shadow_pts(struct guest_info * core) {
320     struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
321     struct v3_shdw_pg_impl * impl = state->current_impl;
322
323     if (!have_callbacks(core)) { 
324         return impl->invalidate_shdw_pt(core);
325     } else {
326         int rc;
327         struct v3_shdw_pg_event event_pre={SHADOW_INVALIDATE,SHADOW_PREIMPL,0,{0,0,0,0,0,0}};
328         struct v3_shdw_pg_event event_post={SHADOW_INVALIDATE,SHADOW_POSTIMPL,0,{0,0,0,0,0,0}};
329         
330         dispatch_event(core,&event_pre);
331
332         rc = impl->invalidate_shdw_pt(core);
333
334         dispatch_event(core,&event_post);
335         
336         return rc;
337     }
338 }
339
340
341 int v3_handle_shadow_pagefault(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) 
342 {
343     int rc;
344    
345
346     if (have_callbacks(core)) { 
347         struct v3_shdw_pg_event event={SHADOW_PAGEFAULT,SHADOW_PREIMPL,fault_addr,error_code};
348         dispatch_event(core,&event);
349     }
350     
351     if (v3_get_vm_mem_mode(core) == PHYSICAL_MEM) {
352         // If paging is not turned on we need to handle the special cases
353       rc = v3_handle_passthrough_pagefault(core, fault_addr, error_code,NULL,NULL);
354     } else if (v3_get_vm_mem_mode(core) == VIRTUAL_MEM) {
355         struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
356         struct v3_shdw_pg_impl * impl = state->current_impl;
357         
358         rc = impl->handle_pagefault(core, fault_addr, error_code);
359     } else {
360         PrintError(core->vm_info, core, "Invalid Memory mode\n");
361         rc = -1;
362     }
363     
364     if (have_callbacks(core)) {
365         struct v3_shdw_pg_event event={SHADOW_PAGEFAULT,SHADOW_POSTIMPL,fault_addr,error_code};
366         dispatch_event(core,&event);
367     }
368     
369     return rc;
370 }
371
372
373 int v3_handle_shadow_invlpg(struct guest_info * core) {
374     uchar_t instr[15];
375     struct x86_instr dec_instr;
376     int ret = 0;
377     addr_t vaddr = 0;
378
379     if (v3_get_vm_mem_mode(core) != VIRTUAL_MEM) {
380         // Paging must be turned on...
381         // should handle with some sort of fault I think
382         PrintError(core->vm_info, core, "ERROR: INVLPG called in non paged mode\n");
383         return -1;
384     }
385
386     if (v3_get_vm_mem_mode(core) == PHYSICAL_MEM) { 
387         ret = v3_read_gpa_memory(core, get_addr_linear(core, core->rip, &(core->segments.cs)), 15, instr);
388     } else { 
389         ret = v3_read_gva_memory(core, get_addr_linear(core, core->rip, &(core->segments.cs)), 15, instr);
390     }
391
392     if (ret == -1) {
393         PrintError(core->vm_info, core, "Could not read instruction into buffer\n");
394         return -1;
395     }
396
397     if (v3_decode(core, (addr_t)instr, &dec_instr) == -1) {
398         PrintError(core->vm_info, core, "Decoding Error\n");
399         return -1;
400     }
401   
402     if ((dec_instr.op_type != V3_OP_INVLPG) || 
403         (dec_instr.num_operands != 1) ||
404         (dec_instr.dst_operand.type != MEM_OPERAND)) {
405         PrintError(core->vm_info, core, "Decoder Error: Not a valid INVLPG instruction...\n");
406         return -1;
407     }
408
409     vaddr = dec_instr.dst_operand.operand;
410
411     core->rip += dec_instr.instr_length;
412
413     {
414         struct v3_shdw_impl_state * state = &(core->vm_info->shdw_impl);
415         struct v3_shdw_pg_impl * impl = state->current_impl;
416         int rc;
417
418         if (have_callbacks(core)) { 
419             struct v3_shdw_pg_event event={SHADOW_INVLPG,SHADOW_PREIMPL,vaddr,{0,0,0,0,0,0}};
420             dispatch_event(core,&event);
421         }
422
423         rc=impl->handle_invlpg(core, vaddr);
424
425         if (have_callbacks(core)) { 
426             struct v3_shdw_pg_event event={SHADOW_INVLPG,SHADOW_POSTIMPL,vaddr,{0,0,0,0,0,0}};
427             dispatch_event(core,&event);
428         }
429
430         return rc;
431     }
432 }
433
434
435
436
437
438
439 int v3_inject_guest_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
440     core->ctrl_regs.cr2 = fault_addr;
441
442 #ifdef V3_CONFIG_SHADOW_PAGING_TELEMETRY
443     core->shdw_pg_state.guest_faults++;
444 #endif
445
446     return v3_raise_exception_with_error(core, PF_EXCEPTION, *(uint_t *)&error_code);
447 }
448
449
450 int v3_is_guest_pf(pt_access_status_t guest_access, pt_access_status_t shadow_access) {
451     /* basically the reasoning is that there can be multiple reasons for a page fault:
452        If there is a permissions failure for a page present in the guest _BUT_
453        the reason for the fault was that the page is not present in the shadow,
454        _THEN_ we have to map the shadow page in and reexecute, this will generate
455        a permissions fault which is _THEN_ valid to send to the guest
456        _UNLESS_ both the guest and shadow have marked the page as not present
457
458        whew...
459     */
460     if (guest_access != PT_ACCESS_OK) {
461         // Guest Access Error
462
463         if ((shadow_access != PT_ACCESS_NOT_PRESENT) &&
464             (guest_access != PT_ACCESS_NOT_PRESENT)) {
465             // aka (guest permission error)
466             return 1;
467         }
468
469         /*
470           if ((shadow_access == PT_ACCESS_NOT_PRESENT) &&
471           (guest_access == PT_ACCESS_NOT_PRESENT)) {
472           // Page tables completely blank, handle guest first
473           return 1;
474           }
475         */
476
477         if (guest_access == PT_ACCESS_NOT_PRESENT) {
478             // Page tables completely blank, handle guest first
479             return 1;
480         }
481         
482         // Otherwise we'll handle the guest fault later...?
483     }
484
485     return 0;
486 }
487
488
489 int v3_register_shadow_paging_event_callback(struct v3_vm_info *vm,
490                                              int (*callback)(struct guest_info *core, 
491                                                              struct v3_shdw_pg_event *event,
492                                                              void      *priv_data),
493                                              void *priv_data)
494 {
495     struct event_callback *ec = V3_Malloc(sizeof(struct event_callback));
496     addr_t flags;
497
498     if (!ec) { 
499         PrintError(vm, VCORE_NONE, "Unable to allocate for a shadow paging event callback\n");
500         return -1;
501     }
502     
503     ec->callback = callback;
504     ec->priv_data = priv_data;
505
506     flags=v3_write_lock_irqsave(&(vm->shdw_impl.event_callback_lock));
507     list_add(&(ec->node),&(vm->shdw_impl.event_callback_list));
508     v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
509
510     return 0;
511
512 }
513
514 int v3_unregister_shadow_paging_event_callback(struct v3_vm_info *vm,
515                                                int (*callback)(struct guest_info *core, 
516                                                                struct v3_shdw_pg_event *event,
517                                                                void      *priv_data),
518                                                void *priv_data)
519 {
520     struct event_callback *cb,*temp;
521     addr_t flags;
522
523     flags=v3_write_lock_irqsave(&(vm->shdw_impl.event_callback_lock));
524
525     list_for_each_entry_safe(cb,
526                              temp,
527                              &(vm->shdw_impl.event_callback_list),
528                              node) {
529         if ((callback == cb->callback) && (priv_data == cb->priv_data)) { 
530             list_del(&(cb->node));
531             v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
532             V3_Free(cb);
533             return 0;
534         }
535     }
536     
537     v3_write_unlock_irqrestore(&(vm->shdw_impl.event_callback_lock),flags);
538
539     PrintError(vm, VCORE_NONE, "No callback found!\n");
540     
541     return -1;
542 }
543
544