Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
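
For example, you can list the available remote branches and then track one of the release branches. The branch name "Release-1.2" below is only illustrative; substitute whichever release branch appears in the listing:

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2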


reorganized swapbypass to isolate it to a special shadow paging implementation, and...
palacios/src/palacios/vmm_shdw_pg_swapbypass.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */

#include <palacios/vmm_shadow_paging.h>
#include <palacios/vmm_swapbypass.h>
#include <palacios/vmm_ctrl_regs.h>

#include <palacios/vm_guest.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_hashtable.h>


#ifdef CONFIG_SWAPBYPASS_TELEMETRY
#include <palacios/vmm_telemetry.h>
#endif

// This is a hack and 32 bit linux specific.... need to fix...
struct swap_pte {
    uint32_t present    : 1;
    uint32_t dev_index  : 8;
    uint32_t pg_index   : 23;
};


struct shadow_pointer {
    uint32_t pg_index;
    uint32_t dev_index;

    pte32_t * shadow_pte;

    addr_t guest_pte;

    struct list_head node;
};


struct shadow_page_data {
    v3_reg_t cr3;
    addr_t page_pa;

    struct list_head page_list_node;
};


struct swapbypass_local_state {
    struct list_head page_list;
};


struct v3_swap_dev {
    uint8_t present;

    struct v3_swap_ops * ops;

    void * private_data;
};


struct swapbypass_vm_state {
    struct v3_swap_dev devs[256];

#ifdef CONFIG_SWAPBYPASS_TELEMETRY
    uint32_t read_faults;
    uint32_t write_faults;
    uint32_t flushes;
    uint32_t mapped_pages;
    uint32_t list_size;
#endif

    // shadow pointers
    struct hashtable * shdw_ptr_ht;
};


static uint_t swap_hash_fn(addr_t key) {
    return v3_hash_long(key, 32);
}


static int swap_eq_fn(addr_t key1, addr_t key2) {
    return (key1 == key2);
}


static inline uint32_t get_pg_index(pte32_t * pte) {
    return ((struct swap_pte *)pte)->pg_index;
}


static inline uint32_t get_dev_index(pte32_t * pte) {
    return ((struct swap_pte *)pte)->dev_index;
}

// Swapped-out PTE: present bit clear, but the entry as a whole is non-zero
// fixme
static inline int is_swapped_pte32(pte32_t * pte) {
    return ((pte->present == 0) && (*(uint32_t *)pte != 0));
}


static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core);

#ifdef CONFIG_SWAPBYPASS_TELEMETRY
static void telemetry_cb(struct v3_vm_info * vm, void * private_data, char * hdr) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);

    V3_Print("%sSymbiotic Swap:\n", hdr);
    V3_Print("%s\tRead faults=%d\n", hdr, swap_state->read_faults);
    V3_Print("%s\tWrite faults=%d\n", hdr, swap_state->write_faults);
    V3_Print("%s\tMapped Pages=%d\n", hdr, swap_state->mapped_pages);
    V3_Print("%s\tFlushes=%d\n", hdr, swap_state->flushes);
    V3_Print("%s\tlist size=%d\n", hdr, swap_state->list_size);
}
#endif

static int get_vaddr_perms(struct guest_info * info, addr_t vaddr, pte32_t * guest_pte, pf_error_t * page_perms) {
    uint64_t pte_val = (uint64_t)*(uint32_t *)guest_pte;

    // symcall to check if page is in cache or on swap disk
    if (v3_sym_call3(info, SYMCALL_MEM_LOOKUP, (uint64_t *)&vaddr, (uint64_t *)&pte_val, (uint64_t *)page_perms) == -1) {
        PrintError("Sym call error?? that's weird... \n");
        return -1;
    }

    //    V3_Print("page perms = %x\n", *(uint32_t *)page_perms);

    if (vaddr == 0) {
        return 1;
    }

    return 0;
}


static addr_t get_swapped_pg_addr(struct v3_vm_info * vm, pte32_t * guest_pte) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
    int dev_index = get_dev_index(guest_pte);
    struct v3_swap_dev * swp_dev = &(swap_state->devs[dev_index]);

    if (! swp_dev->present ) {
        return 0;
    }

    return (addr_t)swp_dev->ops->get_swap_entry(get_pg_index(guest_pte), swp_dev->private_data);
}

static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t * guest_pte, void * swp_page_ptr) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
    struct list_head * shdw_ptr_list = NULL;
    struct shadow_pointer * shdw_ptr = NULL;

    if (swp_page_ptr == NULL) {
        //      PrintError("Swapped out page not found on swap device\n");
        return 0;
    }

    shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte);

    if (shdw_ptr_list == NULL) {
        // allocate a full list_head here, not just a pointer to one
        shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
#ifdef CONFIG_SWAPBYPASS_TELEMETRY
        swap_state->list_size++;
#endif
        INIT_LIST_HEAD(shdw_ptr_list);
        v3_htable_insert(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte, (addr_t)shdw_ptr_list);
    }

    shdw_ptr = (struct shadow_pointer *)V3_Malloc(sizeof(struct shadow_pointer));

    if (shdw_ptr == NULL) {
        PrintError("MEMORY LEAK\n");
#ifdef CONFIG_SWAPBYPASS_TELEMETRY
        telemetry_cb(vm, NULL, "");
#endif
        return 0;
    }

    shdw_ptr->shadow_pte = shadow_pte;
    shdw_ptr->guest_pte = *(uint32_t *)guest_pte;
    shdw_ptr->pg_index = get_pg_index(guest_pte);
    shdw_ptr->dev_index = get_dev_index(guest_pte);

    // We don't check for conflicts, because it should not happen...
    list_add(&(shdw_ptr->node), shdw_ptr_list);

    return PAGE_BASE_ADDR((addr_t)V3_PAddr(swp_page_ptr));
}

#include "vmm_shdw_pg_swapbypass_32.h"
#include "vmm_shdw_pg_swapbypass_32pae.h"
#include "vmm_shdw_pg_swapbypass_64.h"


static struct shadow_page_data * create_new_shadow_pt(struct guest_info * core) {
    struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
    struct swapbypass_local_state * impl_state = (struct swapbypass_local_state *)(state->local_impl_data);
    v3_reg_t cur_cr3 = core->ctrl_regs.cr3;
    struct shadow_page_data * page_tail = NULL;
    addr_t shdw_page = 0;

    if (!list_empty(&(impl_state->page_list))) {
        page_tail = list_tail_entry(&(impl_state->page_list), struct shadow_page_data, page_list_node);

        if (page_tail->cr3 != cur_cr3) {
            PrintDebug("Reusing old shadow Page: %p (cur_CR3=%p)(page_cr3=%p) \n",
                       (void *)(addr_t)page_tail->page_pa,
                       (void *)(addr_t)cur_cr3,
                       (void *)(addr_t)(page_tail->cr3));

            list_move(&(page_tail->page_list_node), &(impl_state->page_list));

            memset(V3_VAddr((void *)(page_tail->page_pa)), 0, PAGE_SIZE_4KB);

            return page_tail;
        }
    }

    // else
    page_tail = (struct shadow_page_data *)V3_Malloc(sizeof(struct shadow_page_data));
    page_tail->page_pa = (addr_t)V3_AllocPages(1);

    PrintDebug("Allocating new shadow Page: %p (cur_cr3=%p)\n",
               (void *)(addr_t)page_tail->page_pa,
               (void *)(addr_t)cur_cr3);

    page_tail->cr3 = cur_cr3;
    list_add(&(page_tail->page_list_node), &(impl_state->page_list));

    shdw_page = (addr_t)V3_VAddr((void *)(page_tail->page_pa));
    memset((void *)shdw_page, 0, PAGE_SIZE_4KB);

    return page_tail;
}

int v3_register_swap_disk(struct v3_vm_info * vm, int dev_index,
                          struct v3_swap_ops * ops, void * private_data) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);

    swap_state->devs[dev_index].present = 1;
    swap_state->devs[dev_index].private_data = private_data;
    swap_state->devs[dev_index].ops = ops;

    return 0;
}


int v3_swap_in_notify(struct v3_vm_info * vm, int pg_index, int dev_index) {
    struct list_head * shdw_ptr_list = NULL;
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
    struct shadow_pointer * tmp_shdw_ptr = NULL;
    struct shadow_pointer * shdw_ptr = NULL;
    struct swap_pte guest_pte = {0, dev_index, pg_index};

    shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, *(addr_t *)&(guest_pte));

    if (shdw_ptr_list == NULL) {
        return 0;
    }

    list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
        if ((shdw_ptr->pg_index == pg_index) &&
            (shdw_ptr->dev_index == dev_index)) {

            // Trigger faults for next shadow access
            shdw_ptr->shadow_pte->present = 0;

            // Delete entry from list
            list_del(&(shdw_ptr->node));
            V3_Free(shdw_ptr);
        }
    }

    return 0;
}


int v3_swap_flush(struct v3_vm_info * vm) {
    struct swapbypass_vm_state * swap_state = (struct swapbypass_vm_state *)(vm->shdw_impl.impl_data);
    struct hashtable_iter * ht_iter = v3_create_htable_iter(swap_state->shdw_ptr_ht);

    //    PrintDebug("Flushing Symbiotic Swap table\n");

#ifdef CONFIG_SWAPBYPASS_TELEMETRY
    swap_state->flushes++;
#endif

    if (!ht_iter) {
        PrintError("NULL iterator in swap flush!! Probably will crash soon...\n");
    }

    while (ht_iter->entry) {
        struct shadow_pointer * tmp_shdw_ptr = NULL;
        struct shadow_pointer * shdw_ptr = NULL;
        struct list_head * shdw_ptr_list = (struct list_head *)v3_htable_get_iter_value(ht_iter);

        // delete all swapped entries
        // we can leave the list_head structures and reuse them for the next round

        list_for_each_entry_safe(shdw_ptr, tmp_shdw_ptr, shdw_ptr_list, node) {
            if (shdw_ptr == NULL) {
                PrintError("Null shadow pointer in swap flush!! Probably crashing soon...\n");
            }

            // Trigger faults for next shadow access
            shdw_ptr->shadow_pte->present = 0;

            // Delete entry from list
            list_del(&(shdw_ptr->node));
            V3_Free(shdw_ptr);
        }

        v3_htable_iter_advance(ht_iter);
    }

    V3_Free(ht_iter);

    return 0;
}

static int sb_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
    struct v3_shdw_impl_state * impl_state = &(vm->shdw_impl);
    struct swapbypass_vm_state * sb_state = NULL;

    // Allocate the per-VM SwapBypass state before initializing it
    sb_state = (struct swapbypass_vm_state *)V3_Malloc(sizeof(struct swapbypass_vm_state));

    memset(sb_state, 0, sizeof(struct swapbypass_vm_state));
    sb_state->shdw_ptr_ht = v3_create_htable(0, swap_hash_fn, swap_eq_fn);

#ifdef CONFIG_SWAPBYPASS_TELEMETRY
    if (vm->enable_telemetry) {
        v3_add_telemetry_cb(vm, telemetry_cb, NULL);
    }
#endif

    impl_state->impl_data = sb_state;

    PrintDebug("Initialized SwapBypass\n");

    return 0;
}


static int sb_deinit(struct v3_vm_info * vm) {
    return -1;
}

static int sb_local_init(struct guest_info * core) {
    struct v3_shdw_pg_state * state = &(core->shdw_pg_state);
    struct swapbypass_local_state * swapbypass_state = NULL;

    V3_Print("SWAPBYPASS local initialization\n");

    swapbypass_state = (struct swapbypass_local_state *)V3_Malloc(sizeof(struct swapbypass_local_state));

    INIT_LIST_HEAD(&(swapbypass_state->page_list));

    state->local_impl_data = swapbypass_state;

    return 0;
}


static int sb_activate_shdw_pt(struct guest_info * core) {
    switch (v3_get_vm_cpu_mode(core)) {

        case PROTECTED:
            return activate_shadow_pt_32(core);
        case PROTECTED_PAE:
            return activate_shadow_pt_32pae(core);
        case LONG:
        case LONG_32_COMPAT:
        case LONG_16_COMPAT:
            return activate_shadow_pt_64(core);
        default:
            PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
            return -1;
    }

    return 0;
}


static int sb_invalidate_shdw_pt(struct guest_info * core) {
    return sb_activate_shdw_pt(core);
}


static int sb_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
    switch (v3_get_vm_cpu_mode(core)) {
        case PROTECTED:
            return handle_shadow_pagefault_32(core, fault_addr, error_code);
        case PROTECTED_PAE:
            return handle_shadow_pagefault_32pae(core, fault_addr, error_code);
        case LONG:
        case LONG_32_COMPAT:
        case LONG_16_COMPAT:
            return handle_shadow_pagefault_64(core, fault_addr, error_code);
        default:
            PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
            return -1;
    }
}


static int sb_handle_invlpg(struct guest_info * core, addr_t vaddr) {
    switch (v3_get_vm_cpu_mode(core)) {
        case PROTECTED:
            return handle_shadow_invlpg_32(core, vaddr);
        case PROTECTED_PAE:
            return handle_shadow_invlpg_32pae(core, vaddr);
        case LONG:
        case LONG_32_COMPAT:
        case LONG_16_COMPAT:
            return handle_shadow_invlpg_64(core, vaddr);
        default:
            PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
            return -1;
    }
}


static struct v3_shdw_pg_impl sb_impl =  {
    .name = "SWAPBYPASS",
    .init = sb_init,
    .deinit = sb_deinit,
    .local_init = sb_local_init,
    .handle_pagefault = sb_handle_pf,
    .handle_invlpg = sb_handle_invlpg,
    .activate_shdw_pt = sb_activate_shdw_pt,
    .invalidate_shdw_pt = sb_invalidate_shdw_pt
};


register_shdw_pg_impl(&sb_impl);