2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2014, The V3VEE Project <http://www.v3vee.org>
11 * All rights reserved.
13 * Author: Daniel Zuo <pengzuo2014@u.northwestern.edu>
14 * Nikhat Karimi <nikhatkarimi@gmail.com>
15 * Ahalya Srinivasan <AhalyaSrinivasan2015@u.northwestern.edu>
16 * Peter Dinda <pdinda@northwestern.edu> (pinning, cleanup, integration, locking etc)
18 * This is free software. You are permitted to use,
19 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
22 #include <palacios/vmm_mem.h>
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_util.h>
25 #include <palacios/vmm_emulator.h>
26 #include <palacios/vm_guest.h>
27 #include <palacios/vmm_debug.h>
29 #include <palacios/vmm_shadow_paging.h>
30 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/vmm_xml.h>
39 <mem ... >N_MB</mem> Size of memory in the GPA
42 <allocated>M_MB</allocated> Allocated space (M_MB <= N_MB)
43 <file>FILENAME</file> Where to swap to
44 <strategy>STRATEGY</strategy> Victim picker to use NEXT_FIT, RANDOM (default), LRU, DEFAULT
// When V3_CONFIG_DEBUG_SWAPPING is not defined, PrintDebug is defined here
// with an empty expansion, so the swapper's PrintDebug(...) calls produce
// no code and no output.
50 #ifndef V3_CONFIG_DEBUG_SWAPPING
52 #define PrintDebug(fmt, args...)
// Global (not per-VM) swapper initialization entry point.
// It emits a debug message tagged with no VM and no core.
// NOTE(review): return value convention presumably 0 on success - confirm.
55 int v3_init_swapping()
57 PrintDebug(VM_NONE,VCORE_NONE, "swapper: init\n");
// Global (not per-VM) swapper teardown entry point.
// It emits a debug message tagged with no VM and no core.
// NOTE(review): return value convention presumably 0 on success - confirm.
62 int v3_deinit_swapping()
64 PrintDebug(VM_NONE,VCORE_NONE, "swapper: deinit\n");
// Writes len bytes from buf to the file fd, starting at byte position
// offset, using v3_file_write().
//   fd     - open file handle to write to
//   buf    - source buffer
//   len    - number of bytes to write
//   offset - position in the file at which to start writing
// Callers in this file treat a nonzero return value as failure.
// NOTE(review): the name implies short writes are retried until everything
// has been written - confirm against the handling of thisop.
69 static int write_all(v3_file_t fd, void *buf, uint64_t len, uint64_t offset)
// thisop receives the result of this single write operation
74 thisop = v3_file_write(fd, buf, len, offset);
// Reads len bytes into buf from the file fd, starting at byte position
// offset, using v3_file_read().
//   fd     - open file handle to read from
//   buf    - destination buffer
//   len    - number of bytes to read
//   offset - position in the file at which to start reading
// Callers in this file treat a nonzero return value as failure.
// NOTE(review): the name implies short reads are retried until everything
// has been read - confirm against the handling of thisop.
85 static int read_all(v3_file_t fd, void *buf, uint64_t len, uint64_t offset)
// thisop receives the result of this single read operation
90 thisop = v3_file_read(fd, buf, len, offset);
// v3_init_swapping_vm prints a warning when the allocated host memory
// amounts to fewer than this many memory blocks ("regions").
102 #define REGION_WARN_THRESH 16
// Integer division of x by y rounded up: the quotient plus 1 when the
// remainder is nonzero.  x and y are each evaluated twice, so do not pass
// expressions with side effects.  y must be nonzero.
104 #define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
// Per-VM swapper setup.
//   vm     - VM whose swap_state is initialized
//   config - VM configuration; the "swapping" subtree is consulted for
//            "enable", "allocated" (a size in MB), "file" and "strategy"
// vm->swap_state is reset first with swapping disabled; enable_swapping is
// only set to 1 after the swap file has been opened and the initial write
// pass over it has been performed.
106 int v3_init_swapping_vm(struct v3_vm_info *vm, struct v3_xml *config)
108 v3_cfg_tree_t *swap_config;
// memory block size, defined elsewhere; the allocation is rounded to a
// multiple of it and it is the unit for the region count reported below
114 extern uint64_t v3_mem_block_size;
117 PrintDebug(vm, VCORE_NONE, "swapper: vm init\n");
// start from a clean state: zeroed swap state, fresh lock, swapping off,
// and host_mem_size defaulting to the full VM memory size
119 memset(&vm->swap_state,0,sizeof(struct v3_swap_impl_state));
121 v3_lock_init(&(vm->swap_state.lock));
123 vm->swap_state.enable_swapping=0;
124 vm->swap_state.host_mem_size=vm->mem_size;
// no configuration at all, or no "swapping" subtree in it
126 if (!config || !(swap_config=v3_cfg_subtree(config,"swapping"))) {
127 PrintDebug(vm,VCORE_NONE,"swapper: no swapping configuration found\n");
// strcasecmp() is nonzero on mismatch, so a missing "enable" value or
// anything other than a case-insensitive "y" takes this branch
131 if (!(enable=v3_cfg_val(swap_config,"enable")) || strcasecmp(enable,"y")) {
132 PrintDebug(vm,VCORE_NONE,"swapper: swapping configuration disabled\n");
136 allocated = v3_cfg_val(swap_config,"allocated");
138 PrintError(vm,VCORE_NONE,"swapper: swapping configuration must included allocated block\n");
// convert the configured size from MB to bytes
// NOTE(review): atoi() reports no parse errors and a negative value would
// become a huge uint64_t here - consider validating the input
141 alloc = ((uint64_t)atoi(allocated))*1024*1024;
143 // make alloc an integer multiple of the memory block size
144 alloc = CEIL_DIV(alloc, v3_mem_block_size) * v3_mem_block_size;
146 PrintDebug(vm,VCORE_NONE,"swapper: adjusted allocated size is %llu\n",alloc);
// the (rounded-up) allocation may not exceed the guest's memory size
148 if (alloc > vm->mem_size) {
149 PrintError(vm,VCORE_NONE,"swapper: cannot allocate more than the VM's memory size....\n");
154 file = v3_cfg_val(swap_config,"file");
156 PrintError(vm,VCORE_NONE,"swapper: swapping configuration must included swap file name\n");
160 strategy = v3_cfg_val(swap_config,"strategy");
162 PrintDebug(vm,VCORE_NONE,"swapper: default strategy selected\n");
166 // Can we allocate the file?
// the swap file is opened for reading and writing and created if needed;
// the handle is kept in swap_state.swapfd
168 if ((vm->swap_state.swapfd = v3_file_open(vm,file, FILE_OPEN_MODE_READ | FILE_OPEN_MODE_WRITE | FILE_OPEN_MODE_CREATE))<0) {
169 PrintError(vm,VCORE_NONE,"swapper: cannot open or create swap file\n");
172 // Make sure we can write the whole thing
// a single zero-filled 4KB buffer is used as the data for the write pass
174 char *buf = V3_Malloc(PAGE_SIZE_4KB);
176 PrintError(vm,VCORE_NONE,"swapper: unable to allocate space for writing file\n");
179 memset(buf,0,PAGE_SIZE_4KB);
// step across the whole guest memory size in 4KB increments
180 for (addr=0;addr<vm->mem_size;addr+=PAGE_SIZE_4KB) {
181 if (write_all(vm->swap_state.swapfd,
185 PrintError(vm,VCORE_NONE,"swapper: unable to write initial swap file\n");
// the swap file is closed again when the initial write fails
187 v3_file_close(vm->swap_state.swapfd);
194 // We are now set - we have space to swap to
195 vm->swap_state.enable_swapping=1;
// Map the strategy string (case-insensitive) to a victim picker; both
// "random" and "default" select V3_SWAP_RANDOM.
// NOTE(review): confirm strategy cannot be NULL here when the configuration
// omits it - strcasecmp() must not be given a NULL pointer.
197 vm->swap_state.strategy =
198 !strcasecmp(strategy,"next_fit") ? V3_SWAP_NEXT_FIT :
199 !strcasecmp(strategy,"random") ? V3_SWAP_RANDOM :
200 !strcasecmp(strategy,"lru") ? V3_SWAP_LRU :
201 !strcasecmp(strategy,"default") ? V3_SWAP_RANDOM :
// from here on only alloc bytes of host memory back the guest
204 vm->swap_state.host_mem_size=alloc;
205 vm->swap_state.swap_count=0;
206 vm->swap_state.last_region_used=0;
207 // already have set swapfd
210 V3_Print(vm,VCORE_NONE,"swapper: swapping enabled (%llu allocated of %llu using %s on %s)\n",
211 (uint64_t)vm->swap_state.host_mem_size, (uint64_t) vm->mem_size, strategy, file);
// warn when the allocation holds fewer than REGION_WARN_THRESH memory blocks
213 if (vm->swap_state.host_mem_size / v3_mem_block_size < REGION_WARN_THRESH) {
214 V3_Print(vm,VCORE_NONE,"swapper: WARNING: %llu regions is less than threshold of %llu, GUEST MAY FAIL TO MAKE PROGRESS\n",
215 (uint64_t)vm->swap_state.host_mem_size/v3_mem_block_size, (uint64_t)REGION_WARN_THRESH);
// Per-VM swapper teardown.
//   vm - VM whose swap state is torn down
// The swap file is closed only if swapping was enabled for this VM; the
// swap-state lock is deinitialized in either case.
222 int v3_deinit_swapping_vm(struct v3_vm_info *vm)
224 PrintDebug(vm, VCORE_NONE, "swapper: vm deinit\n");
// swapfd is only closed here when enable_swapping was set
226 if (vm->swap_state.enable_swapping) {
227 v3_file_close(vm->swap_state.swapfd);
230 v3_lock_deinit(&(vm->swap_state.lock));
// Marks a base region as pinned.  The victim choosers in this file skip
// regions whose pinned flag is set, so a pinned region is not picked for
// swap-out.
//   vm     - VM that owns the region
//   region - region to pin; must be a base region
// If the region is currently swapped out, the lock is dropped, the region
// is swapped back in, and the pin is retried via a recursive call.
236 int v3_pin_region(struct v3_vm_info *vm, struct v3_mem_region *region)
240 PrintDebug(vm,VCORE_NONE, "Pin Region GPA=%p to %p\n",(void*) region->guest_start, (void*)region->guest_end);
// only base regions can be pinned
242 if (!(region->flags.base)) {
243 PrintError(vm,VCORE_NONE,"Attempt to pin non-base region\n");
// already pinned (this check is made before the lock is taken)
247 if (region->flags.pinned) {
251 flags = v3_lock_irqsave(vm->swap_state.lock);
253 if (region->flags.swapped) {
254 // can't pin since it's swapped out, swap it in and try again
255 v3_unlock_irqrestore(vm->swap_state.lock, flags);
256 if (v3_swap_in_region(vm,region)) {
257 PrintError(vm,VCORE_NONE,"Cannot swap in during a pin operation\n");
// retry from the top now that the region has been swapped in
260 return v3_pin_region(vm,region);
264 // still holding lock if we got here, so we're the exclusive
265 // manipulator of the swap state
266 region->flags.pinned=1;
268 v3_unlock_irqrestore(vm->swap_state.lock, flags);
// Clears the pinned flag of a region while holding the swap-state lock.
//   vm     - VM that owns the region
//   region - region to unpin
// NOTE(review): the flag is cleared unconditionally.  The swap-in/swap-out
// paths in this file also set pinned as a temporary hold on a region, so
// confirm callers never unpin a region that is in the middle of a swap.
274 int v3_unpin_region(struct v3_vm_info *vm, struct v3_mem_region *region)
276 unsigned int flags = v3_lock_irqsave(vm->swap_state.lock);
278 region->flags.pinned=0;
280 v3_unlock_irqrestore(vm->swap_state.lock,flags);
// Upper bound on the number of candidate picks choose_random_victim makes
// before giving up.
287 #define SEARCH_LIMIT 1024
// Picks a base region to swap out by probing pseudorandomly chosen indices
// into the VM's base region array; regions that are already swapped out or
// are pinned are rejected.
//   vm - VM whose memory map is searched
289 // Must be called with the lock held
290 static struct v3_mem_region * choose_random_victim(struct v3_vm_info * vm)
293 struct v3_mem_map * map = &(vm->mem_map);
294 uint64_t num_base_regions = map->num_base_regions;
296 struct v3_mem_region *reg=0;
299 PrintDebug(vm, VCORE_NONE, "swapper: choosing random victim\n");
// keep probing until a usable region is found or SEARCH_LIMIT is reached
302 i<SEARCH_LIMIT && reg==0 ;
305 // cycle counter used as pseudorandom number generator
// NOTE(review): the modulo below divides by num_base_regions - confirm the
// map always has at least one base region when this is called
308 reg = &(map->base_regions[thetime % num_base_regions]);
310 if (reg->flags.swapped || reg->flags.pinned) {
311 // region is already swapped or is pinned - try again
317 PrintError(vm,VCORE_NONE,"swapper: Unable to find a random victim\n");
319 PrintDebug(vm,VCORE_NONE,"swapper: Random victim GPA=%p to %p\n", (void*)reg->guest_start, (void*)reg->guest_end);
// Picks a base region to swap out by scanning the base region array in
// index order, beginning just after swap_state.last_region_used.  Regions
// that are already swapped out or are pinned are rejected.
//   vm - VM whose memory map is searched
326 // Must be called with the lock held
327 static struct v3_mem_region * choose_next_victim(struct v3_vm_info * vm)
329 struct v3_mem_map * map = &(vm->mem_map);
330 uint64_t num_base_regions = map->num_base_regions;
331 struct v3_mem_region *reg=0;
334 PrintDebug(vm, VCORE_NONE, "swapper: choosing next victim\n");
// first pass: from the index after the last used region to the end of the array
337 for (i=vm->swap_state.last_region_used+1, reg=0;
338 i<num_base_regions && reg==0;
341 reg = &(map->base_regions[i]);
343 if (reg->flags.swapped || reg->flags.pinned) {
344 // region is already swapped or is pinned - try again
// second pass: indices up to and including last_region_used
// NOTE(review): presumably this wraps around from index 0 - confirm
350 i < vm->swap_state.last_region_used+1 && reg==0;
353 reg = &(map->base_regions[i]);
355 if (reg->flags.swapped || reg->flags.pinned) {
356 // region is already swapped or is pinned - try again
362 PrintError(vm,VCORE_NONE,"swapper: Unable to find the next victim\n");
364 PrintDebug(vm,VCORE_NONE,"swapper: Next victim GPA=%p to %p\n", (void*)reg->guest_start, (void*)reg->guest_end);
// Picks a base region to swap out based on each region's
// swap_state.last_accessed timestamp: the scan remembers the region whose
// timestamp compares lowest.  Regions that are already swapped out or are
// pinned get separate handling in the scan (presumably they are skipped, as
// in the other victim choosers - confirm).
//   vm - VM whose memory map is searched
370 // Must be called with the lock held
371 static struct v3_mem_region * choose_lru_victim(struct v3_vm_info * vm)
373 struct v3_mem_map * map = &(vm->mem_map);
374 uint64_t num_base_regions = map->num_base_regions;
375 struct v3_mem_region *reg=0;
// best candidate found so far and its timestamp
376 struct v3_mem_region *oldest_reg=0;
378 uint64_t oldest_time;
380 PrintDebug(vm, VCORE_NONE, "swapper: choosing pseudo-lru victim\n");
// NOTE(review): oldest_time starts at 0 and candidates are compared with
// '<' below - confirm the first eligible region is accepted unconditionally
383 for (i=0, oldest_time=0, oldest_reg=0;
387 reg = &(map->base_regions[i]);
389 if (reg->flags.swapped || reg->flags.pinned) {
391 reg->swap_state.last_accessed < oldest_time) {
393 oldest_time = reg->swap_state.last_accessed;
400 PrintError(vm,VCORE_NONE,"swapper: Unable to find pseudo-lru victim\n");
402 PrintDebug(vm,VCORE_NONE,"swapper: Pseudo-lru victim GPA=%p to %p\n", (void*)oldest_reg->guest_start, (void*)oldest_reg->guest_end);
// Dispatches to one of the victim choosers according to the VM's
// configured swap_state.strategy and returns that chooser's result.
//   vm - VM for which a swap-out victim is needed
409 // Must be called with the lock held
410 static struct v3_mem_region * choose_victim(struct v3_vm_info * vm)
412 switch (vm->swap_state.strategy) {
413 case V3_SWAP_NEXT_FIT:
414 return choose_next_victim(vm);
417 return choose_random_victim(vm);
420 return choose_lru_victim(vm);
// NOTE(review): presumably the fallback for unrecognized strategies - confirm
423 return choose_random_victim(vm);
// Writes a region's contents to the swap file and invalidates the guest's
// mappings of it on every core.
//   vm             - VM that owns the region
//   victim         - region to write out
//   ignore_pinning - when nonzero, a victim that is already marked pinned is
//                    still swapped out (used by a caller that pinned the
//                    victim itself to reserve it)
// The region's data is written at file offset victim->guest_start, with
// length guest_end - guest_start.
429 // swaps out region, and marks it as swapped and pinned
430 // no lock should be held
431 static int swap_out_region_internal(struct v3_vm_info *vm, struct v3_mem_region *victim, int ignore_pinning)
437 flags = v3_lock_irqsave(vm->swap_state.lock);
// nothing to write if the region is already swapped out
439 if (victim->flags.swapped) {
440 v3_unlock_irqrestore(vm->swap_state.lock,flags);
441 PrintDebug(vm,VCORE_NONE,"swapper: swap out already swapped out region\n");
// a pinned region is refused unless the caller asked to ignore pinning
445 if (!ignore_pinning && victim->flags.pinned) {
446 v3_unlock_irqrestore(vm->swap_state.lock,flags);
447 PrintError(vm,VCORE_NONE,"swapper: attempt to swap out pinned region\n");
451 // now mark it as pinned until we are done with it.
452 victim->flags.pinned=1;
454 // release lock - it's marked pinned so nothing else will touch it
455 v3_unlock_irqrestore(vm->swap_state.lock,flags);
457 // do NOT do this without irqs on...
// write the victim's host memory out to the swap file
458 if (write_all(vm->swap_state.swapfd,
459 (uint8_t *)V3_VAddr((void *)victim->host_addr),
460 victim->guest_end - victim->guest_start,
461 victim->guest_start)) {
462 PrintError(vm, VCORE_NONE, "swapper: failed to swap out victim"); // the write of the victim to disk failed
463 // note write only here - it returns unswapped and unpinned
464 victim->flags.pinned=0;
470 //Invalidate the victim on all cores
472 for (i=0, fail=0; i<vm->num_cores;i++ ) {
473 struct guest_info * core = &(vm->cores[i]);
// the invalidation call depends on the core's paging mode; guest_end-1 is
// passed as the end of the range (presumably an inclusive end - confirm)
476 if (core->shdw_pg_mode == SHADOW_PAGING) {
477 v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(core);
478 if (mem_mode == PHYSICAL_MEM) {
479 PrintDebug(vm, VCORE_NONE, "swapper: v3_invalidate_passthrough_addr_range() called for core %d",i);
480 rc = v3_invalidate_passthrough_addr_range(core, victim->guest_start, victim->guest_end-1,NULL,NULL );
482 PrintDebug(vm, VCORE_NONE, "swapper: v3_invalidate_shadow_pts() called for core %d",i);
483 rc = v3_invalidate_shadow_pts(core);
485 } else if (core->shdw_pg_mode == NESTED_PAGING) {
486 PrintDebug(vm, VCORE_NONE, "swapper: v3_invalidate_nested_addr_range() called for core %d",i);
487 rc = v3_invalidate_nested_addr_range(core, victim->guest_start, victim->guest_end-1,NULL,NULL );
// an invalidation failure is reported but does not stop the loop
491 PrintError(vm,VCORE_NONE,"swapper: paging invalidation failed for victim on core %d.... continuing, but this is not good.\n", i);
496 victim->flags.swapped=1; // now it is in "swapped + pinned" state, meaning it has been written and is now holding for future use
// Public entry point for swapping out one region.
//   vm     - VM that owns the region
//   victim - region to swap out; must be a base region and must not be pinned
// The work is done by swap_out_region_internal(); afterwards the region's
// host address is cleared and its temporary pin is dropped.
506 // swaps out region, and marks it as swapped
507 int v3_swap_out_region(struct v3_vm_info *vm, struct v3_mem_region *victim)
509 if (!victim->flags.base) {
510 PrintError(vm, VCORE_NONE,"swapper: can only swap out base regions\n");
// early check made without the lock; swap_out_region_internal() checks the
// pinned flag again while holding the lock
514 if (victim->flags.pinned) {
515 PrintError(vm, VCORE_NONE,"swapper: cannot swap out a pinned region\n");
// 0 = do not ignore pinning
519 if (swap_out_region_internal(vm,victim,0)) {
520 PrintError(vm, VCORE_NONE,"swapper: failed to swap out victim.... bad\n");
524 // victim now has its old info, and is marked swapped and pinned
// the region no longer has host memory behind it
// NOTE(review): no lock acquisition is visible around these two updates -
// confirm that the temporary pin makes this safe
526 victim->host_addr = 0;
527 victim->flags.pinned = 0;
529 // now is simply swapped
// Swaps a region ("perp") back into host memory.
//   vm   - VM that owns the region
//   perp - region to bring in; must be a base region
// A victim region is chosen and written out, the victim's host memory is
// handed over to perp, and perp's contents are then read back from the swap
// file at offset perp->guest_start.  Both regions are held with the pinned
// flag while the file I/O runs without the lock.
535 int v3_swap_in_region(struct v3_vm_info *vm, struct v3_mem_region *perp)
538 struct v3_mem_region *victim;
540 flags = v3_lock_irqsave(vm->swap_state.lock);
542 if (!perp->flags.base) {
543 v3_unlock_irqrestore(vm->swap_state.lock,flags);
544 PrintError(vm,VCORE_NONE,"swapper: can only swap in base regions\n");
// nothing to do when the region is not swapped out
548 if (!perp->flags.swapped) {
549 v3_unlock_irqrestore(vm->swap_state.lock,flags);
550 PrintDebug(vm,VCORE_NONE,"swapper: region is already swapped in\n");
554 // while still holding the lock, we will pin it to make sure no one
555 // else will attempt to swap in a race with us
556 perp->flags.pinned=1;
// pick the region whose host memory will be taken over
558 victim = choose_victim(vm);
561 perp->flags.pinned=0; // leave perp swapped
562 v3_unlock_irqrestore(vm->swap_state.lock,flags);
563 PrintError(vm,VCORE_NONE,"swapper: cannot find victim\n");
// reserve the victim so no one else selects it
567 victim->flags.pinned=1;
570 // update the next fit info
571 // pointer arith in units of relevant structs...
// i.e. the victim's index within the base_regions array
572 vm->swap_state.last_region_used = (victim - &(vm->mem_map.base_regions[0]));
575 // Now we hold both the perp and the victim (pinned)
576 // and so can release the lock
577 v3_unlock_irqrestore(vm->swap_state.lock,flags);
580 if (swap_out_region_internal(vm,victim,1)) { // ignore that the victim is marked pinned
581 PrintError(vm, VCORE_NONE,"swapper: failed to swap out victim.... bad\n");
585 // victim is still marked pinned
// hand the victim's host memory over to perp
588 perp->host_addr = victim->host_addr;
589 victim->host_addr = 0;
590 // and we're done, so release it
591 victim->flags.swapped=1;
592 victim->flags.pinned=0;
595 // Now swap in the perp
597 if (read_all(vm->swap_state.swapfd,
598 (uint8_t *)V3_VAddr((void *)perp->host_addr),
599 perp->guest_end - perp->guest_start,
600 perp->guest_start)) {
602 PrintError(vm, VCORE_NONE, "swapper: swap in of region failed!\n");
603 // leave it swapped, but unpin the memory...
// note that perp already owns the victim's former host memory at this point
604 perp->flags.pinned = 0;
607 perp->flags.swapped = 0; // perp is now OK, so release it
608 perp->flags.pinned = 0;
// NOTE(review): the lock was released above, so this counter update is not
// protected by it - confirm that is acceptable
609 vm->swap_state.swap_count++;
// Records an access to a region by storing a timestamp obtained via
// rdtscll() into region->swap_state.last_accessed; choose_lru_victim
// compares these values when picking a victim.
//   vm     - VM that owns the region (not referenced in the visible body)
//   region - region being touched
616 void v3_touch_region(struct v3_vm_info *vm, struct v3_mem_region *region)
618 // should be uniform host time, not per core...
619 rdtscll(region->swap_state.last_accessed);