2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2014, The V3VEE Project <http://www.v3vee.org>
11 * All rights reserved.
13 * Author: Daniel Zuo <pengzuo2014@u.northwestern.edu>
14 * Nikhat Karimi <nikhatkarimi@gmail.com>
15 * Ahalya Srinivasan <AhalyaSrinivasan2015@u.northwestern.edu>
16 * Peter Dinda <pdinda@northwestern.edu> (pinning, cleanup, integration, locking etc)
18 * This is free software. You are permitted to use,
19 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
22 #include <palacios/vmm_mem.h>
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_util.h>
25 #include <palacios/vmm_emulator.h>
26 #include <palacios/vm_guest.h>
27 #include <palacios/vmm_debug.h>
29 #include <palacios/vmm_shadow_paging.h>
30 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/vmm_xml.h>
37 <mem ... >N_MB</mem> Size of memory in the GPA
40 <allocated>M_MB</allocated> Allocated space (M_MB <= N_MB)
41 <file>FILENAME</file> Where to swap to
42 <strategy>STRATEGY</strategy> Victim picker to use NEXT_FIT, RANDOM (default), LRU, DEFAULT
// When V3_CONFIG_DEBUG_SWAPPING is not defined, PrintDebug is defined with
// an empty expansion, so the debug messages in this file compile to nothing.
48 #ifndef V3_CONFIG_DEBUG_SWAPPING
50 #define PrintDebug(fmt, args...)
// Global (not per-VM) swapper initialization hook. The only action visible
// here is a debug message.
53 int v3_init_swapping()
55 PrintDebug(VM_NONE,VCORE_NONE, "swapper: init\n");
// Global (not per-VM) swapper teardown hook. The only action visible here
// is a debug message.
60 int v3_deinit_swapping()
62 PrintDebug(VM_NONE,VCORE_NONE, "swapper: deinit\n");
// Writes len bytes from buf to the file fd at byte offset `offset` using
// v3_file_write. Callers in this file treat a nonzero return as failure.
// NOTE(review): the name implies short writes are retried until all of len
// has been written; the loop itself is not visible here - confirm.
67 static int write_all(v3_file_t fd, void *buf, uint64_t len, uint64_t offset)
72 thisop = v3_file_write(fd, buf, len, offset);
// Reads len bytes from the file fd at byte offset `offset` into buf using
// v3_file_read. The caller in this file treats a nonzero return as failure.
// NOTE(review): the name implies short reads are retried until all of len
// has been read; the loop itself is not visible here - confirm.
83 static int read_all(v3_file_t fd, void *buf, uint64_t len, uint64_t offset)
88 thisop = v3_file_read(fd, buf, len, offset);
// v3_init_swapping_vm prints a warning when the number of allocated
// regions (host_mem_size / v3_mem_block_size) is below this threshold.
100 #define REGION_WARN_THRESH 16
// Integer division of x by y rounded up: the quotient plus 1 when there is
// a remainder. Each argument is evaluated twice, so pass expressions
// without side effects; y must be nonzero.
102 #define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
/*
 * Per-VM swapper setup, driven by the "swapping" subtree of the VM's XML
 * configuration.
 *
 * Visible steps:
 *  - zero vm->swap_state, initialize its lock, and start with swapping
 *    disabled and host_mem_size equal to vm->mem_size;
 *  - look up the "swapping" subtree; swapping stays disabled when it is
 *    absent or when its "enable" value is missing or not "y"
 *    (case-insensitive);
 *  - read "allocated" (in MB), convert it to bytes and round it up to a
 *    multiple of v3_mem_block_size; a value above vm->mem_size is an error;
 *  - read "file" (required) and "strategy";
 *  - open (creating if needed) the swap file and write zero-filled 4KB
 *    pages over the range [0, vm->mem_size) to check the space is usable;
 *  - record the strategy and the allocated size, reset the counters, and
 *    mark swapping enabled.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * confirm the return convention against the callers.
 */
104 int v3_init_swapping_vm(struct v3_vm_info *vm, struct v3_xml *config)
106 v3_cfg_tree_t *swap_config;
// declared extern here: the memory block size is defined outside this function
112 extern uint64_t v3_mem_block_size;
115 PrintDebug(vm, VCORE_NONE, "swapper: vm init\n");
// start from a zeroed state with swapping off; host_mem_size defaults to
// the full guest memory size
117 memset(&vm->swap_state,0,sizeof(struct v3_swap_impl_state));
119 v3_lock_init(&(vm->swap_state.lock));
121 vm->swap_state.enable_swapping=0;
122 vm->swap_state.host_mem_size=vm->mem_size;
// no config, or no "swapping" subtree in it
124 if (!config || !(swap_config=v3_cfg_subtree(config,"swapping"))) {
125 PrintDebug(vm,VCORE_NONE,"swapper: no swapping configuration found\n");
// "enable" must be present and equal to "y", ignoring case
129 if (!(enable=v3_cfg_val(swap_config,"enable")) || strcasecmp(enable,"y")) {
130 PrintDebug(vm,VCORE_NONE,"swapper: swapping configuration disabled\n");
134 allocated = v3_cfg_val(swap_config,"allocated");
136 PrintError(vm,VCORE_NONE,"swapper: swapping configuration must included allocated block\n");
// the configured value is in MB; scale it to bytes
// NOTE(review): atoi gives no way to detect malformed input - confirm that
// is acceptable for this config value
139 alloc = ((uint64_t)atoi(allocated))*1024*1024;
141 // make alloc an integer multiple of the memory block size
142 alloc = CEIL_DIV(alloc, v3_mem_block_size) * v3_mem_block_size;
144 PrintDebug(vm,VCORE_NONE,"swapper: adjusted allocated size is %llu\n",alloc);
// the check uses the rounded-up size, not the raw configured value
146 if (alloc > vm->mem_size) {
147 PrintError(vm,VCORE_NONE,"swapper: cannot allocate more than the VM's memory size....\n");
152 file = v3_cfg_val(swap_config,"file");
154 PrintError(vm,VCORE_NONE,"swapper: swapping configuration must included swap file name\n");
158 strategy = v3_cfg_val(swap_config,"strategy");
// NOTE(review): strategy is passed to strcasecmp further down; confirm it
// is given a non-NULL default on this path
160 PrintDebug(vm,VCORE_NONE,"swapper: default strategy selected\n");
164 // Can we allocate the file?
166 if (!(vm->swap_state.swapfd = v3_file_open(vm,file, FILE_OPEN_MODE_READ | FILE_OPEN_MODE_WRITE | FILE_OPEN_MODE_CREATE))) {
167 PrintError(vm,VCORE_NONE,"swapper: cannot open or create swap file\n");
170 // Make sure we can write the whole thing
// one 4KB scratch page, zero-filled, is written repeatedly
172 char *buf = V3_Malloc(PAGE_SIZE_4KB);
174 PrintError(vm,VCORE_NONE,"swapper: unable to allocate space for writing file\n");
177 memset(buf,0,PAGE_SIZE_4KB);
// the loop bound is the whole guest memory size (vm->mem_size), not just
// the allocated portion
178 for (addr=0;addr<vm->mem_size;addr+=PAGE_SIZE_4KB) {
179 if (write_all(vm->swap_state.swapfd,
183 PrintError(vm,VCORE_NONE,"swapper: unable to write initial swap file\n");
// the swap file is closed again on a failed write
// NOTE(review): confirm buf is also freed on this path and on success
185 v3_file_close(vm->swap_state.swapfd);
192 // We are now set - we have space to swap to
193 vm->swap_state.enable_swapping=1;
// map the strategy name (case-insensitive) to its enum value; "default"
// selects the same strategy as "random"
195 vm->swap_state.strategy =
196 !strcasecmp(strategy,"next_fit") ? V3_SWAP_NEXT_FIT :
197 !strcasecmp(strategy,"random") ? V3_SWAP_RANDOM :
198 !strcasecmp(strategy,"lru") ? V3_SWAP_LRU :
199 !strcasecmp(strategy,"default") ? V3_SWAP_RANDOM : // identical branches for clarity
// from here on host_mem_size is the allocated size rather than vm->mem_size
202 vm->swap_state.host_mem_size=alloc;
203 vm->swap_state.swap_count=0;
204 vm->swap_state.last_region_used=0;
205 // already have set swapfd
208 V3_Print(vm,VCORE_NONE,"swapper: swapping enabled (%llu allocated of %llu using %s on %s)\n",
209 (uint64_t)vm->swap_state.host_mem_size, (uint64_t) vm->mem_size, strategy, file);
// warn when the allocation holds fewer than REGION_WARN_THRESH blocks
211 if (vm->swap_state.host_mem_size / v3_mem_block_size < REGION_WARN_THRESH) {
212 V3_Print(vm,VCORE_NONE,"swapper: WARNING: %llu regions is less than threshold of %llu, GUEST MAY FAIL TO MAKE PROGRESS\n",
213 (uint64_t)vm->swap_state.host_mem_size/v3_mem_block_size, (uint64_t)REGION_WARN_THRESH);
// Per-VM swapper teardown: closes the swap file if swapping was enabled
// for this VM, then deinitializes the swap-state lock.
220 int v3_deinit_swapping_vm(struct v3_vm_info *vm)
222 PrintDebug(vm, VCORE_NONE, "swapper: vm deinit\n");
// swapfd is only closed here when enable_swapping is set
224 if (vm->swap_state.enable_swapping) {
225 v3_file_close(vm->swap_state.swapfd);
228 v3_lock_deinit(&(vm->swap_state.lock));
/*
 * Marks a base region as pinned. The victim choosers in this file skip
 * pinned regions, so a pinned region is not picked for swap-out.
 *
 * Non-base regions are rejected with an error. If the region is currently
 * swapped out, the lock is dropped, the region is swapped in with
 * v3_swap_in_region, and the pin is retried through a recursive call.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * confirm the return convention against the callers.
 */
234 int v3_pin_region(struct v3_vm_info *vm, struct v3_mem_region *region)
238 PrintDebug(vm,VCORE_NONE, "Pin Region GPA=%p to %p\n",(void*) region->guest_start, (void*)region->guest_end);
240 if (!(region->flags.base)) {
241 PrintError(vm,VCORE_NONE,"Attempt to pin non-base region\n");
// already-pinned check; note it happens before the lock is taken
245 if (region->flags.pinned) {
249 flags = v3_lock_irqsave(vm->swap_state.lock);
251 if (region->flags.swapped) {
252 // can't pin since it's swapped out, swap it in and try again
253 v3_unlock_irqrestore(vm->swap_state.lock, flags);
254 if (v3_swap_in_region(vm,region)) {
255 PrintError(vm,VCORE_NONE,"Cannot swap in during a pin operation\n");
// retry from the top now that the region has been swapped in
258 return v3_pin_region(vm,region);
262 // still holding lock if we got here, so we're the exclusive
263 // manipulator of the swap state
264 region->flags.pinned=1;
266 v3_unlock_irqrestore(vm->swap_state.lock, flags);
// Clears the region's pinned flag while holding the swap-state lock.
// No check of the region's base or swapped flags is visible here.
272 int v3_unpin_region(struct v3_vm_info *vm, struct v3_mem_region *region)
274 unsigned int flags = v3_lock_irqsave(vm->swap_state.lock);
276 region->flags.pinned=0;
278 v3_unlock_irqrestore(vm->swap_state.lock,flags);
// Upper bound on the number of candidates choose_random_victim tries
// before giving up.
285 #define SEARCH_LIMIT 1024
/*
 * Picks a victim base region at a pseudorandom index
 * (thetime % num_base_regions), retrying up to SEARCH_LIMIT times when the
 * candidate is already swapped out or is pinned. Logs an error when no
 * victim was found.
 *
 * NOTE(review): the modulo assumes num_base_regions is nonzero - confirm.
 */
287 // Must be called with the lock held
288 static struct v3_mem_region * choose_random_victim(struct v3_vm_info * vm)
291 struct v3_mem_map * map = &(vm->mem_map);
292 uint64_t num_base_regions = map->num_base_regions;
294 struct v3_mem_region *reg=0;
297 PrintDebug(vm, VCORE_NONE, "swapper: choosing random victim\n");
// the loop ends once a usable region is found or the attempts run out
300 i<SEARCH_LIMIT && reg==0 ;
303 // cycle counter used as pseudorandom number generator
306 reg = &(map->base_regions[thetime % num_base_regions]);
308 if (reg->flags.swapped || reg->flags.pinned) {
309 // region is already swapped or is pinned - try again
315 PrintError(vm,VCORE_NONE,"swapper: Unable to find a random victim\n");
317 PrintDebug(vm,VCORE_NONE,"swapper: Random victim GPA=%p to %p\n", (void*)reg->guest_start, (void*)reg->guest_end);
/*
 * Next-fit victim selection. A first loop scans base regions from
 * last_region_used+1 up to num_base_regions; a second loop is bounded by
 * last_region_used+1. Regions that are swapped out or pinned are skipped.
 * Logs an error when no victim was found.
 *
 * NOTE(review): the start index of the second loop is not visible here;
 * presumably it wraps around to index 0 - confirm.
 */
324 // Must be called with the lock held
325 static struct v3_mem_region * choose_next_victim(struct v3_vm_info * vm)
327 struct v3_mem_map * map = &(vm->mem_map);
328 uint64_t num_base_regions = map->num_base_regions;
329 struct v3_mem_region *reg=0;
332 PrintDebug(vm, VCORE_NONE, "swapper: choosing next victim\n");
// first pass: from just after the last victim to the end of the array
335 for (i=vm->swap_state.last_region_used+1, reg=0;
336 i<num_base_regions && reg==0;
339 reg = &(map->base_regions[i]);
341 if (reg->flags.swapped || reg->flags.pinned) {
342 // region is already swapped or is pinned - try again
// second pass: indices up to and including last_region_used
348 i < vm->swap_state.last_region_used+1 && reg==0;
351 reg = &(map->base_regions[i]);
353 if (reg->flags.swapped || reg->flags.pinned) {
354 // region is already swapped or is pinned - try again
360 PrintError(vm,VCORE_NONE,"swapper: Unable to find the next victim\n");
362 PrintDebug(vm,VCORE_NONE,"swapper: Next victim GPA=%p to %p\n", (void*)reg->guest_start, (void*)reg->guest_end);
/*
 * Pseudo-LRU victim selection: walks the base regions and tracks the one
 * with the smallest swap_state.last_accessed value seen so far
 * (oldest_reg / oldest_time). last_accessed is written by v3_touch_region.
 * Logs an error when no victim was found.
 *
 * NOTE(review): the line that joins the swapped/pinned test to the
 * timestamp comparison is not visible here. Verify that swapped or pinned
 * regions are excluded as candidates (as the other choosers do) rather
 * than being the only ones considered.
 */
368 // Must be called with the lock held
369 static struct v3_mem_region * choose_lru_victim(struct v3_vm_info * vm)
371 struct v3_mem_map * map = &(vm->mem_map);
372 uint64_t num_base_regions = map->num_base_regions;
373 struct v3_mem_region *reg=0;
374 struct v3_mem_region *oldest_reg=0;
376 uint64_t oldest_time;
378 PrintDebug(vm, VCORE_NONE, "swapper: choosing pseudo-lru victim\n");
381 for (i=0, oldest_time=0, oldest_reg=0;
385 reg = &(map->base_regions[i]);
387 if (reg->flags.swapped || reg->flags.pinned) {
// a smaller timestamp means the region was touched longer ago
389 reg->swap_state.last_accessed < oldest_time) {
391 oldest_time = reg->swap_state.last_accessed;
398 PrintError(vm,VCORE_NONE,"swapper: Unable to find pseudo-lru victim\n");
400 PrintDebug(vm,VCORE_NONE,"swapper: Pseudo-lru victim GPA=%p to %p\n", (void*)oldest_reg->guest_start, (void*)oldest_reg->guest_end);
// Dispatches to the victim chooser selected by vm->swap_state.strategy:
// next-fit, random, or pseudo-LRU. choose_random_victim is returned from
// two of the branches. Only the V3_SWAP_NEXT_FIT case label is visible in
// this listing.
407 // Must be called with the lock held
408 static struct v3_mem_region * choose_victim(struct v3_vm_info * vm)
410 switch (vm->swap_state.strategy) {
411 case V3_SWAP_NEXT_FIT:
412 return choose_next_victim(vm);
415 return choose_random_victim(vm);
418 return choose_lru_victim(vm);
421 return choose_random_victim(vm);
/*
 * Writes a region's contents to the swap file and invalidates its
 * mappings on every core.
 *
 * Visible sequence:
 *  1. Under the lock: handle an already-swapped victim (lock released,
 *     debug message), reject a pinned victim unless ignore_pinning is set,
 *     then mark the victim pinned and release the lock.
 *  2. Write guest_end - guest_start bytes from the victim's host memory to
 *     the swap file at offset guest_start. On failure the pinned flag is
 *     cleared again.
 *  3. For each core, invalidate according to its paging mode: the
 *     passthrough address range or the shadow page tables under shadow
 *     paging, the nested address range under nested paging. A failure is
 *     logged and the loop continues.
 *  4. Set the swapped flag; the victim is left swapped and pinned.
 *
 * vm             - VM that owns the region and the swap state
 * victim         - region to write out
 * ignore_pinning - nonzero to swap out even when the victim is marked pinned
 *
 * NOTE(review): the return statements are not visible in this listing;
 * callers in this file treat a nonzero return as failure.
 */
427 // swaps out region, and marks it as swapped and pinned
428 // no lock should be held
429 static int swap_out_region_internal(struct v3_vm_info *vm, struct v3_mem_region *victim, int ignore_pinning)
435 flags = v3_lock_irqsave(vm->swap_state.lock);
437 if (victim->flags.swapped) {
438 v3_unlock_irqrestore(vm->swap_state.lock,flags);
439 PrintDebug(vm,VCORE_NONE,"swapper: swap out already swapped out region\n");
443 if (!ignore_pinning && victim->flags.pinned) {
444 v3_unlock_irqrestore(vm->swap_state.lock,flags);
445 PrintError(vm,VCORE_NONE,"swapper: attempt to swap out pinned region\n");
449 // now mark it as pinned until we are done with it.
450 victim->flags.pinned=1;
452 // release lock - it's marked pinned so nothing else will touch it
453 v3_unlock_irqrestore(vm->swap_state.lock,flags);
455 // do NOT do this without irqs on...
// the region's guest physical start address doubles as its offset in the
// swap file
456 if (write_all(vm->swap_state.swapfd,
457 (uint8_t *)V3_VAddr((void *)victim->host_addr),
458 victim->guest_end - victim->guest_start,
459 victim->guest_start)) {
// NOTE(review): this message, and the PrintDebug messages in the loop
// below, have no trailing newline, unlike the other messages in this file
460 PrintError(vm, VCORE_NONE, "swapper: failed to swap out victim"); //write victim to disk
461 // note write only here - it returns unswapped and unpinned
462 victim->flags.pinned=0;
468 //Invalidate the victim on all cores
470 for (i=0, fail=0; i<vm->num_cores;i++ ) {
471 struct guest_info * core = &(vm->cores[i]);
474 if (core->shdw_pg_mode == SHADOW_PAGING) {
475 v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(core);
476 if (mem_mode == PHYSICAL_MEM) {
477 PrintDebug(vm, VCORE_NONE, "swapper: v3_invalidate_passthrough_addr_range() called for core %d",i);
// the range passed is inclusive, hence guest_end-1
478 rc = v3_invalidate_passthrough_addr_range(core, victim->guest_start, victim->guest_end-1,NULL,NULL );
480 PrintDebug(vm, VCORE_NONE, "swapper: v3_invalidate_shadow_pts() called for core %d",i);
// this call takes no address range, only the core
481 rc = v3_invalidate_shadow_pts(core);
483 } else if (core->shdw_pg_mode == NESTED_PAGING) {
484 PrintDebug(vm, VCORE_NONE, "swapper: v3_invalidate_nested_addr_range() called for core %d",i);
485 rc = v3_invalidate_nested_addr_range(core, victim->guest_start, victim->guest_end-1,NULL,NULL );
489 PrintError(vm,VCORE_NONE,"swapper: paging invalidation failed for victim on core %d.... continuing, but this is not good.\n", i);
494 victim->flags.swapped=1; // now it is in "swapped + pinned" state, meaning it has been written and is now holding for future use
/*
 * Public swap-out entry point. Rejects non-base and pinned regions with an
 * error, then writes the region out through swap_out_region_internal with
 * ignore_pinning set to 0. On success it clears the region's host_addr and
 * its pinned flag, leaving the region marked swapped only.
 *
 * NOTE(review): the base and pinned checks here are made without taking
 * the swap-state lock; swap_out_region_internal re-checks pinned under the
 * lock. The fate of the host memory formerly at host_addr is not visible
 * here - confirm who owns it after this call.
 */
504 // swaps out region, and marks it as swapped
505 int v3_swap_out_region(struct v3_vm_info *vm, struct v3_mem_region *victim)
507 if (!victim->flags.base) {
508 PrintError(vm, VCORE_NONE,"swapper: can only swap out base regions\n");
512 if (victim->flags.pinned) {
513 PrintError(vm, VCORE_NONE,"swapper: cannot swap out a pinned region\n");
517 if (swap_out_region_internal(vm,victim,0)) {
518 PrintError(vm, VCORE_NONE,"swapper: failed to swap out victim.... bad\n");
522 // victim now has its old info, and is marked swapped and pinned
524 victim->host_addr = 0;
525 victim->flags.pinned = 0;
527 // now is simply swapped
/*
 * Brings a swapped-out base region ("perp") back into memory by evicting
 * a victim region and taking over the victim's host memory.
 *
 * Visible sequence:
 *  1. Under the lock: reject non-base regions; handle a region that is not
 *     swapped (lock released, debug message); pin perp; choose a victim
 *     and pin it; record the victim's index in last_region_used for the
 *     next-fit strategy.
 *  2. With the lock released: write the victim out through
 *     swap_out_region_internal (ignoring its pin), give perp the victim's
 *     host_addr, then mark the victim swapped and unpinned.
 *  3. Read perp's contents from the swap file at offset perp->guest_start
 *     into its new host memory. On success clear perp's swapped and pinned
 *     flags and increment swap_count; on failure perp stays swapped and is
 *     unpinned.
 *
 * NOTE(review): the return statements, and the cleanup after a failed
 * swap-out of the victim (whether perp and the victim are unpinned), are
 * not visible in this listing - confirm.
 */
533 int v3_swap_in_region(struct v3_vm_info *vm, struct v3_mem_region *perp)
536 struct v3_mem_region *victim;
538 flags = v3_lock_irqsave(vm->swap_state.lock);
540 if (!perp->flags.base) {
541 v3_unlock_irqrestore(vm->swap_state.lock,flags);
542 PrintError(vm,VCORE_NONE,"swapper: can only swap in base regions\n");
546 if (!perp->flags.swapped) {
547 v3_unlock_irqrestore(vm->swap_state.lock,flags);
548 PrintDebug(vm,VCORE_NONE,"swapper: region is already swapped in\n");
552 // while still holding the lock, we will pin it to make sure no one
553 // else will attempt to swap in a race with us
554 perp->flags.pinned=1;
// perp is swapped and now also pinned, so the choosers will not return it
556 victim = choose_victim(vm);
559 perp->flags.pinned=0; // leave perp swapped
560 v3_unlock_irqrestore(vm->swap_state.lock,flags);
561 PrintError(vm,VCORE_NONE,"swapper: cannot find victim\n");
565 victim->flags.pinned=1;
568 // update the next fit info
569 // pointer arith in units of relevant structs...
570 vm->swap_state.last_region_used = (victim - &(vm->mem_map.base_regions[0]));
573 // Now we hold both the perp and the victim (pinned)
574 // and so can release the lock
575 v3_unlock_irqrestore(vm->swap_state.lock,flags);
578 if (swap_out_region_internal(vm,victim,1)) { // ignore that the victim is marked pinned
579 PrintError(vm, VCORE_NONE,"swapper: failed to swap out victim.... bad\n");
583 // victim is still marked pinned
// hand the victim's host memory over to perp
586 perp->host_addr = victim->host_addr;
587 victim->host_addr = 0;
588 // and we're done, so release it
589 victim->flags.swapped=1;
590 victim->flags.pinned=0;
593 // Now swap in the perp
595 if (read_all(vm->swap_state.swapfd,
596 (uint8_t *)V3_VAddr((void *)perp->host_addr),
597 perp->guest_end - perp->guest_start,
598 perp->guest_start)) {
600 PrintError(vm, VCORE_NONE, "swapper: swap in of region failed!\n");
601 // leave it swapped, but unpin the memory...
602 perp->flags.pinned = 0;
605 perp->flags.swapped = 0; // perp is now OK, so release it
606 perp->flags.pinned = 0;
607 vm->swap_state.swap_count++;
// Records an access to the region by storing the value produced by
// rdtscll (presumably the CPU timestamp counter - confirm) in
// region->swap_state.last_accessed. choose_lru_victim compares these
// values to find the least recently touched region. No lock is taken here
// and the vm argument is not used in the visible body.
614 void v3_touch_region(struct v3_vm_info *vm, struct v3_mem_region *region)
616 // should be uniform host time, not per core...
617 rdtscll(region->swap_state.last_accessed);