palacios/src/palacios/vmm_mem.c

   1 /*
   2  * This file is part of the Palacios Virtual Machine Monitor developed
   3  * by the V3VEE Project with funding from the United States National
   4  * Science Foundation and the Department of Energy.
   5  *
   6  * The V3VEE Project is a joint project between Northwestern University
   7  * and the University of New Mexico.  You can find out more at
   8  * http://www.v3vee.org
   9  *
  10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
  11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
  12  * All rights reserved.
  13  *
  14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
  15  *
  16  * This is free software.  You are permitted to use,
  17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  18  */
  19
  20 #include <palacios/vmm_mem.h>
  21 #include <palacios/vmm.h>
  22 #include <palacios/vmm_util.h>
  23 #include <palacios/vmm_emulator.h>
  24 #include <palacios/vm_guest.h>
  25 #include <palacios/vmm_debug.h>
  26
  27 #include <palacios/vmm_shadow_paging.h>
  28 #include <palacios/vmm_direct_paging.h>
  29
  30
  31
  32
  33 static int mem_offset_hypercall(struct guest_info * info, uint_t hcall_id, void * private_data) {
  34     PrintDebug("V3Vee: Memory offset hypercall (offset=%p)\n",
  35                (void *)(info->vm_info->mem_map.base_region.host_addr));
  36
  37     info->vm_regs.rbx = info->vm_info->mem_map.base_region.host_addr;
  38
  39     return 0;
  40 }
  41
  42 static int unhandled_err(struct guest_info * core, addr_t guest_va, addr_t guest_pa,
  43                          struct v3_mem_region * reg, pf_error_t access_info) {
  44
  45     PrintError("Unhandled memory access error (gpa=%p, gva=%p, error_code=%d)\n",
  46                (void *)guest_pa, (void *)guest_va, *(uint32_t *)&access_info);
  47
  48     v3_print_mem_map(core->vm_info);
  49
  50     v3_print_guest_state(core);
  51
  52     return -1;
  53 }
  54
  55 int v3_init_mem_map(struct v3_vm_info * vm) {
  56     struct v3_mem_map * map = &(vm->mem_map);
  57     addr_t mem_pages = vm->mem_size >> 12;
  58
  59     memset(&(map->base_region), 0, sizeof(struct v3_mem_region));
  60
  61     map->mem_regions.rb_node = NULL;
  62
  63     // There is an underlying region that contains all of the guest memory
  64     // PrintDebug("Mapping %d pages of memory (%u bytes)\n", (int)mem_pages, (uint_t)info->mem_size);
  65
  66     // 2MB page alignment needed for 2MB hardware nested paging
  67     map->base_region.guest_start = 0;
  68     map->base_region.guest_end = mem_pages * PAGE_SIZE_4KB;
  69
  70 #ifdef V3_CONFIG_ALIGNED_PG_ALLOC
  71     map->base_region.host_addr = (addr_t)V3_AllocAlignedPages(mem_pages, vm->mem_align);
  72 #else
  73     map->base_region.host_addr = (addr_t)V3_AllocPages(mem_pages);
  74 #endif
  75
  76     if ((void*)map->base_region.host_addr == NULL) {
  77        PrintError("Could not allocate guest memory\n");
  78        return -1;
  79     }
  80
  81     // Clear the memory...
  82     memset(V3_VAddr((void *)map->base_region.host_addr), 0, mem_pages * PAGE_SIZE_4KB);
  83
  84
  85     map->base_region.flags.read = 1;
  86     map->base_region.flags.write = 1;
  87     map->base_region.flags.exec = 1;
  88     map->base_region.flags.base = 1;
  89     map->base_region.flags.alloced = 1;
  90
  91     map->base_region.unhandled = unhandled_err;
  92
  93     v3_register_hypercall(vm, MEM_OFFSET_HCALL, mem_offset_hypercall, NULL);
  94
  95     return 0;
  96 }
  97
  98
  99 void v3_delete_mem_map(struct v3_vm_info * vm) {
 100     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
 101     struct v3_mem_region * reg;
 102     struct rb_node * tmp_node = NULL;
 103     addr_t mem_pages = vm->mem_size >> 12;
 104
 105     while (node) {
 106         reg = rb_entry(node, struct v3_mem_region, tree_node);
 107         tmp_node = node;
 108         node = v3_rb_next(node);
 109
 110         v3_delete_mem_region(vm, reg);
 111     }
 112
 113     V3_FreePages((void *)(vm->mem_map.base_region.host_addr), mem_pages);
 114 }
 115
 116
 117 struct v3_mem_region * v3_create_mem_region(struct v3_vm_info * vm, uint16_t core_id,
 118                                                addr_t guest_addr_start, addr_t guest_addr_end) {
 119     struct v3_mem_region * entry = NULL;
 120
 121     if (guest_addr_start >= guest_addr_end) {
 122         PrintError("Region start is after region end\n");
 123         return NULL;
 124     }
 125
 126     entry = (struct v3_mem_region *)V3_Malloc(sizeof(struct v3_mem_region));
 127     memset(entry, 0, sizeof(struct v3_mem_region));
 128
 129     entry->guest_start = guest_addr_start;
 130     entry->guest_end = guest_addr_end;
 131     entry->core_id = core_id;
 132     entry->unhandled = unhandled_err;
 133
 134     return entry;
 135 }
 136
 137
 138
 139
 140 int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
 141                        addr_t               guest_addr_start,
 142                        addr_t               guest_addr_end,
 143                        addr_t               host_addr)
 144 {
 145     struct v3_mem_region * entry = NULL;
 146
 147     entry = v3_create_mem_region(vm, core_id,
 148                                  guest_addr_start,
 149                                  guest_addr_end);
 150
 151     entry->host_addr = host_addr;
 152
 153     entry->flags.read = 1;
 154     entry->flags.write = 1;
 155     entry->flags.exec = 1;
 156     entry->flags.alloced = 1;
 157
 158     if (v3_insert_mem_region(vm, entry) == -1) {
 159         V3_Free(entry);
 160         return -1;
 161     }
 162
 163     return 0;
 164 }
 165
 166
 167
 168 static inline
 169 struct v3_mem_region * __insert_mem_region(struct v3_vm_info * vm,
 170                                            struct v3_mem_region * region) {
 171     struct rb_node ** p = &(vm->mem_map.mem_regions.rb_node);
 172     struct rb_node * parent = NULL;
 173     struct v3_mem_region * tmp_region;
 174
 175     while (*p) {
 176         parent = *p;
 177         tmp_region = rb_entry(parent, struct v3_mem_region, tree_node);
 178
 179         if (region->guest_end <= tmp_region->guest_start) {
 180             p = &(*p)->rb_left;
 181         } else if (region->guest_start >= tmp_region->guest_end) {
 182             p = &(*p)->rb_right;
 183         } else {
 184             if ((region->guest_end != tmp_region->guest_end) ||
 185                 (region->guest_start != tmp_region->guest_start)) {
 186                 PrintError("Trying to map a partial overlapped core specific page...\n");
 187                 return tmp_region; // This is ugly...
 188             } else if (region->core_id == tmp_region->core_id) {
 189                 return tmp_region;
 190             } else if (region->core_id < tmp_region->core_id) {
 191                 p = &(*p)->rb_left;
 192             } else {
 193                 p = &(*p)->rb_right;
 194             }
 195         }
 196     }
 197
 198     rb_link_node(&(region->tree_node), parent, p);
 199
 200     return NULL;
 201 }
 202
 203
 204
 205 int v3_insert_mem_region(struct v3_vm_info * vm, struct v3_mem_region * region) {
 206     struct v3_mem_region * ret;
 207     int i = 0;
 208
 209     if ((ret = __insert_mem_region(vm, region))) {
 210         return -1;
 211     }
 212
 213     v3_rb_insert_color(&(region->tree_node), &(vm->mem_map.mem_regions));
 214
 215
 216
 217     for (i = 0; i < vm->num_cores; i++) {
 218         struct guest_info * info = &(vm->cores[i]);
 219
 220         // flush virtual page tables
 221         // 3 cases shadow, shadow passthrough, and nested
 222
 223         if (info->shdw_pg_mode == SHADOW_PAGING) {
 224             v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
 225
 226             if (mem_mode == PHYSICAL_MEM) {
 227                 addr_t cur_addr;
 228
 229                 for (cur_addr = region->guest_start;
 230                      cur_addr < region->guest_end;
 231                      cur_addr += PAGE_SIZE_4KB) {
 232                     v3_invalidate_passthrough_addr(info, cur_addr);
 233                 }
 234             } else {
 235                 v3_invalidate_shadow_pts(info);
 236             }
 237
 238         } else if (info->shdw_pg_mode == NESTED_PAGING) {
 239             addr_t cur_addr;
 240
 241             for (cur_addr = region->guest_start;
 242                  cur_addr < region->guest_end;
 243                  cur_addr += PAGE_SIZE_4KB) {
 244
 245                 v3_invalidate_nested_addr(info, cur_addr);
 246             }
 247         }
 248     }
 249
 250     return 0;
 251 }
 252
 253
 254
 255
 256 struct v3_mem_region * v3_get_mem_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
 257     struct rb_node * n = vm->mem_map.mem_regions.rb_node;
 258     struct v3_mem_region * reg = NULL;
 259
 260     while (n) {
 261
 262         reg = rb_entry(n, struct v3_mem_region, tree_node);
 263
 264         if (guest_addr < reg->guest_start) {
 265             n = n->rb_left;
 266         } else if (guest_addr >= reg->guest_end) {
 267             n = n->rb_right;
 268         } else {
 269             if (reg->core_id == V3_MEM_CORE_ANY) {
 270                 // found relevant region, it's available on all cores
 271                 return reg;
 272             } else if (core_id == reg->core_id) {
 273                 // found relevant region, it's available on the indicated core
 274                 return reg;
 275             } else if (core_id < reg->core_id) {
 276                 // go left, core too big
 277                 n = n->rb_left;
 278             } else if (core_id > reg->core_id) {
 279                 // go right, core too small
 280                 n = n->rb_right;
 281             } else {
 282                 PrintDebug("v3_get_mem_region: Impossible!\n");
 283                 return NULL;
 284             }
 285         }
 286     }
 287
 288
 289     // There is not registered region, so we check if its a valid address in the base region
 290
 291     if (guest_addr > vm->mem_map.base_region.guest_end) {
 292         PrintError("Guest Address Exceeds Base Memory Size (ga=0x%p), (limit=0x%p) (core=0x%x)\n",
 293                    (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end, core_id);
 294         v3_print_mem_map(vm);
 295
 296         return NULL;
 297     }
 298
 299     return &(vm->mem_map.base_region);
 300 }
 301
 302
 303
 304 /* This returns the next memory region based on a given address.
 305  * If the address falls inside a sub region, that region is returned.
 306  * If the address falls outside a sub region, the next sub region is returned
 307  * NOTE that we have to be careful about core_ids here...
 308  */
 309 static struct v3_mem_region * get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
 310     struct rb_node * n = vm->mem_map.mem_regions.rb_node;
 311     struct v3_mem_region * reg = NULL;
 312     struct v3_mem_region * parent = NULL;
 313
 314     if (n == NULL) {
 315         return NULL;
 316     }
 317
 318     while (n) {
 319
 320         reg = rb_entry(n, struct v3_mem_region, tree_node);
 321
 322         if (guest_addr < reg->guest_start) {
 323             n = n->rb_left;
 324         } else if (guest_addr >= reg->guest_end) {
 325             n = n->rb_right;
 326         } else {
 327             if (reg->core_id == V3_MEM_CORE_ANY) {
 328                 // found relevant region, it's available on all cores
 329                 return reg;
 330             } else if (core_id == reg->core_id) {
 331                 // found relevant region, it's available on the indicated core
 332                 return reg;
 333             } else if (core_id < reg->core_id) {
 334                 // go left, core too big
 335                 n = n->rb_left;
 336             } else if (core_id > reg->core_id) {
 337                 // go right, core too small
 338                 n = n->rb_right;
 339             } else {
 340                 PrintError("v3_get_mem_region: Impossible!\n");
 341                 return NULL;
 342             }
 343         }
 344
 345         if ((reg->core_id == core_id) || (reg->core_id == V3_MEM_CORE_ANY)) {
 346             parent = reg;
 347         }
 348     }
 349
 350
 351     if (parent->guest_start > guest_addr) {
 352         return parent;
 353     } else if (parent->guest_end < guest_addr) {
 354         struct rb_node * node = &(parent->tree_node);
 355
 356         while ((node = v3_rb_next(node)) != NULL) {
 357             struct v3_mem_region * next_reg = rb_entry(node, struct v3_mem_region, tree_node);
 358
 359             if ((next_reg->core_id == V3_MEM_CORE_ANY) ||
 360                 (next_reg->core_id == core_id)) {
 361
 362                 // This check is not strictly necessary, but it makes it clearer
 363                 if (next_reg->guest_start > guest_addr) {
 364                     return next_reg;
 365                 }
 366             }
 367         }
 368     }
 369
 370     return NULL;
 371 }
 372
 373
 374
 375
 376 /* Given an address region of memory, find if there are any regions that overlap with it.
 377  * This checks that the range lies in a single region, and returns that region if it does,
 378  * this can be either the base region or a sub region.
 379  * IF there are multiple regions in the range then it returns NULL
 380  */
 381 static struct v3_mem_region * get_overlapping_region(struct v3_vm_info * vm, uint16_t core_id,
 382                                                      addr_t start_gpa, addr_t end_gpa) {
 383     struct v3_mem_region * start_region = v3_get_mem_region(vm, core_id, start_gpa);
 384
 385     if (start_region == NULL) {
 386         PrintError("Invalid memory region\n");
 387         return NULL;
 388     }
 389
 390
 391     if (start_region->guest_end < end_gpa) {
 392         // Region ends before range
 393         return NULL;
 394     } else if (start_region->flags.base == 0) {
 395         // sub region overlaps range
 396         return start_region;
 397     } else {
 398         // Base region, now we have to scan forward for the next sub region
 399         struct v3_mem_region * next_reg = get_next_mem_region(vm, core_id, start_gpa);
 400
 401         if (next_reg == NULL) {
 402             // no sub regions after start_addr, base region is ok
 403             return start_region;
 404         } else if (next_reg->guest_start >= end_gpa) {
 405             // Next sub region begins outside range
 406             return start_region;
 407         } else {
 408             return NULL;
 409         }
 410     }
 411
 412
 413     // Should never get here
 414     return NULL;
 415 }
 416
 417
 418
 419
 420
 421 void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
 422     int i = 0;
 423
 424     if (reg == NULL) {
 425         return;
 426     }
 427
 428
 429     v3_rb_erase(&(reg->tree_node), &(vm->mem_map.mem_regions));
 430
 431
 432
 433     // If the guest isn't running then there shouldn't be anything to invalidate.
 434     // Page tables should __always__ be created on demand during execution
 435     // NOTE: This is a sanity check, and can be removed if that assumption changes
 436     if (vm->run_state != VM_RUNNING) {
 437         V3_Free(reg);
 438         return;
 439     }
 440
 441     for (i = 0; i < vm->num_cores; i++) {
 442         struct guest_info * info = &(vm->cores[i]);
 443
 444         // flush virtual page tables
 445         // 3 cases shadow, shadow passthrough, and nested
 446
 447         if (info->shdw_pg_mode == SHADOW_PAGING) {
 448             v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
 449
 450             if (mem_mode == PHYSICAL_MEM) {
 451                 addr_t cur_addr;
 452
 453                 for (cur_addr = reg->guest_start;
 454                      cur_addr < reg->guest_end;
 455                      cur_addr += PAGE_SIZE_4KB) {
 456                     v3_invalidate_passthrough_addr(info, cur_addr);
 457                 }
 458             } else {
 459                 v3_invalidate_shadow_pts(info);
 460             }
 461
 462         } else if (info->shdw_pg_mode == NESTED_PAGING) {
 463             addr_t cur_addr;
 464
 465             for (cur_addr = reg->guest_start;
 466                  cur_addr < reg->guest_end;
 467                  cur_addr += PAGE_SIZE_4KB) {
 468
 469                 v3_invalidate_nested_addr(info, cur_addr);
 470             }
 471         }
 472     }
 473
 474     V3_Free(reg);
 475
 476     // flush virtual page tables
 477     // 3 cases shadow, shadow passthrough, and nested
 478
 479 }
 480
 481 // Determine if a given address can be handled by a large page of the requested size
 482 uint32_t v3_get_max_page_size(struct guest_info * core, addr_t page_addr, v3_cpu_mode_t mode) {
 483     addr_t pg_start = 0;
 484     addr_t pg_end = 0;
 485     uint32_t page_size = PAGE_SIZE_4KB;
 486     struct v3_mem_region * reg = NULL;
 487
 488     switch (mode) {
 489         case PROTECTED:
 490             if (core->use_large_pages == 1) {
 491                 pg_start = PAGE_ADDR_4MB(page_addr);
 492                 pg_end = (pg_start + PAGE_SIZE_4MB);
 493
 494                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 495
 496                 if ((reg) && ((reg->host_addr % PAGE_SIZE_4MB) == 0)) {
 497                     page_size = PAGE_SIZE_4MB;
 498                 }
 499             }
 500             break;
 501         case PROTECTED_PAE:
 502             if (core->use_large_pages == 1) {
 503                 pg_start = PAGE_ADDR_2MB(page_addr);
 504                 pg_end = (pg_start + PAGE_SIZE_2MB);
 505
 506                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 507
 508                 if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
 509                     page_size = PAGE_SIZE_2MB;
 510                 }
 511             }
 512             break;
 513         case LONG:
 514         case LONG_32_COMPAT:
 515         case LONG_16_COMPAT:
 516             if (core->use_giant_pages == 1) {
 517                 pg_start = PAGE_ADDR_1GB(page_addr);
 518                 pg_end = (pg_start + PAGE_SIZE_1GB);
 519
 520                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 521
 522                 if ((reg) && ((reg->host_addr % PAGE_SIZE_1GB) == 0)) {
 523                     page_size = PAGE_SIZE_1GB;
 524                     break;
 525                 }
 526             }
 527
 528             if (core->use_large_pages == 1) {
 529                 pg_start = PAGE_ADDR_2MB(page_addr);
 530                 pg_end = (pg_start + PAGE_SIZE_2MB);
 531
 532                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 533
 534                 if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
 535                     page_size = PAGE_SIZE_2MB;
 536                 }
 537             }
 538             break;
 539         default:
 540             PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
 541             return -1;
 542     }
 543
 544     return page_size;
 545 }
 546
 547
 548
 549 void v3_print_mem_map(struct v3_vm_info * vm) {
 550     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
 551     struct v3_mem_region * reg = &(vm->mem_map.base_region);
 552     int i = 0;
 553
 554     V3_Print("Memory Layout (all cores):\n");
 555
 556
 557     V3_Print("Base Region (all cores):  0x%p - 0x%p -> 0x%p\n",
 558                (void *)(reg->guest_start),
 559                (void *)(reg->guest_end - 1),
 560                (void *)(reg->host_addr));
 561
 562
 563     // If the memory map is empty, don't print it
 564     if (node == NULL) {
 565         return;
 566     }
 567
 568     do {
 569         reg = rb_entry(node, struct v3_mem_region, tree_node);
 570
 571         V3_Print("%d:  0x%p - 0x%p -> 0x%p\n", i,
 572                    (void *)(reg->guest_start),
 573                    (void *)(reg->guest_end - 1),
 574                    (void *)(reg->host_addr));
 575
 576         V3_Print("\t(flags=0x%x) (core=0x%x) (unhandled = 0x%p)\n",
 577                  reg->flags.value,
 578                  reg->core_id,
 579                  reg->unhandled);
 580
 581         i++;
 582     } while ((node = v3_rb_next(node)));
 583 }
 584