palacios/src/palacios/vmm_mem.c

   1 /*
   2  * This file is part of the Palacios Virtual Machine Monitor developed
   3  * by the V3VEE Project with funding from the United States National
   4  * Science Foundation and the Department of Energy.
   5  *
   6  * The V3VEE Project is a joint project between Northwestern University
   7  * and the University of New Mexico.  You can find out more at
   8  * http://www.v3vee.org
   9  *
  10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
  11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
  12  * All rights reserved.
  13  *
  14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
  15  *
  16  * This is free software.  You are permitted to use,
  17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  18  */
  19
  20 #include <palacios/vmm_mem.h>
  21 #include <palacios/vmm.h>
  22 #include <palacios/vmm_util.h>
  23 #include <palacios/vmm_emulator.h>
  24 #include <palacios/vm_guest.h>
  25
  26 #include <palacios/vmm_shadow_paging.h>
  27 #include <palacios/vmm_direct_paging.h>
  28
  29
  30
  31
  32 static int mem_offset_hypercall(struct guest_info * info, uint_t hcall_id, void * private_data) {
  33     PrintDebug("V3Vee: Memory offset hypercall (offset=%p)\n",
  34                (void *)(info->vm_info->mem_map.base_region.host_addr));
  35
  36     info->vm_regs.rbx = info->vm_info->mem_map.base_region.host_addr;
  37
  38     return 0;
  39 }
  40
  41 static int unhandled_err(struct guest_info * core, addr_t guest_va, addr_t guest_pa,
  42                          struct v3_mem_region * reg, pf_error_t access_info) {
  43
  44     PrintError("Unhandled memory access error (gpa=%p, gva=%p, error_code=%d)\n",
  45                (void *)guest_pa, (void *)guest_va, *(uint32_t *)&access_info);
  46
  47     v3_print_mem_map(core->vm_info);
  48
  49     v3_print_guest_state(core);
  50
  51     return -1;
  52 }
  53
  54 int v3_init_mem_map(struct v3_vm_info * vm) {
  55     struct v3_mem_map * map = &(vm->mem_map);
  56     addr_t mem_pages = vm->mem_size >> 12;
  57
  58     memset(&(map->base_region), 0, sizeof(struct v3_mem_region));
  59
  60     map->mem_regions.rb_node = NULL;
  61
  62     // There is an underlying region that contains all of the guest memory
  63     // PrintDebug("Mapping %d pages of memory (%u bytes)\n", (int)mem_pages, (uint_t)info->mem_size);
  64
  65     // 2MB page alignment needed for 2MB hardware nested paging
  66     map->base_region.guest_start = 0;
  67     map->base_region.guest_end = mem_pages * PAGE_SIZE_4KB;
  68
  69 #ifdef V3_CONFIG_ALIGNED_PG_ALLOC
  70     map->base_region.host_addr = (addr_t)V3_AllocAlignedPages(mem_pages, vm->mem_align);
  71 #else
  72     map->base_region.host_addr = (addr_t)V3_AllocPages(mem_pages);
  73 #endif
  74
  75     map->base_region.flags.read = 1;
  76     map->base_region.flags.write = 1;
  77     map->base_region.flags.exec = 1;
  78     map->base_region.flags.base = 1;
  79     map->base_region.flags.alloced = 1;
  80
  81     map->base_region.unhandled = unhandled_err;
  82
  83     if ((void *)map->base_region.host_addr == NULL) {
  84         PrintError("Could not allocate Guest memory\n");
  85         return -1;
  86     }
  87
  88     //memset(V3_VAddr((void *)map->base_region.host_addr), 0xffffffff, map->base_region.guest_end);
  89
  90     v3_register_hypercall(vm, MEM_OFFSET_HCALL, mem_offset_hypercall, NULL);
  91
  92     return 0;
  93 }
  94
  95
  96 void v3_delete_mem_map(struct v3_vm_info * vm) {
  97     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
  98     struct v3_mem_region * reg;
  99     struct rb_node * tmp_node = NULL;
 100     addr_t mem_pages = vm->mem_size >> 12;
 101
 102     while (node) {
 103         reg = rb_entry(node, struct v3_mem_region, tree_node);
 104         tmp_node = node;
 105         node = v3_rb_next(node);
 106
 107         v3_delete_mem_region(vm, reg);
 108     }
 109
 110     V3_FreePages((void *)(vm->mem_map.base_region.host_addr), mem_pages);
 111 }
 112
 113
 114 struct v3_mem_region * v3_create_mem_region(struct v3_vm_info * vm, uint16_t core_id,
 115                                                addr_t guest_addr_start, addr_t guest_addr_end) {
 116
 117     struct v3_mem_region * entry = (struct v3_mem_region *)V3_Malloc(sizeof(struct v3_mem_region));
 118     memset(entry, 0, sizeof(struct v3_mem_region));
 119
 120     entry->guest_start = guest_addr_start;
 121     entry->guest_end = guest_addr_end;
 122     entry->core_id = core_id;
 123     entry->unhandled = unhandled_err;
 124
 125     return entry;
 126 }
 127
 128
 129
 130
 131 int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
 132                        addr_t               guest_addr_start,
 133                        addr_t               guest_addr_end,
 134                        addr_t               host_addr)
 135 {
 136     struct v3_mem_region * entry = NULL;
 137
 138     entry = v3_create_mem_region(vm, core_id,
 139                                  guest_addr_start,
 140                                  guest_addr_end);
 141
 142     entry->host_addr = host_addr;
 143
 144     entry->flags.read = 1;
 145     entry->flags.write = 1;
 146     entry->flags.exec = 1;
 147     entry->flags.alloced = 1;
 148
 149     if (v3_insert_mem_region(vm, entry) == -1) {
 150         V3_Free(entry);
 151         return -1;
 152     }
 153
 154     return 0;
 155 }
 156
 157
 158
 159 static inline
 160 struct v3_mem_region * __insert_mem_region(struct v3_vm_info * vm,
 161                                            struct v3_mem_region * region) {
 162     struct rb_node ** p = &(vm->mem_map.mem_regions.rb_node);
 163     struct rb_node * parent = NULL;
 164     struct v3_mem_region * tmp_region;
 165
 166     while (*p) {
 167         parent = *p;
 168         tmp_region = rb_entry(parent, struct v3_mem_region, tree_node);
 169
 170         if (region->guest_end <= tmp_region->guest_start) {
 171             p = &(*p)->rb_left;
 172         } else if (region->guest_start >= tmp_region->guest_end) {
 173             p = &(*p)->rb_right;
 174         } else {
 175             if ((region->guest_end != tmp_region->guest_end) ||
 176                 (region->guest_start != tmp_region->guest_start)) {
 177                 PrintError("Trying to map a partial overlapped core specific page...\n");
 178                 return tmp_region; // This is ugly...
 179             } else if (region->core_id == tmp_region->core_id) {
 180                 return tmp_region;
 181             } else if (region->core_id < tmp_region->core_id) {
 182                 p = &(*p)->rb_left;
 183             } else {
 184                 p = &(*p)->rb_right;
 185             }
 186         }
 187     }
 188
 189     rb_link_node(&(region->tree_node), parent, p);
 190
 191     return NULL;
 192 }
 193
 194
 195
 196 int v3_insert_mem_region(struct v3_vm_info * vm, struct v3_mem_region * region) {
 197     struct v3_mem_region * ret;
 198     int i = 0;
 199
 200     if ((ret = __insert_mem_region(vm, region))) {
 201         return -1;
 202     }
 203
 204     v3_rb_insert_color(&(region->tree_node), &(vm->mem_map.mem_regions));
 205
 206
 207
 208     for (i = 0; i < vm->num_cores; i++) {
 209         struct guest_info * info = &(vm->cores[i]);
 210
 211         // flush virtual page tables
 212         // 3 cases shadow, shadow passthrough, and nested
 213
 214         if (info->shdw_pg_mode == SHADOW_PAGING) {
 215             v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
 216
 217             if (mem_mode == PHYSICAL_MEM) {
 218                 addr_t cur_addr;
 219
 220                 for (cur_addr = region->guest_start;
 221                      cur_addr < region->guest_end;
 222                      cur_addr += PAGE_SIZE_4KB) {
 223                     v3_invalidate_passthrough_addr(info, cur_addr);
 224                 }
 225             } else {
 226                 v3_invalidate_shadow_pts(info);
 227             }
 228
 229         } else if (info->shdw_pg_mode == NESTED_PAGING) {
 230             addr_t cur_addr;
 231
 232             for (cur_addr = region->guest_start;
 233                  cur_addr < region->guest_end;
 234                  cur_addr += PAGE_SIZE_4KB) {
 235
 236                 v3_invalidate_nested_addr(info, cur_addr);
 237             }
 238         }
 239     }
 240
 241     return 0;
 242 }
 243
 244
 245
 246
 247 struct v3_mem_region * v3_get_mem_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
 248     struct rb_node * n = vm->mem_map.mem_regions.rb_node;
 249     struct v3_mem_region * reg = NULL;
 250
 251     while (n) {
 252
 253         reg = rb_entry(n, struct v3_mem_region, tree_node);
 254
 255         if (guest_addr < reg->guest_start) {
 256             n = n->rb_left;
 257         } else if (guest_addr >= reg->guest_end) {
 258             n = n->rb_right;
 259         } else {
 260             if (reg->core_id == V3_MEM_CORE_ANY) {
 261                 // found relevant region, it's available on all cores
 262                 return reg;
 263             } else if (core_id == reg->core_id) {
 264                 // found relevant region, it's available on the indicated core
 265                 return reg;
 266             } else if (core_id < reg->core_id) {
 267                 // go left, core too big
 268                 n = n->rb_left;
 269             } else if (core_id > reg->core_id) {
 270                 // go right, core too small
 271                 n = n->rb_right;
 272             } else {
 273                 PrintDebug("v3_get_mem_region: Impossible!\n");
 274                 return NULL;
 275             }
 276         }
 277     }
 278
 279
 280     // There is not registered region, so we check if its a valid address in the base region
 281
 282     if (guest_addr > vm->mem_map.base_region.guest_end) {
 283         PrintError("Guest Address Exceeds Base Memory Size (ga=0x%p), (limit=0x%p) (core=0x%x)\n",
 284                    (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end, core_id);
 285         v3_print_mem_map(vm);
 286
 287         return NULL;
 288     }
 289
 290     return &(vm->mem_map.base_region);
 291 }
 292
 293
 294
 295 /* This returns the next memory region based on a given address.
 296  * If the address falls inside a sub region, that region is returned.
 297  * If the address falls outside a sub region, the next sub region is returned
 298  * NOTE that we have to be careful about core_ids here...
 299  */
 300 static struct v3_mem_region * get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
 301     struct rb_node * n = vm->mem_map.mem_regions.rb_node;
 302     struct v3_mem_region * reg = NULL;
 303     struct v3_mem_region * parent = NULL;
 304
 305     if (n == NULL) {
 306         return NULL;
 307     }
 308
 309     while (n) {
 310
 311         reg = rb_entry(n, struct v3_mem_region, tree_node);
 312
 313         if (guest_addr < reg->guest_start) {
 314             n = n->rb_left;
 315         } else if (guest_addr >= reg->guest_end) {
 316             n = n->rb_right;
 317         } else {
 318             if (reg->core_id == V3_MEM_CORE_ANY) {
 319                 // found relevant region, it's available on all cores
 320                 return reg;
 321             } else if (core_id == reg->core_id) {
 322                 // found relevant region, it's available on the indicated core
 323                 return reg;
 324             } else if (core_id < reg->core_id) {
 325                 // go left, core too big
 326                 n = n->rb_left;
 327             } else if (core_id > reg->core_id) {
 328                 // go right, core too small
 329                 n = n->rb_right;
 330             } else {
 331                 PrintError("v3_get_mem_region: Impossible!\n");
 332                 return NULL;
 333             }
 334         }
 335
 336         if ((reg->core_id == core_id) || (reg->core_id == V3_MEM_CORE_ANY)) {
 337             parent = reg;
 338         }
 339     }
 340
 341
 342     if (parent->guest_start > guest_addr) {
 343         return parent;
 344     } else if (parent->guest_end < guest_addr) {
 345         struct rb_node * node = &(parent->tree_node);
 346
 347         while ((node = v3_rb_next(node)) != NULL) {
 348             struct v3_mem_region * next_reg = rb_entry(node, struct v3_mem_region, tree_node);
 349
 350             if ((next_reg->core_id == V3_MEM_CORE_ANY) ||
 351                 (next_reg->core_id == core_id)) {
 352
 353                 // This check is not strictly necessary, but it makes it clearer
 354                 if (next_reg->guest_start > guest_addr) {
 355                     return next_reg;
 356                 }
 357             }
 358         }
 359     }
 360
 361     return NULL;
 362 }
 363
 364
 365
 366
 367 /* Given an address region of memory, find if there are any regions that overlap with it.
 368  * This checks that the range lies in a single region, and returns that region if it does,
 369  * this can be either the base region or a sub region.
 370  * IF there are multiple regions in the range then it returns NULL
 371  */
 372 static struct v3_mem_region * get_overlapping_region(struct v3_vm_info * vm, uint16_t core_id,
 373                                                      addr_t start_gpa, addr_t end_gpa) {
 374     struct v3_mem_region * start_region = v3_get_mem_region(vm, core_id, start_gpa);
 375
 376     if (start_region == NULL) {
 377         PrintError("Invalid memory region\n");
 378         return NULL;
 379     }
 380
 381
 382     if (start_region->guest_end < end_gpa) {
 383         // Region ends before range
 384         return NULL;
 385     } else if (start_region->flags.base == 0) {
 386         // sub region overlaps range
 387         return start_region;
 388     } else {
 389         // Base region, now we have to scan forward for the next sub region
 390         struct v3_mem_region * next_reg = get_next_mem_region(vm, core_id, start_gpa);
 391
 392         if (next_reg == NULL) {
 393             // no sub regions after start_addr, base region is ok
 394             return start_region;
 395         } else if (next_reg->guest_start >= end_gpa) {
 396             // Next sub region begins outside range
 397             return start_region;
 398         } else {
 399             return NULL;
 400         }
 401     }
 402
 403
 404     // Should never get here
 405     return NULL;
 406 }
 407
 408
 409
 410
 411
 412 void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
 413     int i = 0;
 414
 415     if (reg == NULL) {
 416         return;
 417     }
 418
 419
 420     v3_rb_erase(&(reg->tree_node), &(vm->mem_map.mem_regions));
 421
 422
 423
 424     // If the guest isn't running then there shouldn't be anything to invalidate.
 425     // Page tables should __always__ be created on demand during execution
 426     // NOTE: This is a sanity check, and can be removed if that assumption changes
 427     if (vm->run_state != VM_RUNNING) {
 428         V3_Free(reg);
 429         return;
 430     }
 431
 432     for (i = 0; i < vm->num_cores; i++) {
 433         struct guest_info * info = &(vm->cores[i]);
 434
 435         // flush virtual page tables
 436         // 3 cases shadow, shadow passthrough, and nested
 437
 438         if (info->shdw_pg_mode == SHADOW_PAGING) {
 439             v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
 440
 441             if (mem_mode == PHYSICAL_MEM) {
 442                 addr_t cur_addr;
 443
 444                 for (cur_addr = reg->guest_start;
 445                      cur_addr < reg->guest_end;
 446                      cur_addr += PAGE_SIZE_4KB) {
 447                     v3_invalidate_passthrough_addr(info, cur_addr);
 448                 }
 449             } else {
 450                 v3_invalidate_shadow_pts(info);
 451             }
 452
 453         } else if (info->shdw_pg_mode == NESTED_PAGING) {
 454             addr_t cur_addr;
 455
 456             for (cur_addr = reg->guest_start;
 457                  cur_addr < reg->guest_end;
 458                  cur_addr += PAGE_SIZE_4KB) {
 459
 460                 v3_invalidate_nested_addr(info, cur_addr);
 461             }
 462         }
 463     }
 464
 465     V3_Free(reg);
 466
 467     // flush virtual page tables
 468     // 3 cases shadow, shadow passthrough, and nested
 469
 470 }
 471
 472 // Determine if a given address can be handled by a large page of the requested size
 473 uint32_t v3_get_max_page_size(struct guest_info * core, addr_t page_addr, v3_cpu_mode_t mode) {
 474     addr_t pg_start = 0;
 475     addr_t pg_end = 0;
 476     uint32_t page_size = PAGE_SIZE_4KB;
 477     struct v3_mem_region * reg = NULL;
 478
 479     switch (mode) {
 480         case PROTECTED:
 481             if (core->use_large_pages == 1) {
 482                 pg_start = PAGE_ADDR_4MB(page_addr);
 483                 pg_end = (pg_start + PAGE_SIZE_4MB);
 484
 485                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 486
 487                 if ((reg) && ((reg->host_addr % PAGE_SIZE_4MB) == 0)) {
 488                     page_size = PAGE_SIZE_4MB;
 489                 }
 490             }
 491             break;
 492         case PROTECTED_PAE:
 493             if (core->use_large_pages == 1) {
 494                 pg_start = PAGE_ADDR_2MB(page_addr);
 495                 pg_end = (pg_start + PAGE_SIZE_2MB);
 496
 497                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 498
 499                 if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
 500                     page_size = PAGE_SIZE_2MB;
 501                 }
 502             }
 503             break;
 504         case LONG:
 505         case LONG_32_COMPAT:
 506         case LONG_16_COMPAT:
 507             if (core->use_giant_pages == 1) {
 508                 pg_start = PAGE_ADDR_1GB(page_addr);
 509                 pg_end = (pg_start + PAGE_SIZE_1GB);
 510
 511                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 512
 513                 if ((reg) && ((reg->host_addr % PAGE_SIZE_1GB) == 0)) {
 514                     page_size = PAGE_SIZE_1GB;
 515                     break;
 516                 }
 517             }
 518
 519             if (core->use_large_pages == 1) {
 520                 pg_start = PAGE_ADDR_2MB(page_addr);
 521                 pg_end = (pg_start + PAGE_SIZE_2MB);
 522
 523                 reg = get_overlapping_region(core->vm_info, core->vcpu_id, pg_start, pg_end);
 524
 525                 if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
 526                     page_size = PAGE_SIZE_2MB;
 527                 }
 528             }
 529             break;
 530         default:
 531             PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
 532             return -1;
 533     }
 534
 535     return page_size;
 536 }
 537
 538
 539
 540 void v3_print_mem_map(struct v3_vm_info * vm) {
 541     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
 542     struct v3_mem_region * reg = &(vm->mem_map.base_region);
 543     int i = 0;
 544
 545     V3_Print("Memory Layout (all cores):\n");
 546
 547
 548     V3_Print("Base Region (all cores):  0x%p - 0x%p -> 0x%p\n",
 549                (void *)(reg->guest_start),
 550                (void *)(reg->guest_end - 1),
 551                (void *)(reg->host_addr));
 552
 553
 554     // If the memory map is empty, don't print it
 555     if (node == NULL) {
 556         return;
 557     }
 558
 559     do {
 560         reg = rb_entry(node, struct v3_mem_region, tree_node);
 561
 562         V3_Print("%d:  0x%p - 0x%p -> 0x%p\n", i,
 563                    (void *)(reg->guest_start),
 564                    (void *)(reg->guest_end - 1),
 565                    (void *)(reg->host_addr));
 566
 567         V3_Print("\t(flags=0x%x) (core=0x%x) (unhandled = 0x%p)\n",
 568                  reg->flags.value,
 569                  reg->core_id,
 570                  reg->unhandled);
 571
 572         i++;
 573     } while ((node = v3_rb_next(node)));
 574 }
 575