palacios/src/palacios/vmm_mem.c

   1 /*
   2  * This file is part of the Palacios Virtual Machine Monitor developed
   3  * by the V3VEE Project with funding from the United States National
   4  * Science Foundation and the Department of Energy.
   5  *
   6  * The V3VEE Project is a joint project between Northwestern University
   7  * and the University of New Mexico.  You can find out more at
   8  * http://www.v3vee.org
   9  *
  10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
  11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
  12  * All rights reserved.
  13  *
  14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
  15  *
  16  * This is free software.  You are permitted to use,
  17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  18  */
  19
  20 #include <palacios/vmm_mem.h>
  21 #include <palacios/vmm.h>
  22 #include <palacios/vmm_util.h>
  23 #include <palacios/vmm_emulator.h>
  24 #include <palacios/vm_guest.h>
  25
  26 #include <palacios/vmm_shadow_paging.h>
  27 #include <palacios/vmm_direct_paging.h>
  28
  29
  30
  31
  32 static int mem_offset_hypercall(struct guest_info * info, uint_t hcall_id, void * private_data) {
  33     PrintDebug("V3Vee: Memory offset hypercall (offset=%p)\n",
  34                (void *)(info->vm_info->mem_map.base_region.host_addr));
  35
  36     info->vm_regs.rbx = info->vm_info->mem_map.base_region.host_addr;
  37
  38     return 0;
  39 }
  40
  41 static int unhandled_err(struct guest_info * core, addr_t guest_va, addr_t guest_pa,
  42                          struct v3_mem_region * reg, pf_error_t access_info) {
  43
  44     PrintError("Unhandled memory access error\n");
  45
  46     v3_print_mem_map(core->vm_info);
  47
  48     v3_print_guest_state(core);
  49
  50     return -1;
  51 }
  52
  53 int v3_init_mem_map(struct v3_vm_info * vm) {
  54     struct v3_mem_map * map = &(vm->mem_map);
  55     addr_t mem_pages = vm->mem_size >> 12;
  56
  57     memset(&(map->base_region), 0, sizeof(struct v3_mem_region));
  58
  59     map->mem_regions.rb_node = NULL;
  60
  61     // There is an underlying region that contains all of the guest memory
  62     // PrintDebug("Mapping %d pages of memory (%u bytes)\n", (int)mem_pages, (uint_t)info->mem_size);
  63
  64     // 2MB page alignment needed for 2MB hardware nested paging
  65     map->base_region.guest_start = 0;
  66     map->base_region.guest_end = mem_pages * PAGE_SIZE_4KB;
  67
  68 #ifdef CONFIG_ALIGNED_PG_ALLOC
  69     map->base_region.host_addr = (addr_t)V3_AllocAlignedPages(mem_pages, vm->mem_align);
  70 #else
  71     map->base_region.host_addr = (addr_t)V3_AllocPages(mem_pages);
  72 #endif
  73
  74     map->base_region.flags.read = 1;
  75     map->base_region.flags.write = 1;
  76     map->base_region.flags.exec = 1;
  77     map->base_region.flags.base = 1;
  78     map->base_region.flags.alloced = 1;
  79
  80     map->base_region.unhandled = unhandled_err;
  81
  82     if ((void *)map->base_region.host_addr == NULL) {
  83         PrintError("Could not allocate Guest memory\n");
  84         return -1;
  85     }
  86
  87     //memset(V3_VAddr((void *)map->base_region.host_addr), 0xffffffff, map->base_region.guest_end);
  88
  89     v3_register_hypercall(vm, MEM_OFFSET_HCALL, mem_offset_hypercall, NULL);
  90
  91     return 0;
  92 }
  93
  94
  95 void v3_delete_mem_map(struct v3_vm_info * vm) {
  96     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
  97     struct v3_mem_region * reg;
  98     struct rb_node * tmp_node = NULL;
  99     addr_t mem_pages = vm->mem_size >> 12;
 100
 101     while (node) {
 102         reg = rb_entry(node, struct v3_mem_region, tree_node);
 103         tmp_node = node;
 104         node = v3_rb_next(node);
 105
 106         v3_delete_mem_region(vm, reg);
 107     }
 108
 109     V3_FreePages((void *)(vm->mem_map.base_region.host_addr), mem_pages);
 110 }
 111
 112
 113 struct v3_mem_region * v3_create_mem_region(struct v3_vm_info * vm, uint16_t core_id,
 114                                                addr_t guest_addr_start, addr_t guest_addr_end) {
 115
 116     struct v3_mem_region * entry = (struct v3_mem_region *)V3_Malloc(sizeof(struct v3_mem_region));
 117     memset(entry, 0, sizeof(struct v3_mem_region));
 118
 119     entry->guest_start = guest_addr_start;
 120     entry->guest_end = guest_addr_end;
 121     entry->core_id = core_id;
 122     entry->unhandled = unhandled_err;
 123
 124     return entry;
 125 }
 126
 127
 128
 129
 130 int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
 131                        addr_t               guest_addr_start,
 132                        addr_t               guest_addr_end,
 133                        addr_t               host_addr)
 134 {
 135     struct v3_mem_region * entry = NULL;
 136
 137     entry = v3_create_mem_region(vm, core_id,
 138                                  guest_addr_start,
 139                                  guest_addr_end);
 140
 141     entry->host_addr = host_addr;
 142
 143     entry->flags.read = 1;
 144     entry->flags.write = 1;
 145     entry->flags.exec = 1;
 146     entry->flags.alloced = 1;
 147
 148     if (v3_insert_mem_region(vm, entry) == -1) {
 149         V3_Free(entry);
 150         return -1;
 151     }
 152
 153     return 0;
 154 }
 155
 156
 157
 158 static inline
 159 struct v3_mem_region * __insert_mem_region(struct v3_vm_info * vm,
 160                                            struct v3_mem_region * region) {
 161     struct rb_node ** p = &(vm->mem_map.mem_regions.rb_node);
 162     struct rb_node * parent = NULL;
 163     struct v3_mem_region * tmp_region;
 164
 165     while (*p) {
 166         parent = *p;
 167         tmp_region = rb_entry(parent, struct v3_mem_region, tree_node);
 168
 169         if (region->guest_end <= tmp_region->guest_start) {
 170             p = &(*p)->rb_left;
 171         } else if (region->guest_start >= tmp_region->guest_end) {
 172             p = &(*p)->rb_right;
 173         } else {
 174             if ((region->guest_end != tmp_region->guest_end) ||
 175                 (region->guest_start != tmp_region->guest_start)) {
 176                 PrintError("Trying to map a partial overlapped core specific page...\n");
 177                 return tmp_region; // This is ugly...
 178             } else if (region->core_id == tmp_region->core_id) {
 179                 return tmp_region;
 180             } else if (region->core_id < tmp_region->core_id) {
 181                 p = &(*p)->rb_left;
 182             } else {
 183                 p = &(*p)->rb_right;
 184             }
 185         }
 186     }
 187
 188     rb_link_node(&(region->tree_node), parent, p);
 189
 190     return NULL;
 191 }
 192
 193
 194
 195 int v3_insert_mem_region(struct v3_vm_info * vm, struct v3_mem_region * region) {
 196     struct v3_mem_region * ret;
 197     int i = 0;
 198
 199     if ((ret = __insert_mem_region(vm, region))) {
 200         return -1;
 201     }
 202
 203     v3_rb_insert_color(&(region->tree_node), &(vm->mem_map.mem_regions));
 204
 205
 206
 207     for (i = 0; i < vm->num_cores; i++) {
 208         struct guest_info * info = &(vm->cores[i]);
 209
 210         // flush virtual page tables
 211         // 3 cases shadow, shadow passthrough, and nested
 212
 213         if (info->shdw_pg_mode == SHADOW_PAGING) {
 214             v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
 215
 216             if (mem_mode == PHYSICAL_MEM) {
 217                 addr_t cur_addr;
 218
 219                 for (cur_addr = region->guest_start;
 220                      cur_addr < region->guest_end;
 221                      cur_addr += PAGE_SIZE_4KB) {
 222                     v3_invalidate_passthrough_addr(info, cur_addr);
 223                 }
 224             } else {
 225                 v3_invalidate_shadow_pts(info);
 226             }
 227
 228         } else if (info->shdw_pg_mode == NESTED_PAGING) {
 229             addr_t cur_addr;
 230
 231             for (cur_addr = region->guest_start;
 232                  cur_addr < region->guest_end;
 233                  cur_addr += PAGE_SIZE_4KB) {
 234
 235                 v3_invalidate_nested_addr(info, cur_addr);
 236             }
 237         }
 238     }
 239
 240     return 0;
 241 }
 242
 243
 244
 245
 246 struct v3_mem_region * v3_get_mem_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
 247     struct rb_node * n = vm->mem_map.mem_regions.rb_node;
 248     struct v3_mem_region * reg = NULL;
 249
 250     while (n) {
 251
 252         reg = rb_entry(n, struct v3_mem_region, tree_node);
 253
 254         if (guest_addr < reg->guest_start) {
 255             n = n->rb_left;
 256         } else if (guest_addr >= reg->guest_end) {
 257             n = n->rb_right;
 258         } else {
 259             if (reg->core_id == V3_MEM_CORE_ANY) {
 260                 // found relevant region, it's available on all cores
 261                 return reg;
 262             } else if (core_id == reg->core_id) {
 263                 // found relevant region, it's available on the indicated core
 264                 return reg;
 265             } else if (core_id < reg->core_id) {
 266                 // go left, core too big
 267                 n = n->rb_left;
 268             } else if (core_id > reg->core_id) {
 269                 // go right, core too small
 270                 n = n->rb_right;
 271             } else {
 272                 PrintDebug("v3_get_mem_region: Impossible!\n");
 273                 return NULL;
 274             }
 275         }
 276     }
 277
 278
 279     // There is not registered region, so we check if its a valid address in the base region
 280
 281     if (guest_addr > vm->mem_map.base_region.guest_end) {
 282         PrintError("Guest Address Exceeds Base Memory Size (ga=0x%p), (limit=0x%p) (core=0x%x)\n",
 283                    (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end, core_id);
 284         v3_print_mem_map(vm);
 285
 286         return NULL;
 287     }
 288
 289     return &(vm->mem_map.base_region);
 290 }
 291
 292
 293
 294 /* This returns the next memory region based on a given address.
 295  * If the address falls inside a sub region, that region is returned.
 296  * If the address falls outside a sub region, the next sub region is returned
 297  * NOTE that we have to be careful about core_ids here...
 298  */
 299 static struct v3_mem_region * get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
 300     struct rb_node * n = vm->mem_map.mem_regions.rb_node;
 301     struct v3_mem_region * reg = NULL;
 302     struct v3_mem_region * parent = NULL;
 303
 304     while (n) {
 305
 306         reg = rb_entry(n, struct v3_mem_region, tree_node);
 307
 308         if (guest_addr < reg->guest_start) {
 309             n = n->rb_left;
 310         } else if (guest_addr >= reg->guest_end) {
 311             n = n->rb_right;
 312         } else {
 313             if (reg->core_id == V3_MEM_CORE_ANY) {
 314                 // found relevant region, it's available on all cores
 315                 return reg;
 316             } else if (core_id == reg->core_id) {
 317                 // found relevant region, it's available on the indicated core
 318                 return reg;
 319             } else if (core_id < reg->core_id) {
 320                 // go left, core too big
 321                 n = n->rb_left;
 322             } else if (core_id > reg->core_id) {
 323                 // go right, core too small
 324                 n = n->rb_right;
 325             } else {
 326                 PrintError("v3_get_mem_region: Impossible!\n");
 327                 return NULL;
 328             }
 329         }
 330
 331         if ((reg->core_id == core_id) || (reg->core_id == V3_MEM_CORE_ANY)) {
 332             parent = reg;
 333         }
 334     }
 335
 336
 337     if (parent->guest_start > guest_addr) {
 338         return parent;
 339     } else if (parent->guest_end < guest_addr) {
 340         struct rb_node * node = &(parent->tree_node);
 341
 342         while ((node = v3_rb_next(node)) != NULL) {
 343             struct v3_mem_region * next_reg = rb_entry(node, struct v3_mem_region, tree_node);
 344
 345             if ((next_reg->core_id == V3_MEM_CORE_ANY) ||
 346                 (next_reg->core_id == core_id)) {
 347
 348                 // This check is not strictly necessary, but it makes it clearer
 349                 if (next_reg->guest_start > guest_addr) {
 350                     return next_reg;
 351                 }
 352             }
 353         }
 354     }
 355
 356     return NULL;
 357 }
 358
 359
 360
 361
 362 /* Given an address region of memory, find if there are any regions that overlap with it.
 363  * This checks that the range lies in a single region, and returns that region if it does,
 364  * this can be either the base region or a sub region.
 365  * IF there are multiple regions in the range then it returns NULL
 366  */
 367 static struct v3_mem_region * get_overlapping_region(struct v3_vm_info * vm, uint16_t core_id,
 368                                                      addr_t start_gpa, addr_t end_gpa) {
 369     struct v3_mem_region * start_region = v3_get_mem_region(vm, core_id, start_gpa);
 370
 371     if (start_region == NULL) {
 372         PrintError("Invalid memory region\n");
 373         return NULL;
 374     }
 375
 376
 377     if (start_region->guest_end < end_gpa) {
 378         // Region ends before range
 379         return NULL;
 380     } else if (start_region->flags.base == 0) {
 381         // sub region overlaps range
 382         return start_region;
 383     } else {
 384         // Base region, now we have to scan forward for the next sub region
 385         struct v3_mem_region * next_reg = get_next_mem_region(vm, core_id, start_gpa);
 386
 387         if (next_reg == NULL) {
 388             // no sub regions after start_addr, base region is ok
 389             return start_region;
 390         } else if (next_reg->guest_start >= end_gpa) {
 391             // Next sub region begins outside range
 392             return start_region;
 393         } else {
 394             return NULL;
 395         }
 396     }
 397
 398
 399     // Should never get here
 400     return NULL;
 401 }
 402
 403
 404
 405
 406
 407 void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
 408     int i = 0;
 409
 410     if (reg == NULL) {
 411         return;
 412     }
 413
 414
 415     v3_rb_erase(&(reg->tree_node), &(vm->mem_map.mem_regions));
 416
 417     V3_Free(reg);
 418
 419
 420     // If the guest isn't running then there shouldn't be anything to invalidate.
 421     // Page tables should __always__ be created on demand during execution
 422     // NOTE: This is a sanity check, and can be removed if that assumption changes
 423     if (vm->run_state != VM_RUNNING) {
 424         return;
 425     }
 426
 427     for (i = 0; i < vm->num_cores; i++) {
 428         struct guest_info * info = &(vm->cores[i]);
 429
 430         // flush virtual page tables
 431         // 3 cases shadow, shadow passthrough, and nested
 432
 433         if (info->shdw_pg_mode == SHADOW_PAGING) {
 434             v3_mem_mode_t mem_mode = v3_get_vm_mem_mode(info);
 435
 436             if (mem_mode == PHYSICAL_MEM) {
 437                 addr_t cur_addr;
 438
 439                 for (cur_addr = reg->guest_start;
 440                      cur_addr < reg->guest_end;
 441                      cur_addr += PAGE_SIZE_4KB) {
 442                     v3_invalidate_passthrough_addr(info, cur_addr);
 443                 }
 444             } else {
 445                 v3_invalidate_shadow_pts(info);
 446             }
 447
 448         } else if (info->shdw_pg_mode == NESTED_PAGING) {
 449             addr_t cur_addr;
 450
 451             for (cur_addr = reg->guest_start;
 452                  cur_addr < reg->guest_end;
 453                  cur_addr += PAGE_SIZE_4KB) {
 454
 455                 v3_invalidate_nested_addr(info, cur_addr);
 456             }
 457         }
 458     }
 459
 460     // flush virtual page tables
 461     // 3 cases shadow, shadow passthrough, and nested
 462
 463 }
 464
 465 // Determine if a given address can be handled by a large page of the requested size
 466 uint32_t v3_get_max_page_size(struct guest_info * core, addr_t page_addr, v3_cpu_mode_t mode) {
 467     addr_t pg_start = 0;
 468     addr_t pg_end = 0;
 469     uint32_t page_size = PAGE_SIZE_4KB;
 470     struct v3_mem_region * reg = NULL;
 471
 472     switch (mode) {
 473         case PROTECTED:
 474             if (core->use_large_pages == 1) {
 475                 pg_start = PAGE_ADDR_4MB(page_addr);
 476                 pg_end = (pg_start + PAGE_SIZE_4MB);
 477
 478                 reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
 479
 480                 if ((reg) && ((reg->host_addr % PAGE_SIZE_4MB) == 0)) {
 481                     page_size = PAGE_SIZE_4MB;
 482                 }
 483             }
 484             break;
 485         case PROTECTED_PAE:
 486             if (core->use_large_pages == 1) {
 487                 pg_start = PAGE_ADDR_2MB(page_addr);
 488                 pg_end = (pg_start + PAGE_SIZE_2MB);
 489
 490                 reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
 491
 492                 if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
 493                     page_size = PAGE_SIZE_2MB;
 494                 }
 495             }
 496             break;
 497         case LONG:
 498         case LONG_32_COMPAT:
 499         case LONG_16_COMPAT:
 500             if (core->use_giant_pages == 1) {
 501                 pg_start = PAGE_ADDR_1GB(page_addr);
 502                 pg_end = (pg_start + PAGE_SIZE_1GB);
 503
 504                 reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
 505
 506                 if ((reg) && ((reg->host_addr % PAGE_SIZE_1GB) == 0)) {
 507                     page_size = PAGE_SIZE_1GB;
 508                     break;
 509                 }
 510             }
 511
 512             if (core->use_large_pages == 1) {
 513                 pg_start = PAGE_ADDR_2MB(page_addr);
 514                 pg_end = (pg_start + PAGE_SIZE_2MB);
 515
 516                 reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
 517
 518                 if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
 519                     page_size = PAGE_SIZE_2MB;
 520                 }
 521             }
 522             break;
 523         default:
 524             PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
 525             return -1;
 526     }
 527
 528     return page_size;
 529 }
 530
 531
 532
 533 void v3_print_mem_map(struct v3_vm_info * vm) {
 534     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
 535     struct v3_mem_region * reg = &(vm->mem_map.base_region);
 536     int i = 0;
 537
 538     V3_Print("Memory Layout (all cores):\n");
 539
 540
 541     V3_Print("Base Region (all cores):  0x%p - 0x%p -> 0x%p\n",
 542                (void *)(reg->guest_start),
 543                (void *)(reg->guest_end - 1),
 544                (void *)(reg->host_addr));
 545
 546
 547     // If the memory map is empty, don't print it
 548     if (node == NULL) {
 549         return;
 550     }
 551
 552     do {
 553         reg = rb_entry(node, struct v3_mem_region, tree_node);
 554
 555         V3_Print("%d:  0x%p - 0x%p -> 0x%p\n", i,
 556                    (void *)(reg->guest_start),
 557                    (void *)(reg->guest_end - 1),
 558                    (void *)(reg->host_addr));
 559
 560         V3_Print("\t(flags=0x%x) (core=0x%x) (unhandled = 0x%p)\n",
 561                  reg->flags.value,
 562                  reg->core_id,
 563                  reg->unhandled);
 564
 565         i++;
 566     } while ((node = v3_rb_next(node)));
 567 }
 568