2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org>
11 * All rights reserved.
13 * Author: Peter Dinda <pdinda@northwestern.edu>
15 * This is free software. You are permitted to use,
16 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
19 #include <palacios/vmm_mem.h>
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_util.h>
22 #include <palacios/vmm_emulator.h>
23 #include <palacios/vm_guest.h>
24 #include <palacios/vmm_debug.h>
25 #include <palacios/vmm_hypercall.h>
27 #include <palacios/vmm_xml.h>
29 #include <palacios/vm_guest_mem.h>
31 #include <palacios/vmm_debug.h>
39 <file id="multibootelf" filename="multibootelf.o" />
42 <multiboot enable="y" file_id="multibootelf" />
47 #ifndef V3_CONFIG_DEBUG_MULTIBOOT
49 #define PrintDebug(fmt, args...)
// Global (not per-VM) multiboot initialization hook.
// The only visible action is a debug trace; no state is set up here.
53 int v3_init_multiboot()
55 PrintDebug(VM_NONE,VCORE_NONE, "multiboot: init\n");
// Global (not per-VM) multiboot teardown hook, the counterpart of
// v3_init_multiboot(). The only visible action is a debug trace.
59 int v3_deinit_multiboot()
61 PrintDebug(VM_NONE,VCORE_NONE, "multiboot: deinit\n");
// Integer division of x by y, rounded up: the quotient plus one when
// there is a non-zero remainder. Each argument is evaluated twice, so
// do not pass expressions with side effects.
67 #define CEIL_DIV(x,y) (((x)/(y)) + !!((x)%(y)))
/*
 * Per-VM multiboot initialization driven by the VM's XML configuration.
 *
 * vm:     VM whose mb_state is (re)initialized
 * config: root of the VM configuration tree; may be null
 *
 * vm->mb_state is cleared first.  The VM is then marked as a multiboot
 * VM only when the configuration has a "multiboot" subtree whose
 * "enable" value is "y" (compared case-insensitively) and whose
 * "file_id" names a file that v3_cfg_get_file() can resolve.  The
 * resolved file is remembered in vm->mb_state.mb_file.
 */
69 int v3_init_multiboot_vm(struct v3_vm_info *vm, struct v3_xml *config)
71 v3_cfg_tree_t *mb_config;
75 PrintDebug(vm, VCORE_NONE, "multiboot: vm init\n");
// Start from a clean slate so a missing/disabled block leaves is_multiboot==0
77 memset(&vm->mb_state,0,sizeof(struct v3_vm_multiboot));
78 vm->mb_state.is_multiboot=0;
// No configuration at all, or no <multiboot> element: not an error
80 if (!config || !(mb_config=v3_cfg_subtree(config,"multiboot"))) {
81 PrintDebug(vm,VCORE_NONE,"multiboot: no multiboot configuration found - normal boot will occur\n");
// strcasecmp() returns non-zero on mismatch, so anything other than "y"/"Y" disables multiboot
85 if (!(enable=v3_cfg_val(mb_config,"enable")) || strcasecmp(enable,"y")) {
86 PrintDebug(vm,VCORE_NONE,"multiboot: multiboot configuration disabled\n");
// An enabled block without a file_id is reported with PrintError (unlike the cases above)
90 if (!(mb_file_id=v3_cfg_val(mb_config,"file_id"))) {
91 PrintError(vm,VCORE_NONE,"multiboot: multiboot block without file_id...\n");
// Resolve the file_id against the files attached to this VM
95 vm->mb_state.mb_file = v3_cfg_get_file(vm,mb_file_id);
97 if (!vm->mb_state.mb_file) {
98 PrintError(vm,VCORE_NONE,"multiboot: multiboot block contains bad file_id (%s)\n",mb_file_id);
102 vm->mb_state.is_multiboot=1;
// Report the outcome unconditionally (V3_Print, not PrintDebug)
105 if (vm->mb_state.is_multiboot) {
// NOTE(review): the format string ends with an unmatched ']' - looks like a typo
106 V3_Print(vm,VCORE_NONE,"multiboot: file_id=%s (tag %s)]\n",
108 vm->mb_state.mb_file->tag);
110 V3_Print(vm,VCORE_NONE,"multiboot: This is not a multiboot VM\n");
// Per-VM multiboot teardown hook, the counterpart of v3_init_multiboot_vm().
// The only visible action is a debug trace.
117 int v3_deinit_multiboot_vm(struct v3_vm_info *vm)
119 PrintDebug(vm, VCORE_NONE, "multiboot: multiboot VM deinit\n");
// Per-core multiboot initialization hook. Emits a debug trace only;
// the actual register setup for boot is done later by
// v3_setup_multiboot_core_for_boot().
124 int v3_init_multiboot_core(struct guest_info *core)
126 PrintDebug(core->vm_info, VCORE_NONE, "multiboot: multiboot core init\n");
128 // Nothing to do at this point
// Per-core multiboot teardown hook, the counterpart of
// v3_init_multiboot_core(). The only visible action is a debug trace.
133 int v3_deinit_multiboot_core(struct guest_info *core)
135 PrintDebug(core->vm_info, VCORE_NONE, "multiboot: multiboot core deinit\n");
// File-local logging shorthands. Both prepend "multiboot: " to the
// message and log without VM or core context (VM_NONE/VCORE_NONE).
// INFO goes through PrintDebug, so it produces no output when
// PrintDebug is defined away (see the V3_CONFIG_DEBUG_MULTIBOOT guard
// near the top of this file); ERROR always goes through PrintError.
143 #define ERROR(fmt, args...) PrintError(VM_NONE,VCORE_NONE,"multiboot: " fmt,##args)
144 #define INFO(fmt, args...) PrintDebug(VM_NONE,VCORE_NONE,"multiboot: " fmt,##args)
148 /******************************************************************
149 Data contained in the ELF file we will attempt to boot
150 ******************************************************************/
152 #define ELF_MAGIC 0x464c457f
153 #define MB2_MAGIC 0xe85250d6
156 /******************************************************************
157 Data we will pass to the kernel via rbx
158 ******************************************************************/
160 #define MB2_INFO_MAGIC 0x36d76289
162 typedef struct mb_info_header {
165 } __attribute__((packed)) mb_info_header_t;
167 // A tag of type 0, size 8 indicates last value
169 typedef struct mb_info_tag {
172 } __attribute__((packed)) mb_info_tag_t;
// Basic memory information tag handed to the kernel.
// v3_build_multiboot_table() fills mem_lower with 640 and mem_upper
// with the guest memory above 1 MB, both expressed in KB.
175 #define MB_INFO_MEM_TAG 4
176 typedef struct mb_info_mem {
178 uint32_t mem_lower; // 0..640K in KB
179 uint32_t mem_upper; // in KB to first hole - 1 MB
180 } __attribute__((packed)) mb_info_mem_t;
182 #define MB_INFO_CMDLINE_TAG 1
183 // note alignment of 8 bytes required for each...
184 typedef struct mb_info_cmdline {
186 uint32_t size; // includes zero termination
187 uint8_t string[]; // zero terminated
188 } __attribute__((packed)) mb_info_cmdline_t;
195 typedef struct mb_info_memmap_entry {
200 } __attribute__((packed)) mb_info_memmap_entry_t;
// Memory map tag: a fixed part followed by a variable number of
// mb_info_memmap_entry_t records (flexible array member).
// v3_build_multiboot_table() sets tag.size to cover the fixed part plus
// all entries, entry_size to 24 and entry_version to 0.
202 #define MB_INFO_MEMMAP_TAG 6
203 // note alignment of 8 bytes required for each...
204 typedef struct mb_info_memmap {
206 uint32_t entry_size; // multiple of 8
207 uint32_t entry_version; // 0
208 mb_info_memmap_entry_t entries[];
209 } __attribute__((packed)) mb_info_memmap_t;
// Non-standard info tag describing the HRT side of an HVM.
// v3_build_multiboot_table() emits it only when the table is built for
// an HRT core of an HVM, and fills it from vm->num_cores and
// vm->hvm_state (first_hrt_core, first_hrt_gpa).
211 #define MB_INFO_HRT_TAG 0xf00df00d
212 typedef struct mb_info_hrt {
214 // apic ids are 0..num_apics-1
215 // apic and ioapic addresses are the well known places
216 uint32_t total_num_apics;
217 uint32_t first_hrt_apic_id;
// the two ioapic fields are currently always written as 0 by the table builder
218 uint32_t have_hrt_ioapic;
219 uint32_t first_hrt_ioapic_entry;
220 uint64_t first_hrt_addr;
221 } __attribute__((packed)) mb_info_hrt_t;
229 // - Boot Loader name
232 // - Framebuffer info
235 static int is_elf(uint8_t *data, uint64_t size)
237 if (*((uint32_t*)data)==ELF_MAGIC) {
244 static mb_header_t *find_mb_header(uint8_t *data, uint64_t size)
246 uint64_t limit = size > 32768 ? 32768 : size;
249 // Scan for the .boot magic cookie
250 // must be in first 32K, assume 4 byte aligned
251 for (i=0;i<limit;i+=4) {
252 if (*((uint32_t*)&data[i])==MB2_MAGIC) {
253 INFO("Found multiboot header at offset 0x%llx\n",i);
254 return (mb_header_t *) &data[i];
// Checksum helper for the multiboot header.
// size is a count of 32 bit words, not bytes: the loop indexes data[]
// from 0 to size-1, and the only caller in this file passes 4 (the
// fixed part of the header).  The caller treats a non-zero return as
// "checksum is good".
// NOTE(review): presumably the words are summed and a zero total is
// accepted, as the multiboot2 header checksum rule requires - confirm
// against the loop body.
260 static int checksum4_ok(uint32_t *data, uint64_t size)
265 for (i=0;i<size;i++) {
/*
 * Parse the multiboot2 header embedded in a kernel image.
 *
 * data: start of the image
 * size: number of valid bytes at data
 * mb:   out parameter; receives pointers to the header and to each
 *       recognized header tag (a pointer stays 0 if its tag is absent)
 *
 * The image must be an ELF, must contain a multiboot2 header, and the
 * first four 32 bit words of that header must pass checksum4_ok().
 * Tags are then walked starting 16 bytes after the header until the
 * terminating tag (type 0, size 8).  Each known tag type may appear at
 * most once; a duplicate is reported as an error.
 *
 * The pointers stored in *mb point into data itself - nothing is
 * copied - so they are only valid while the image buffer is.
 */
272 static int parse_multiboot_kernel(uint8_t *data, uint64_t size, mb_data_t *mb)
276 mb_header_t *mb_header=0;
279 mb_addr_t *mb_addr=0;
280 mb_entry_t *mb_entry=0;
281 mb_flags_t *mb_flags=0;
282 mb_framebuf_t *mb_framebuf=0;
283 mb_modalign_t *mb_modalign=0;
284 mb_mb64_hrt_t *mb_mb64_hrt=0;
287 if (!is_elf(data,size)) {
288 ERROR("HRT is not an ELF\n");
292 mb_header = find_mb_header(data,size);
295 ERROR("No multiboot header found\n");
299 // Checksum applies only to the header itself, not to
300 // the subsequent tags...
301 if (!checksum4_ok((uint32_t*)mb_header,4)) {
302 ERROR("Multiboot header has bad checksum\n");
306 INFO("Multiboot header: arch=0x%x, headerlen=0x%x\n", mb_header->arch, mb_header->headerlen);
// The first tag follows the 16 byte fixed header
308 mb_tag = (mb_tag_t*)((void*)mb_header+16);
// NOTE(review): the walk is bounded neither by size nor by
// mb_header->headerlen; an image without a terminating tag, or with a
// tag whose size field is 0, would run off the buffer or never advance.
310 while (!(mb_tag->type==0 && mb_tag->size==8)) {
311 INFO("tag: type 0x%x flags=0x%x size=0x%x\n",mb_tag->type, mb_tag->flags,mb_tag->size);
312 switch (mb_tag->type) {
315 ERROR("Multiple info tags found!\n");
318 mb_inf = (mb_info_t*)mb_tag;
319 INFO(" info request - types follow\n");
// NOTE(review): the loop condition never compares against i.  As
// written it is constant for a given tag: the loop is skipped when
// size < 12 and otherwise never terminates.  Presumably
// i<(mb_tag->size-8)/4 was intended - confirm and fix.
320 for (i=0;(mb_tag->size-8)/4;i++) {
321 INFO(" %llu: type 0x%x\n", i, mb_inf->types[i]);
326 case MB_TAG_ADDRESS: {
328 ERROR("Multiple address tags found!\n");
331 mb_addr = (mb_addr_t*)mb_tag;
333 INFO(" header_addr = 0x%x\n", mb_addr->header_addr);
334 INFO(" load_addr = 0x%x\n", mb_addr->load_addr);
335 INFO(" load_end_addr = 0x%x\n", mb_addr->load_end_addr);
336 INFO(" bss_end_addr = 0x%x\n", mb_addr->bss_end_addr);
342 ERROR("Multiple entry tags found!\n");
345 mb_entry=(mb_entry_t*)mb_tag;
347 INFO(" entry_addr = 0x%x\n", mb_entry->entry_addr);
353 ERROR("Multiple flags tags found!\n");
356 mb_flags = (mb_flags_t*)mb_tag;
358 INFO(" console_flags = 0x%x\n", mb_flags->console_flags);
362 case MB_TAG_FRAMEBUF: {
364 ERROR("Multiple framebuf tags found!\n");
367 mb_framebuf = (mb_framebuf_t*)mb_tag;
369 INFO(" width = 0x%x\n", mb_framebuf->width);
370 INFO(" height = 0x%x\n", mb_framebuf->height);
371 INFO(" depth = 0x%x\n", mb_framebuf->depth);
375 case MB_TAG_MODALIGN: {
377 ERROR("Multiple modalign tags found!\n");
380 mb_modalign = (mb_modalign_t*)mb_tag;
382 INFO(" size = 0x%x\n", mb_modalign->size);
386 case MB_TAG_MB64_HRT: {
388 ERROR("Multiple mb64_hrt tags found!\n");
391 mb_mb64_hrt = (mb_mb64_hrt_t*)mb_tag;
398 INFO("Unknown tag... Skipping...\n");
// Advance by the tag's self-declared size.
// NOTE(review): the multiboot2 spec pads tags so each starts on an
// 8 byte boundary; this advance does not round size up - confirm that
// the kernels being loaded only use sizes that are multiples of 8.
401 mb_tag = (mb_tag_t *)(((void*)mb_tag) + mb_tag->size);
404 // copy out to caller
405 mb->header=mb_header;
410 mb->framebuf=mb_framebuf;
411 mb->modalign=mb_modalign;
412 mb->mb64_hrt=mb_mb64_hrt;
// Public entry point for header parsing: runs parse_multiboot_kernel()
// over the contents of a configuration file and returns its result.
// The pointers placed in *result refer to file->data, so they are only
// valid while that buffer is.
418 int v3_parse_multiboot_header(struct v3_cfg_file *file, mb_data_t *result)
420 return parse_multiboot_kernel(file->data,file->size,result);
424 #define APIC_BASE 0xfee00000
425 #define IOAPIC_BASE 0xfec00000
429 MB_HRT (if this is an HVM
434 1024..ioapic_base RAM
435 ioapic_base to ioapic_base+page reserved
436 ioapic_base+page to apic_base ram
437 apic_base to apic_base+page reserved
438 apic_base+page to total RAM
441 The multiboot structure that is written reflects the
442 perspective of the core given the kind of VM it is part of.
445 - core does not matter
450 - only ROS memory visible
451 - regular multiboot or bios boot assumed
454 - HRT64 multiboot assumed
/*
 * Build the multiboot2 information structure that is handed to the
 * guest kernel.
 *
 * core: core from whose perspective the table is built; for an HVM the
 *       reported memory size is the ROS or the HRT memory size
 *       depending on the kind of core, otherwise it is vm->mem_size
 * dest: buffer that receives the table
 * size: capacity of dest in bytes
 *
 * Layout written to dest, in order: info header, HRT tag (HRT cores of
 * an HVM only), basic memory tag, memory map tag with its entries, and
 * a final tag that marks the end of the list.
 *
 * Returns header->totalsize, the number of bytes used in dest.  An
 * error is logged when the table does not fit; the caller in this file
 * (write_table) treats a return value of 0 as failure.
 */
458 uint64_t v3_build_multiboot_table(struct guest_info *core, uint8_t *dest, uint64_t size)
460 struct v3_vm_info *vm = core->vm_info;
461 mb_info_header_t *header=0;
463 mb_info_hrt_t *hrt=0;
465 mb_info_mem_t *mem=0;
466 mb_info_memmap_t *memmap=0;
467 mb_info_tag_t *tag=0;
468 uint64_t num_mem=0, cur_mem=0;
// Default view: all of guest memory; narrowed below for HVMs
470 uint64_t total_mem = vm->mem_size;
473 if (vm->hvm_state.is_hvm) {
474 if (v3_is_hvm_ros_core(core)) {
475 PrintDebug(core->vm_info,core,"multiboot: hvm: building mb table from ROS core perspective\n");
476 total_mem = v3_get_hvm_ros_memsize(vm);
478 PrintDebug(core->vm_info,core,"multiboot: hvm: building mb table from HRT core perspective\n");
479 total_mem = v3_get_hvm_hrt_memsize(vm);
484 // assume we have > 1 MB + apic+ioapic
// The number of memory map entries depends on whether guest memory
// extends past the IOAPIC page and past the APIC page
486 if (total_mem>IOAPIC_BASE+PAGE_SIZE) {
489 if (total_mem>APIC_BASE+PAGE_SIZE) {
// Space needed for the whole table.
// NOTE(review): if the following lines form a single expression, '+'
// binds tighter than '&&' and '?:', so the conditional swallows the
// surrounding terms instead of contributing just the HRT tag size;
// the HRT term probably needs its own parentheses - confirm.
// NOTE(review): this test reads core->hvm_state.is_hrt while the code
// below uses v3_is_hvm_hrt_core(core) - confirm both always agree.
495 sizeof(mb_info_header_t) +
497 core->vm_info->hvm_state.is_hvm && core->hvm_state.is_hrt ? sizeof(mb_info_hrt_t) : 0
500 sizeof(mb_info_mem_t) +
501 sizeof(mb_info_memmap_t) +
502 sizeof(mb_info_memmap_entry_t) * num_mem +
503 sizeof(mb_info_tag_t);
512 ERROR("Cannot fit MB info in needed space\n");
// Carve dest into the individual records, in table order
518 header = (mb_info_header_t*)next;
519 next += sizeof(mb_info_header_t);
522 if (core->vm_info->hvm_state.is_hvm && v3_is_hvm_hrt_core(core)) {
523 hrt = (mb_info_hrt_t*)next;
524 next += sizeof(mb_info_hrt_t);
528 mem = (mb_info_mem_t*)next;
529 next += sizeof(mb_info_mem_t);
531 memmap = (mb_info_memmap_t*)next;
532 next += sizeof(mb_info_memmap_t) + num_mem * sizeof(mb_info_memmap_entry_t);
534 tag = (mb_info_tag_t*)next;
535 next += sizeof(mb_info_tag_t);
// totalsize is simply the distance walked through dest
537 header->totalsize = (uint32_t)(next - dest);
538 header->reserved = 0;
// HRT tag: tells the HRT kernel about the APICs and where its memory starts
541 if (core->vm_info->hvm_state.is_hvm && v3_is_hvm_hrt_core(core)) {
542 hrt->tag.type = MB_INFO_HRT_TAG;
543 hrt->tag.size = sizeof(mb_info_hrt_t);
544 hrt->total_num_apics = vm->num_cores;
545 hrt->first_hrt_apic_id = vm->hvm_state.first_hrt_core;
546 hrt->have_hrt_ioapic=0;
547 hrt->first_hrt_ioapic_entry=0;
548 hrt->first_hrt_addr = vm->hvm_state.first_hrt_gpa;
// Basic memory tag, values in KB.
// NOTE(review): mem_upper wraps around if total_mem is below 1 MB; the
// comment above only assumes, and nothing visible enforces, a larger size.
552 mem->tag.type = MB_INFO_MEM_TAG;
553 mem->tag.size = sizeof(mb_info_mem_t);
554 mem->mem_lower = 640; // thank you, bill gates
555 mem->mem_upper = (total_mem - 1024 * 1024) / 1024;
// Memory map tag header.
// NOTE(review): entry_size is hard-coded to 24; presumably this equals
// sizeof(mb_info_memmap_entry_t) - confirm.
557 memmap->tag.type = MB_INFO_MEMMAP_TAG;
558 memmap->tag.size = sizeof(mb_info_memmap_t) + num_mem * sizeof(mb_info_memmap_entry_t);
559 memmap->entry_size = 24;
560 memmap->entry_version = 0;
// conventional memory (0->640K)
565 memmap->entries[cur_mem].base_addr = 0;
566 memmap->entries[cur_mem].length = 640*1024;
567 memmap->entries[cur_mem].type = MEM_RAM;
568 memmap->entries[cur_mem].reserved = 0;
571 // legacy io (640K->1 MB)
572 memmap->entries[cur_mem].base_addr = 640*1024;
573 memmap->entries[cur_mem].length = 384*1024;
574 memmap->entries[cur_mem].type = MEM_RESV;
575 memmap->entries[cur_mem].reserved = 1;
578 // first meg to ioapic
// RAM from 1 MB up to the IOAPIC page, or to the end of memory if that comes first
579 memmap->entries[cur_mem].base_addr = 1024*1024;
580 memmap->entries[cur_mem].length = (total_mem < IOAPIC_BASE ? total_mem : IOAPIC_BASE) - 1024*1024;
581 memmap->entries[cur_mem].type = MEM_RAM;
582 memmap->entries[cur_mem].reserved = 0;
585 // ioapic reservation
586 memmap->entries[cur_mem].base_addr = IOAPIC_BASE;
587 memmap->entries[cur_mem].length = PAGE_SIZE;
588 memmap->entries[cur_mem].type = MEM_RESV;
589 memmap->entries[cur_mem].reserved = 1;
592 if (total_mem > (IOAPIC_BASE + PAGE_SIZE)) {
593 // memory between ioapic and apic
594 memmap->entries[cur_mem].base_addr = IOAPIC_BASE+PAGE_SIZE;
595 memmap->entries[cur_mem].length = (total_mem < APIC_BASE ? total_mem : APIC_BASE) - (IOAPIC_BASE+PAGE_SIZE);;
596 memmap->entries[cur_mem].type = MEM_RAM;
597 memmap->entries[cur_mem].reserved = 0;
// apic reservation
602 memmap->entries[cur_mem].base_addr = APIC_BASE;
603 memmap->entries[cur_mem].length = PAGE_SIZE;
604 memmap->entries[cur_mem].type = MEM_RESV;
605 memmap->entries[cur_mem].reserved = 1;
608 if (total_mem > (APIC_BASE + PAGE_SIZE)) {
// memory above the apic page, up to the end of guest memory
610 memmap->entries[cur_mem].base_addr = APIC_BASE+PAGE_SIZE;
611 memmap->entries[cur_mem].length = total_mem - (APIC_BASE+PAGE_SIZE);
612 memmap->entries[cur_mem].type = MEM_RAM;
613 memmap->entries[cur_mem].reserved = 0;
// Debug dump of the finished map (cur_mem is reused as the loop index)
617 for (cur_mem=0;cur_mem<num_mem;cur_mem++) {
618 PrintDebug(vm, VCORE_NONE,
619 "multiboot: entry %llu: %p (%llx bytes) - type %x %s\n",
621 (void*) memmap->entries[cur_mem].base_addr,
622 memmap->entries[cur_mem].length,
623 memmap->entries[cur_mem].type,
624 memmap->entries[cur_mem].reserved ? "reserved" : "");
629 // This demarcates end of list
633 return header->totalsize;
/*
 * Copy a parsed multiboot kernel into guest physical memory and zero
 * its BSS.
 *
 * vm:    VM to load into (writes go through core 0)
 * mb:    parse result for file; its address and entry tags are required
 * file:  the kernel image
 * base:  lowest guest physical address the kernel may occupy
 * limit: size in bytes of the allowed region starting at base
 *
 * The load is driven entirely by the multiboot address tag (load_addr,
 * load_end_addr, bss_end_addr); no ELF program headers are interpreted.
 */
638 int v3_write_multiboot_kernel(struct v3_vm_info *vm, mb_data_t *mb, struct v3_cfg_file *file,
639 void *base, uint64_t limit)
// Byte offset of the multiboot header within the file image
642 uint32_t header_offset = (uint32_t) ((uint64_t)(mb->header) - (uint64_t)(file->data));
645 if (!mb->addr || !mb->entry) {
646 PrintError(vm,VCORE_NONE, "multiboot: kernel is missing address or entry point\n");
// The header must not lie below base, and the loaded image and its BSS
// must end within base+limit (void* arithmetic: a GNU C extension).
// NOTE(review): the multiboot2 spec lets load_end_addr and bss_end_addr
// be 0 ("whole file" / "no bss"); those encodings are not handled here
// - confirm they cannot occur for supported kernels.
650 if (((void*)(uint64_t)(mb->addr->header_addr) < base ) ||
651 ((void*)(uint64_t)(mb->addr->load_end_addr) > base+limit) ||
652 ((void*)(uint64_t)(mb->addr->bss_end_addr) > base+limit)) {
653 PrintError(vm,VCORE_NONE, "multiboot: kernel is not within the allowed portion of VM\n");
// File offset of the byte that belongs at load_addr: the header sits
// (header_addr - load_addr) bytes into the loaded region
657 offset = header_offset - (mb->addr->header_addr - mb->addr->load_addr);
658 size = mb->addr->load_end_addr - mb->addr->load_addr;
// A mismatch with the remaining file length is only reported, not treated as an error
660 if (size != file->size-offset) {
661 V3_Print(vm,VCORE_NONE,"multiboot: strange: size computed as %u, but file->size-offset = %llu\n",size,file->size-offset);
664 // We are trying to do as little ELF loading here as humanly possible
665 v3_write_gpa_memory(&vm->cores[0],
666 (addr_t)(mb->addr->load_addr),
670 PrintDebug(vm,VCORE_NONE,
671 "multiboot: wrote 0x%llx bytes starting at offset 0x%llx to %p\n",
674 (void*)(addr_t)(mb->addr->load_addr));
// NOTE(review): the +1 treats bss_end_addr as inclusive; if it is an
// exclusive end address this zeroes one byte too many, and that byte
// was not covered by the range check above - confirm.
676 size = mb->addr->bss_end_addr - mb->addr->load_end_addr + 1;
678 // Now we need to zero the BSS
679 v3_set_gpa_memory(&vm->cores[0],
680 (addr_t)(mb->addr->load_end_addr),
684 PrintDebug(vm,VCORE_NONE,
685 "multiboot: zeroed 0x%llx bytes starting at %p\n",
687 (void*)(addr_t)(mb->addr->load_end_addr));
/*
 * Validate the VM's configured multiboot kernel file and load it into
 * guest memory.
 *
 * The file must not be larger than guest memory, must be an ELF and
 * must contain a multiboot2 header.  It is then parsed into
 * vm->mb_state.mb_data and written via v3_write_multiboot_kernel().
 * Every failure is reported with PrintError.
 */
695 static int setup_multiboot_kernel(struct v3_vm_info *vm)
// The kernel may be placed anywhere within guest memory
698 uint64_t limit = vm->mem_size;
701 if (vm->mb_state.mb_file->size > limit) {
702 PrintError(vm,VCORE_NONE,"multiboot: Cannot map kernel because it is too big (%llu bytes, but only have %llu space\n", vm->mb_state.mb_file->size, (uint64_t)limit);
706 if (!is_elf(vm->mb_state.mb_file->data,vm->mb_state.mb_file->size)) {
707 PrintError(vm,VCORE_NONE,"multiboot: supplied kernel is not an ELF\n");
// These two checks are repeated inside v3_parse_multiboot_header();
// doing them here first allows more specific error messages
710 if (find_mb_header(vm->mb_state.mb_file->data,vm->mb_state.mb_file->size)) {
711 PrintDebug(vm,VCORE_NONE,"multiboot: appears to be a multiboot kernel\n");
712 if (v3_parse_multiboot_header(vm->mb_state.mb_file,&vm->mb_state.mb_data)) {
713 PrintError(vm,VCORE_NONE,"multiboot: cannot parse multiboot kernel header\n");
716 if (v3_write_multiboot_kernel(vm, &(vm->mb_state.mb_data),vm->mb_state.mb_file,base,limit)) {
717 PrintError(vm,VCORE_NONE,"multiboot: multiboot kernel setup failed\n");
721 PrintError(vm,VCORE_NONE,"multiboot: multiboot kernel has no header\n");
730 // 32 bit GDT entries
732 // base24-31 flags2 limit16-19 access8 base16-23 base0-15 limit0-15
733 // null 0 0 0 0 0 0 0
734 // code 0 1100 f 10011010 0 0 ffff
735 // data 0 1100 f 10010010 0 0 ffff
737 // null = 00 00 00 00 00 00 00 00
738 // code = 00 cf 9a 00 00 00 ff ff
739 // data = 00 cf 92 00 00 00 ff ff
// Stub GDT copied into the guest by write_gdt(): three 8 byte
// descriptors (24 bytes in total).  The boot core's CS selector 0x8 and
// data selectors 0x10 refer to entries 1 and 2.
741 static uint64_t gdt32[3] = {
742 0x0000000000000000, /* null */
743 0x00cf9a000000ffff, /* code (note lme=0) */
744 0x00cf92000000ffff, /* data */
747 static void write_gdt(struct v3_vm_info *vm, void *base, uint64_t limit)
749 v3_write_gpa_memory(&vm->cores[0],(addr_t)base,limit,(uint8_t*) gdt32);
751 PrintDebug(vm,VCORE_NONE,"multiboot: wrote GDT at %p\n",base);
755 static void write_tss(struct v3_vm_info *vm, void *base, uint64_t limit)
757 v3_set_gpa_memory(&vm->cores[0],(addr_t)base,limit,0);
759 PrintDebug(vm,VCORE_NONE,"multiboot: wrote TSS at %p\n",base);
/*
 * Build the multiboot information table from core 0's perspective and
 * copy it into guest memory at base.
 *
 * vm:    VM to write into
 * base:  guest physical address for the table
 * limit: space available at base; capped at 256 bytes
 *
 * On failure an error is logged and nothing is written; the function
 * has no return value, so callers are not told.
 */
762 static void write_table(struct v3_vm_info *vm, void *base, uint64_t limit)
// The table is staged in a local buffer; never ask for more than 256 bytes
767 limit = limit < 256 ? limit : 256;
769 size = v3_build_multiboot_table(&vm->cores[0], buf, limit);
// 0 signals failure from the builder
771 if (size>256 || size==0) {
772 PrintError(vm,VCORE_NONE,"multiboot: cannot build multiboot table\n");
// Only the bytes actually used by the table are copied to the guest
776 v3_write_gpa_memory(&vm->cores[0],(addr_t)base,size,buf);
788 Kernel at its desired load address (or error)
/*
 * Prepare guest memory of a multiboot VM for boot.
 *
 * Loads the kernel at its requested address, then places three
 * consecutive pages of boot data in guest memory:
 *   mb_gpa          multiboot information table
 *   mb_gpa + 1 page stub TSS
 *   mb_gpa + 2 page stub GDT
 * mb_gpa is remembered in vm->mb_state.mb_data_gpa for the core setup.
 *
 * Nothing is done for a VM that is not a multiboot VM.
 */
793 int v3_setup_multiboot_vm_for_boot(struct v3_vm_info *vm)
795 void *kernel_start_gpa;
796 void *kernel_end_gpa;
801 if (!vm->mb_state.is_multiboot) {
802 PrintDebug(vm,VCORE_NONE,"multiboot: skipping multiboot setup for boot as this is not a multiboot VM\n");
807 if (setup_multiboot_kernel(vm)) {
808 PrintError(vm,VCORE_NONE,"multiboot: failed to setup kernel\n");
// Footprint of the loaded kernel, including its BSS
812 kernel_start_gpa = (void*) (uint64_t) (vm->mb_state.mb_data.addr->load_addr);
813 kernel_end_gpa = (void*) (uint64_t) (vm->mb_state.mb_data.addr->bss_end_addr);
815 // Is there room below the kernel?
// 19 pages = the 16 pages below 64K plus the 3 pages of boot data
816 if ((uint64_t)kernel_start_gpa > 19*4096 ) {
817 // at least 3 pages between 64K and start of kernel
819 mb_gpa=(void*)(16*4096);
821 // is there room above the kernel?
822 if ((uint64_t)kernel_end_gpa < vm->mem_size-4*4096) {
// The boot data must stay addressable with 32 bits
823 if (((uint64_t)kernel_end_gpa + 4 * 4096) <= 0xffffffff) {
// First page boundary strictly above the end of the kernel
824 mb_gpa=(void*) (4096*((uint64_t)kernel_end_gpa/4096 + 1));
826 PrintError(vm,VCORE_NONE,"multiboot: no room for mb data below 4 GB\n");
830 PrintError(vm,VCORE_NONE,"multiboot: no room for mb data above kernel\n");
835 PrintDebug(vm,VCORE_NONE,"multiboot: mb data will start at %p\n",mb_gpa);
837 vm->mb_state.mb_data_gpa=mb_gpa;
// void* arithmetic (GNU C extension): one page each
839 tss_gpa = mb_gpa + 1 * 4096;
840 gdt_gpa = mb_gpa + 2 * 4096;
// Each writer is given a full page as its limit.  All three are void
// functions, so a failure inside them is not reflected in the result here.
842 write_table(vm,mb_gpa,4096);
844 write_tss(vm,tss_gpa,4096);
846 write_gdt(vm,gdt_gpa,4096);
848 PrintDebug(vm,VCORE_NONE,"multiboot: setup of memory done\n");
857 GDTR points to stub GDT
858 TR points to stub TSS
859 CR0 has PE and not PG
860 EIP is entry point to kernel
861 EBX points to multiboot info
862 EAX multiboot magic cookie
/*
 * Put the boot core of a multiboot VM into the machine state a
 * multiboot2 kernel expects on entry.
 *
 * Only vcpu 0 of a multiboot VM is touched; any other core, or any
 * core of a non-multiboot VM, is left alone.  The core ends up in
 * 32 bit protected mode without paging, at ring 0, with RIP at the
 * kernel entry point, RAX holding the multiboot2 magic and RBX the
 * guest physical address of the multiboot information table.
 *
 * Relies on v3_setup_multiboot_vm_for_boot() having run first, since
 * it reads vm_info->mb_state.mb_data and mb_data_gpa.
 */
865 int v3_setup_multiboot_core_for_boot(struct guest_info *core)
870 if (!core->vm_info->mb_state.is_multiboot) {
871 PrintDebug(core->vm_info,core,"multiboot: skipping mb core setup as this is not an mb VM\n");
875 if (core->vcpu_id != 0) {
876 PrintDebug(core->vm_info,core,"multiboot: skipping mb core setup as this is not the BSP core\n");
881 PrintDebug(core->vm_info, core, "multiboot: setting up MB BSP core for boot\n");
// Start from an all-zero architectural state; only non-zero fields are set below
884 memset(&core->vm_regs,0,sizeof(core->vm_regs));
885 memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
886 memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
887 memset(&core->segments,0,sizeof(core->segments));
888 memset(&core->msrs,0,sizeof(core->msrs));
889 memset(&core->fp_state,0,sizeof(core->fp_state));
891 // We need to be in protected mode at ring zero
892 core->cpl = 0; // we are going right into the kernel
893 core->cpu_mode = PROTECTED;
894 core->mem_mode = PHYSICAL_MEM;
895 // default run-state is fine, we are core zero
896 // core->core_run_state = CORE_RUNNING ;
898 // right into the kernel
899 core->rip = (uint64_t) core->vm_info->mb_state.mb_data.entry->entry_addr;
// Every control register is mirrored into the shadow paging state's guest copy
901 // Setup CRs for protected mode
902 // CR0: PE (but no PG)
903 core->ctrl_regs.cr0 = 0x1;
904 core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;
906 // CR2: don't care (output from #PF)
907 // CR3: don't care (no paging)
908 core->ctrl_regs.cr3 = 0;
909 core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
912 core->ctrl_regs.cr4 = 0x0;
913 core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;
915 // RFLAGS zeroed is fine: come in with interrupts off
916 // EFER needs SVME and LME but not LMA (last 16 bits: 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
// NOTE(review): 0x1400 sets bits 12 and 10.  In the AMD64 EFER layout
// bit 12 is SVME, bit 10 is LMA and LME is bit 8, so the value does not
// match the comment above (LME and not LMA would be 0x1100) - confirm
// which one is intended.
917 core->ctrl_regs.efer = 0x1400;
918 core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;
924 selector is 13 bits of index, 1 bit table indicator
927 index is scaled by 8, even in long mode, where some entries
928 are 16 bytes long....
929 -> code, data descriptors have 8 byte format
930 because base, limit, etc, are ignored (no segmentation)
931 -> interrupt/trap gates have 16 byte format
932 because offset needs to be 64 bits
935 // There is no IDTR set and interrupts are disabled
937 // Install our stub GDT
// The stub GDT is the third page of the boot data area
// (mb_data_gpa + 2 pages), matching v3_setup_multiboot_vm_for_boot()
938 core->segments.gdtr.selector = 0;
939 core->segments.gdtr.base = (addr_t) core->vm_info->mb_state.mb_data_gpa+2*4096;
940 core->segments.gdtr.limit = 4096-1;
941 core->segments.gdtr.type = 0x6;
942 core->segments.gdtr.system = 1;
943 core->segments.gdtr.dpl = 0;
944 core->segments.gdtr.present = 1;
945 core->segments.gdtr.long_mode = 0;
// The stub TSS is the second page of the boot data area (mb_data_gpa + 1 page)
948 core->segments.tr.selector = 0;
949 core->segments.tr.base = (addr_t) core->vm_info->mb_state.mb_data_gpa+1*4096;
950 core->segments.tr.limit = 4096-1;
951 core->segments.tr.type = 0x6;
952 core->segments.tr.system = 1;
953 core->segments.tr.dpl = 0;
954 core->segments.tr.present = 1;
955 core->segments.tr.long_mode = 0;
// Flat 32 bit code segment; base and limit are locals of this function
961 core->segments.cs.selector = 0x8 ; // entry 1 of GDT (RPL=0)
962 core->segments.cs.base = (addr_t) base;
963 core->segments.cs.limit = limit;
964 core->segments.cs.type = 0xa;
965 core->segments.cs.system = 1;
966 core->segments.cs.dpl = 0;
967 core->segments.cs.present = 1;
968 core->segments.cs.long_mode = 0;
969 core->segments.cs.db = 1; // 32 bit operand and address size
970 core->segments.cs.granularity = 1; // pages
972 // DS, SS, etc are identical
973 core->segments.ds.selector = 0x10; // entry 2 of GDT (RPL=0)
974 core->segments.ds.base = (addr_t) base;
975 core->segments.ds.limit = limit;
976 core->segments.ds.type = 0x2;
977 core->segments.ds.system = 1;
978 core->segments.ds.dpl = 0;
979 core->segments.ds.present = 1;
980 core->segments.ds.long_mode = 0;
981 core->segments.ds.db = 1; // 32 bit operand and address size
982 core->segments.ds.granularity = 1; // pages
// Clone the data segment into the remaining segment registers
984 memcpy(&core->segments.ss,&core->segments.ds,sizeof(core->segments.ds));
985 memcpy(&core->segments.es,&core->segments.ds,sizeof(core->segments.ds));
986 memcpy(&core->segments.fs,&core->segments.ds,sizeof(core->segments.ds));
987 memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
991 // Now for our magic - this signals
992 // the kernel that a multiboot loader loaded it
993 // and that rbx points to its offered data
994 core->vm_regs.rax = MB2_INFO_MAGIC;
996 core->vm_regs.rbx = (uint64_t) (core->vm_info->mb_state.mb_data_gpa);
998 // reset paging here for shadow...
// Only nested paging is handled; any other paging mode is reported as an error
1000 if (core->shdw_pg_mode != NESTED_PAGING) {
1001 PrintError(core->vm_info, core, "multiboot: shadow paging guest... this will end badly\n");
/*
 * Take part in a reset of a multiboot VM.
 *
 * Meant to be called by every core.  A core only participates when its
 * run state is CORE_RESETTING and the VM is a multiboot VM.  All
 * participating cores meet at vm_info->reset_barrier; vcpu 0 acts as
 * leader: it marks the VM as resetting, reloads the kernel image and
 * boot data, and is the core that resumes running.  All other cores
 * are returned to real mode and stopped.
 */
1010 int v3_handle_multiboot_reset(struct guest_info *core)
1014 if (core->core_run_state!=CORE_RESETTING) {
1018 if (!core->vm_info->mb_state.is_multiboot) {
1022 // wait for everyone
1023 v3_counting_barrier(&core->vm_info->reset_barrier);
1025 if (core->vcpu_id==0) {
1026 // I am leader (this is true if I am a ROS core or this is a non-HVM)
1027 core->vm_info->run_state = VM_RESETTING;
1032 if (core->vcpu_id==0) {
1033 // we will recopy the image
// rc accumulates failures from both setup steps via bitwise or
1034 rc |= v3_setup_multiboot_vm_for_boot(core->vm_info);
// Called on every core, but it only changes state on vcpu 0
1037 rc |= v3_setup_multiboot_core_for_boot(core);
1039 if (core->vcpu_id==0) {
1040 core->core_run_state = CORE_RUNNING;
1041 core->vm_info->run_state = VM_RUNNING;
1043 // for APs, we need to bring them back to the init state
1044 core->cpu_mode = REAL;
1045 core->mem_mode = PHYSICAL_MEM;
1046 core->core_run_state = CORE_STOPPED;
1049 // sync on the way out
1050 v3_counting_barrier(&core->vm_info->reset_barrier);