V3_Print(core->vm_info,core, "hvm: received hypercall %x rax=%llx rbx=%llx rcx=%llx at cycle count %llu (%llu cycles since last boot start) num_exits=%llu since initial boot\n",
hcall_id, core->vm_regs.rax, core->vm_regs.rbx, core->vm_regs.rcx, c, c-core->hvm_state.last_boot_start, core->num_exits);
- v3_print_core_telemetry(core);
+ //v3_print_core_telemetry(core);
// v3_print_guest_state(core);
return 0;
}
uint64_t v3_get_hvm_hrt_memsize(struct v3_vm_info *vm)
{
- if (vm->hvm_state.is_hvm) {
- return vm->mem_size - vm->hvm_state.first_hrt_gpa;
- } else {
- return 0;
- }
+ return vm->mem_size; // reports the whole mem_size of the VM unconditionally; NOTE(review): non-HVM VMs used to get 0 and HVM VMs got mem_size minus first_hrt_gpa - confirm every caller expects the new value
}
uint32_t v3_get_hvm_ros_cores(struct v3_vm_info *vm)
*limit = PAGE_SIZE;
}
-static uint64_t tss_data=0x0;
-
static void write_tss(struct v3_vm_info *vm)
{
void *base;
uint64_t limit;
- int i;
get_tss_loc(vm,&base,&limit);
- for (i=0;i<limit/8;i++) {
- v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+8*i),8,(uint8_t*) &tss_data);
- }
+
+ v3_set_gpa_memory(&vm->cores[0],(addr_t)base,limit,0); // hands the whole region (base, limit) and fill value 0 to a single call, replacing the loop of 8-byte zero writes; the return value is not checked
PrintDebug(vm,VCORE_NONE,"hvm: wrote TSS at %p\n",base);
}
#endif
}
-static void get_bp_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
+static void get_mb_info_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
#ifdef HVM_MAP_1G_2M
*base = (void*) PAGE_ADDR(BOOT_STATE_END_ADDR-(6+2)*PAGE_SIZE);
*limit = PAGE_SIZE;
}
-static void write_bp(struct v3_vm_info *vm)
+static void write_mb_info(struct v3_vm_info *vm) // builds the multiboot info table and copies it to the guest location given by get_mb_info_loc; returns void, so every failure below is only logged
{
- void *base;
- uint64_t limit;
- uint64_t data=-1;
- int i;
+ if (vm->hvm_state.hrt_type!=HRT_MBOOT64) { // only HRT_MBOOT64 is handled; hrt_type is assigned in setup_mb_kernel, so that has to run before this function
+ PrintError(vm, VCORE_NONE,"hvm: Cannot handle this HRT type\n");
+ return;
+ } else {
+ uint8_t buf[256]; // staging buffer; its size has to stay in sync with the 256 passed to v3_build_multiboot_table below
+ uint64_t size;
+ void *base;
+ uint64_t limit;
- get_bp_loc(vm,&base,&limit);
-
- for (i=0;i<limit/8;i++) {
- v3_write_gpa_memory(&vm->cores[0],(addr_t)(base+i*8),8,(uint8_t*)&data);
- }
+ get_mb_info_loc(vm,&base,&limit);
+
+ if ((size=v3_build_multiboot_table(&vm->cores[vm->hvm_state.first_hrt_core],buf,256))==-1) { // table is built for the first HRT core; size is uint64_t, so this test relies on -1 converting to the all-ones value
+ PrintError(vm,VCORE_NONE,"hvm: Failed to build MB info\n");
+ return;
+ }
- PrintDebug(vm,VCORE_NONE,"hvm: wrote boundary page at %p\n", base);
-
+ if (size>limit) { // the table has to fit in the space reported by get_mb_info_loc
+ PrintError(vm,VCORE_NONE,"hvm: MB info is too large\n");
+ return;
+ }
+
+ v3_write_gpa_memory(&vm->cores[vm->hvm_state.first_hrt_core],
+ (addr_t)base,
+ size,
+ buf); // NOTE(review): the return value of v3_write_gpa_memory is not checked, so the debug message below prints either way
+
+ PrintDebug(vm,VCORE_NONE, "hvm: wrote MB info at %p\n", base);
+ }
}
-#define MIN_STACK (4096*4)
+#define SCRATCH_STACK_SIZE 4096 // bytes reserved per HRT core for its scratch stack
static void get_hrt_loc(struct v3_vm_info *vm, void **base, uint64_t *limit)
{
- void *bp_base;
- uint64_t bp_limit;
+ void *mb_base;
+ uint64_t mb_limit; // filled in by get_mb_info_loc but not otherwise used in this function
- get_bp_loc(vm,&bp_base,&bp_limit);
+ get_mb_info_loc(vm,&mb_base,&mb_limit);
- // assume at least a minimal stack
-
- bp_base-=MIN_STACK;
+ mb_base-=SCRATCH_STACK_SIZE*v3_get_hvm_hrt_cores(vm); // step down past one scratch stack per HRT core; the HRT region ends where those stacks begin
*base = (void*)PAGE_ADDR(vm->hvm_state.first_hrt_gpa);
- if (bp_base < *base+PAGE_SIZE) {
+ if (mb_base < *base+PAGE_SIZE) { // requires at least one page of room for the HRT; a collision is only logged and limit is still computed below
PrintError(vm,VCORE_NONE,"hvm: HRT stack colides with HRT\n");
}
- *limit = bp_base - *base;
+ *limit = mb_base - *base; // bytes from PAGE_ADDR(first_hrt_gpa) up to the lowest scratch stack; NOTE(review): if mb_base is below *base this difference is negative and is stored as a huge uint64_t
}
static int setup_mb_kernel(struct v3_vm_info *vm, void *base, uint64_t limit)
{
mb_data_t mb;
- uint32_t offset;
-
-
- // FIX USING GENERIC TOOLS
if (v3_parse_multiboot_header(vm->hvm_state.hrt_file,&mb)) {
PrintError(vm,VCORE_NONE, "hvm: failed to parse multiboot kernel header\n");
return -1;
}
+
+ if (v3_write_multiboot_kernel(vm,&mb,vm->hvm_state.hrt_file,base,limit)) { // loading the image is delegated to v3_write_multiboot_kernel with the (base, limit) region; a nonzero result is treated as failure
+ PrintError(vm,VCORE_NONE, "hvm: failed to write multiboot kernel into memory\n");
+ return -1;
+ }
+
+ /*
if (!mb.addr || !mb.entry) {
PrintError(vm,VCORE_NONE, "hvm: kernel is missing address or entry point\n");
return -1;
offset = mb.addr->load_addr - mb.addr->header_addr;
// Skip the ELF header - assume 1 page... weird....
+ // FIX ME TO CONFORM TO MULTIBOOT.C
v3_write_gpa_memory(&vm->cores[0],
(addr_t)(mb.addr->load_addr),
vm->hvm_state.hrt_file->size-PAGE_SIZE-offset,
// vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr + PAGE_SIZE; //HACK PAD
- vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr;
-
- vm->hvm_state.hrt_type = HRT_MBOOT64;
PrintDebug(vm,VCORE_NONE,
"hvm: wrote 0x%llx bytes starting at offset 0x%llx to %p; set entry to %p\n",
(uint64_t) PAGE_SIZE+offset,
(void*)(addr_t)(mb.addr->load_addr),
(void*) vm->hvm_state.hrt_entry_addr);
+
+
+ */
+
+ vm->hvm_state.hrt_entry_addr = (uint64_t) mb.entry->entry_addr; // NOTE(review): the check for a missing mb.addr or mb.entry now sits inside the disabled region above, so mb.entry is dereferenced with no visible NULL check - confirm v3_write_multiboot_kernel rejects such headers
+
+ vm->hvm_state.hrt_type = HRT_MBOOT64; // recorded on success; write_mb_info refuses any other hrt_type
+
return 0;
}
PAGETABLES (identy map of first N GB)
ROOT PT first, followed by 2nd level, etc.
Currently PML4 followed by 1 PDPE for 512 GB of mapping
- BOUNDARY PAGE (all 0xff - avoid smashing page tables in case we keep going...)
- (stack - we will push machine description)
+ MBINFO_PAGE
+ SCRATCH_STACK_HRT_CORE0
+ SCRATCH_STACK_HRT_CORE1
+ ..
+ SCRATCH_STACK_HRT_COREN
...
HRT (as many pages as needed, page-aligned, starting at first HRT address)
---
write_pt(vm);
- write_bp(vm);
if (setup_hrt(vm)) {
PrintError(vm,VCORE_NONE,"hvm: failed to setup HRT\n");
return -1;
}
+ // need to parse HRT first
+ write_mb_info(vm);
PrintDebug(vm,VCORE_NONE,"hvm: setup of HVM memory done\n");
}
/*
- On entry:
+ On entry for every core:
IDTR points to stub IDT
GDTR points to stub GDT
CR3 points to root page table
CR0 has PE and PG
EFER has LME AND LMA
- RSP is TOS (looks like a call)
- INFO <= RDI
- 0 (fake return address) <= RSP
-
- RIP is entry point to HRT
- RDI points to machine info on stack
+ RSP is TOS of core's scratch stack (looks like a call)
+
+ RAX = MB magic cookie
+ RBX = address of multiboot info table
+ RCX = this core id / apic id (0..N-1)
+ RDX = this core id - first HRT core ID (==0 for the first HRT core)
Other regs are zeroed
PrintDebug(core->vm_info, core, "hvm: setting up HRT core (%u) for boot\n", core->vcpu_id);
+
+
memset(&core->vm_regs,0,sizeof(core->vm_regs));
memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
core->mem_mode = VIRTUAL_MEM;
core->core_run_state = CORE_RUNNING ;
- // We are going to enter right into the HRT
- // HRT stack and argument passing
- get_bp_loc(core->vm_info, &base,&limit);
- // TODO: push description here
- core->vm_regs.rsp = (v3_reg_t) base; // so if we ret, we will blow up
- core->vm_regs.rbp = (v3_reg_t) base;
- // TODO: RDI should really get pointer to description
- core->vm_regs.rdi = (v3_reg_t) base;
+
+ // magic
+ core->vm_regs.rax = MB2_INFO_MAGIC;
+
+ // multiboot info pointer
+ get_mb_info_loc(core->vm_info, &base,&limit);
+ core->vm_regs.rbx = (uint64_t) base;
+
+ // core number
+ core->vm_regs.rcx = core->vcpu_id;
+
+ // HRT core number
+ core->vm_regs.rdx = core->vcpu_id - core->vm_info->hvm_state.first_hrt_core;
+
+ // Now point to scratch stack for this core
+ // it begins at an offset relative to the MB info page
+ get_mb_info_loc(core->vm_info, &base,&limit);
+ base -= core->vm_regs.rdx * SCRATCH_STACK_SIZE;
+ core->vm_regs.rsp = (v3_reg_t) base;
+ core->vm_regs.rbp = (v3_reg_t) base-8;
+
+ // push onto the stack a bad rbp and bad return address
+ core->vm_regs.rsp-=16;
+ v3_set_gpa_memory(core,
+ core->vm_regs.rsp,
+ 16,
+ 0xff);
+
+
// HRT entry point
get_hrt_loc(core->vm_info, &base,&limit);
core->rip = (uint64_t) core->vm_info->hvm_state.hrt_entry_addr ;
+
+ PrintDebug(core->vm_info,core,"hvm: hrt core %u has rip=%p, rsp=%p, rbp=%p, rax=%p, rbx=%p, rcx=%p, rdx=%p\n",
+ (core->vcpu_id - core->vm_info->hvm_state.first_hrt_core),
+ (void*)(core->rip),
+ (void*)(core->vm_regs.rsp),
+ (void*)(core->vm_regs.rbp),
+ (void*)(core->vm_regs.rax),
+ (void*)(core->vm_regs.rbx),
+ (void*)(core->vm_regs.rcx),
+ (void*)(core->vm_regs.rdx));
+
// Setup CRs for long mode and our stub page table
// CR0: PG, PE
core->ctrl_regs.cr0 = 0x80000001;
memcpy(&core->segments.gs,&core->segments.ds,sizeof(core->segments.ds));
- if (core->vm_info->hvm_state.hrt_type==HRT_MBOOT64) {
- /*
- Temporary hackery for multiboot2 "64"
- We will push the MB structure onto the stack and update RSP
- and RBX
- */
- uint8_t buf[256];
- uint64_t size;
-
- if ((size=v3_build_multiboot_table(core,buf,256))==-1) {
- PrintError(core->vm_info,core,"hvm: Failed to write MB info\n");
- return -1;
- }
- core->vm_regs.rsp -= size;
+ // reset paging here for shadow...
- v3_write_gpa_memory(core,
- core->vm_regs.rsp,
- size,
- buf);
+ if (core->shdw_pg_mode != NESTED_PAGING) {
+ PrintError(core->vm_info, core, "hvm: shadow paging guest... this will end badly\n");
+ return -1;
+ }
- PrintDebug(core->vm_info,core, "hvm: wrote MB info at %p\n", (void*)core->vm_regs.rsp);
- if (core->vcpu_id == core->vm_info->hvm_state.first_hrt_core) {
- // We are the BSP for this HRT
- // this is where rbx needs to point
- core->vm_regs.rbx = core->vm_regs.rsp;
- PrintDebug(core->vm_info,core, "hvm: \"BSP\" core\n");
- } else {
- // We are an AP for this HRT
- // so we don't get the multiboot struct
- core->vm_regs.rbx = 0;
- PrintDebug(core->vm_info,core, "hvm: \"AP\" core\n");
- }
+ return 0;
+}
+int v3_handle_hvm_reset(struct guest_info *core) // returns 0 when nothing is done here, 1 after an HRT core has been set up for boot again, or the negative rc when that setup failed
+{
+ if (core->core_run_state != CORE_RESETTING) { // only cores already marked CORE_RESETTING are processed
+ return 0;
+ }
- // one more push, something that looks like a return address
- size=0;
- core->vm_regs.rsp -= 8;
+ if (!core->vm_info->hvm_state.is_hvm) { // VMs that are not HVM are left alone
+ return 0;
+ }
- v3_write_gpa_memory(core,
- core->vm_regs.rsp,
- 8,
- (uint8_t*) &size);
-
- // Now for our magic - this signals
- // the kernel that a multiboot loader loaded it
- // and that rbx points to its offered data
- core->vm_regs.rax = MB2_INFO_MAGIC;
-
- /*
- Note that "real" MB starts in protected mode without paging
- This hack starts in long mode... so these requirements go
- out the window for a large part
+ if (v3_is_hvm_hrt_core(core)) {
+ // this is an HRT reset
+ int rc=0;
- Requirements:
+ // wait for all the HRT cores
+ v3_counting_barrier(&core->vm_info->reset_barrier); // NOTE(review): reset_barrier lives in vm_info while ROS cores return below without reaching it - confirm its count covers only the HRT cores
- OK EAX has magic
- OK EBX points to MB info
- OK CS = base 0, offset big, code (LONG MODE)
- OK DS,ES,FS,GS,SS => base 0, offset big, data (LONG MODE)
- OK A20 gate on
- XXX CR0 PE on PG off (nope)
- XXX EFLAGS IF and VM off
- */
-
+ if (core->vcpu_id==core->vm_info->hvm_state.first_hrt_core) {
+ // I am leader
+ core->vm_info->run_state = VM_RESETTING;
+ }
+ core->core_run_state = CORE_RESETTING; // already the case given the guard at the top of the function
- }
+ if (core->vcpu_id==core->vm_info->hvm_state.first_hrt_core) {
+ // we really only need to clear the bss
+ // and recopy the .data, but for now we'll just
+ // do everything
+ rc |= v3_setup_hvm_vm_for_boot(core->vm_info); // NOTE(review): nothing visible here makes the other HRT cores wait for this VM-level setup before they run their own core setup below
+ }
+ // now everyone is ready to reset
+ rc |= v3_setup_hvm_hrt_core_for_boot(core); // results are OR-ed into rc, so any negative result leaves rc negative
- // reset paging here for shadow...
+ core->core_run_state = CORE_RUNNING;
- if (core->shdw_pg_mode != NESTED_PAGING) {
- PrintError(core->vm_info, core, "hvm: shadow paging guest... this will end badly\n");
- return -1;
- }
+ if (core->vcpu_id==core->vm_info->hvm_state.first_hrt_core) {
+ // leader
+ core->vm_info->run_state = VM_RUNNING;
+ }
+ v3_counting_barrier(&core->vm_info->reset_barrier); // second rendezvous; reached even when rc is negative
- return 0;
+ if (rc<0) {
+ return rc;
+ } else {
+ return 1; // presumably tells the caller the reset was fully handled here, in contrast to the 0 returned for ROS cores
+ }
+
+ } else {
+ // ROS core will be handled by normal reset functionality
+ return 0;
+ }
}