Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
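For example, to list the remote branches and then track a release branch (the release branch name below is illustrative; use one reported by the first command):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2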


added swintr support to update_irq_entry_state
palacios/src/palacios/svm.c

/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas ("host VMCBs"), one per CPU
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);

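/* Allocate and zero one page to hold a VMCB.
 * Returns the host virtual address of the page, or NULL on allocation failure. */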
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


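/* Initialize a VMCB for a BIOS-style boot: configure the instruction and
 * exception intercepts, the initial real-mode register/segment state, the
 * I/O and MSR permission maps, and shadow or nested paging. */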
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept the SVM and related instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;

      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;

      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds),
                                      &(core->segments.es), &(core->segments.fs),
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];

        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR,
                &v3_handle_efer_read,
                &v3_handle_efer_write,
                core);
    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");

        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;


        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");

        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }

    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR,
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write,
        core);
}


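/* Allocate and initialize the VMCB for one core. Only the V3_PC_VM class is
 * currently supported. Returns 0 on success, -1 on failure. */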
int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


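/* Called after a VM exit, with global interrupts still disabled: advance the
 * virtual interrupt state machine based on what the guest actually consumed. */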
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


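/* Called before VM entry, with global interrupts disabled: inject any pending
 * exception, re-assert a previously started external IRQ via V_IRQ, or inject
 * a new external, NMI, software (swintr), or virtual interrupt via EVENTINJ. */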
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n",
                   (int)info->num_exits,
                   guest_ctrl->EVENTINJ.vector,
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n",
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR,
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
#ifdef V3_CONFIG_EXT_SW_INTERRUPTS
#ifdef V3_CONFIG_DEBUG_EXT_SW_INTERRUPTS
                PrintDebug("Caught an injected software interrupt\n");
                PrintDebug("\ttype: %d, vector: %d\n", SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
#endif
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}




/*
 * CAUTION and DANGER!!!
 *
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies
 * on its contents will cause things to break. The contents at the time of the exit WILL
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n",
      (void *)(addr_t)info->segments.cs.base,
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


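/* Core execution loop: core 0 marks the VM runnable, secondary cores wait to
 * be released, then each core repeatedly calls v3_svm_enter() until the VM
 * stops or an entry fails. */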
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n",
               info->vcpu_id, info->pcpu_id,
               info->segments.cs.selector, (void *)(info->segments.cs.base),
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n",
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id);

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code);

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }



/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error...

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}


/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // SVMDIS is a flag bit, so test for non-zero rather than comparing the masked value to 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

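/* Query CPUID for the SVM nested paging (NP) feature bit. */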
static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


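/* Per-CPU SVM setup: set EFER.SVME to enable SVM, then point
 * SVM_VM_HSAVE_PA_MSR at this CPU's host state save area. */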
void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


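/* Per-CPU SVM teardown: clear the host save area MSR, clear EFER.SVME, and
 * free this CPU's host save area pages. */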
void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}

#if 0
/*
 * Test VMSAVE/VMLOAD Latency
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9;
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2); // memset fills bytes, so pass 0xff rather than 0xffffffff
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif