Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are similar.
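For example, to list the remote branches and then track one of the release branches (the release branch name below is illustrative; use one of the names that git actually lists), execute

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2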


Latest commit: enabled stopping a VM before the secondary cores have been initialized

File: palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-CPU host state save areas ("host VMCBs")
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept SVM and other sensitive instructions
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 

    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

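    // After reset, (E)DX holds the processor signature; 0xf00 indicates family 0xf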
    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
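    // 0x60010010 = CD | NW | WP | ET, with PE and PG clear (real mode)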
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;

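    // Architectural reset values for DR6 and DR7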
    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

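        // 0x0007040600070406 is the power-on default PAT:
        // PA0=WB, PA1=WT, PA2=UC-, PA3=UC, repeated for PA4-PA7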
        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);


    {
        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);
    }
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();

    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save_8(ctx, "cpl", &(core->cpl));
    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_load_core(struct guest_info * core, void * ctx){

    v3_chkpt_load_8(ctx, "cpl", &(core->cpl));

    if (v3_chkpt_load(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1) {
        return -1;
    }

    return 0;
}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }

    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);

        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }

        guest_ctrl->EVENTINJ.vector = excp;

        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }

    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    sint64_t tsc_offset;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    tsc_offset = v3_tsc_host_offset(&info->time_state);
    guest_ctrl->TSC_OFFSET = tsc_offset;


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);

        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {

            if (info->vm_info->run_state == VM_STOPPED) {
                // The VM was stopped before this core was initialized. 
                return 0;
            }

            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);

        // We'll be paranoid about race conditions here
        v3_wait_at_barrier(info);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));

    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;

            info->vm_info->run_state = VM_ERROR;

            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 

            v3_print_guest_state(info);

            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 

            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));

            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));

            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));

            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }

            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);

            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);

            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }



/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/

    }

    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
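    //
    // For example, a vector of 0x9f yields CS.selector = 0x9f00 and
    // CS.base = 0x9f000, so with RIP = 0 the core restarts executing
    // at linear address 0x9f000.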
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);

        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);

        // SVMDIS is not bit 0, so test the masked value for nonzero
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;

    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));

    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );

    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif




#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;

  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];

    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */

    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;

  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;



  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);

  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // memset fills by byte; 0xff sets every bit of the IO permission map
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;


  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;

    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;

  }


  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif