Palacios Public Git Repository

To check out Palacios, run

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This gives you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
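For example, to see which branches exist and track a release branch (the release name below is illustrative; use one reported by git branch -r):

  git branch -r
  git checkout --track -b Release-1.3 origin/Release-1.3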


Minor bugfix
palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */



#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#include <palacios/vmm_perftune.h>

#include <palacios/vmm_bios.h>


#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

#ifdef V3_CONFIG_MEM_TRACK
#include <palacios/vmm_mem_track.h>
#endif 

#ifdef V3_CONFIG_TM_FUNC
#include <extensions/trans_mem.h>
#endif

#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif



uint32_t v3_last_exit;

// Physical addresses of the per-CPU host VMCBs
// (the host state save areas, one per possible CPU)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


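/* The hardware requires the VMCB to be a single 4KB-aligned page of
 * physical memory.  Allocating one full page with V3_AllocPages meets
 * both requirements; V3_VAddr then gives a host-virtual mapping that
 * we can zero and hand back.
 */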
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);   // need not be shadow safe, not exposed to guest

    if ((void *)vmcb_pa == NULL) {
        PrintError(VM_NONE, VCORE_NONE, "Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


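/* This wrapper runs the generic EFER write handler and then forces the
 * hardware-visible EFER.SVME bit back on, since the guest must never
 * be able to disable SVM underneath the VMM.  It is registered for the
 * EFER MSR via v3_hook_msr() in Init_VMCB_BIOS below.
 */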
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

/*
 * This is invoked both on an initial boot and on a reset
 * 
 * The difference is that on a reset we will not rehook anything
 *
 */

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    if (core->core_run_state != CORE_INVALID && core->core_run_state != CORE_RESETTING) { 
        PrintError(core->vm_info, core, "Attempt to Init_VMCB_BIOS in invalid state (%d)\n", core->core_run_state);
        return;
    }

    // need to invalidate any shadow page tables early
    if (core->shdw_pg_mode == SHADOW_PAGING && core->core_run_state == CORE_RESETTING) {
        if (v3_get_vm_cpu_mode(core) != REAL) {
            if (v3_invalidate_shadow_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not invalidate shadow page tables\n");
                return;
            }
        }
    }

    // Guarantee we are starting from a clean slate
    // even on a reset
    memset(vmcb, 0, 4096);

    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1; // secure startup... why
    ctrl_area->svm_instrs.ICEBP = 1;  // in circuit emulator breakpoint
    ctrl_area->svm_instrs.WBINVD = 1; // write back and invalidate caches... why?
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in asid... why?
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;


#ifdef V3_CONFIG_TM_FUNC
    v3_tm_set_excp_intercepts(ctrl_area);
#endif
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;    // do not care as does not halt
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    memset(&core->vm_regs, 0, sizeof(core->vm_regs));
    memset(&core->ctrl_regs, 0, sizeof(core->ctrl_regs));
    memset(&core->dbg_regs, 0, sizeof(core->dbg_regs));
    memset(&core->segments, 0, sizeof(core->segments));    
    memset(&core->msrs, 0, sizeof(core->msrs));    
    memset(&core->fp_state, 0, sizeof(core->fp_state));    

    // reset interrupts
    core->intr_core_state.irq_pending = 0; 
    core->intr_core_state.irq_started = 0; 
    core->intr_core_state.swintr_posted = 0; 

    // reset exceptions
    core->excp_state.excp_pending = 0;

    // reset of gprs to expected values at init
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;
    core->vm_regs.rdx = 0x00000f00;  // family/stepping/etc

    
    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1

    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;

    // cr3 zeroed above
    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
    // cr4 zeroed above
    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;

    core->ctrl_regs.efer |= EFER_MSR_svm_enable;
    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;

    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0xa;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x0;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = 0xffff;

        // (raw attributes = 0xf3)
        seg->type = 0x2;
        seg->system = 0x1;
        seg->dpl = 0x0;
        seg->present = 1;
    }

    core->segments.gdtr.selector = 0x0000;
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.gdtr.dpl = 0x0;

    core->segments.idtr.selector = 0x0000; 
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.ldtr.system = 0;
    core->segments.ldtr.type = 0x2;
    core->segments.ldtr.dpl = 0x0;

    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;
    core->segments.tr.system = 0;
    core->segments.tr.type = 0x3;
    core->segments.tr.dpl = 0x0;

    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;
    // The above also ensures the TPR changes (CR8) are only virtual


    // However, we need to see TPR writes since they will
    // affect the virtual apic
    // we reflect cr8 out to ctrl_regs->apic_tpr
    ctrl_area->cr_reads.cr8 = 1;
    ctrl_area->cr_writes.cr8 = 1;
    // We will do all TPR comparisons in the virtual apic
    // We also do not want the V_TPR to be able to mask the PIC
    ctrl_area->guest_ctrl.V_IGN_TPR = 1;

    

    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_svm_handle_efer_write, 
                    core);
    }

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        

        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
                return ;
            }
            // the shadow page tables are OK since we have not initialized them yet
        } else {
            // CORE_RESETTING
            // invalidation of shadow page tables happened earlier in this function
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        
        core->ctrl_regs.cr0 |= 0x80000000;

        v3_activate_passthrough_pt(core);

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        // intercept cr4 reads so the shadow pager can use PAE independently of the guest
        ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug(core->vm_info, core, "NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize Nested page tables\n");
                return ;
            }
        } else {
            // the existing nested page tables will work fine
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                    &v3_handle_vm_cr_read,
                    &v3_handle_vm_cr_write, 
                    core);
    }

    if (core->core_run_state == CORE_INVALID) { 
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }


}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug(core->vm_info, core, "Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError(core->vm_info, core, "Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError(core->vm_info, core, "Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    if (core->vmm_data) { 
        V3_FreePages(V3_PAddr(core->vmm_data), 1);
    }
    return 0;
}


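/* Performs the standard (non-HVM) reset protocol.  Returns 1 if a
 * reset was handled (the caller's run loop should recycle the core)
 * and 0 if no reset was in progress.  All resetting cores rendezvous
 * at the VM's reset barrier; core 0 leads by clearing guest memory and
 * re-copying the BIOSes before everyone re-initializes their VMCB.
 */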
static int svm_handle_standard_reset(struct guest_info *core)
{
    if (core->core_run_state != CORE_RESETTING) { 
        return 0;
    }

    PrintDebug(core->vm_info, core, "Handling standard reset (guest state before follows)\n");

#ifdef V3_CONFIG_DEBUG_SVM
    v3_print_guest_state(core);
#endif

    // wait until all resetting cores get here (ROS or whole VM)
    v3_counting_barrier(&core->vm_info->reset_barrier);

    // I could be a ROS core, or I could be in a non-HVM 
    // either way, if I'm core 0, I'm the leader
    if (core->vcpu_id == 0) {
        core->vm_info->run_state = VM_RESETTING;
        // copy bioses again because some, like seabios, assume
        // they are freshly loaded; this also blows away the BDA and EBDA
        PrintDebug(core->vm_info, core, "Clear memory (%p bytes)\n", (void*)core->vm_info->mem_size);
        if (v3_set_gpa_memory(core, 0, core->vm_info->mem_size, 0) != core->vm_info->mem_size) { 
            PrintError(core->vm_info, core, "Clear of memory failed\n");
        }
        PrintDebug(core->vm_info, core, "Copying bioses\n");
        if (v3_setup_bioses(core->vm_info, core->vm_info->cfg_data->cfg)) { 
            PrintError(core->vm_info, core, "Setup of bioses failed\n");
        }
    }

    Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);

    PrintDebug(core->vm_info, core, "InitVMCB done\n");

    core->cpl = 0;
    core->cpu_mode = REAL;
    core->mem_mode = PHYSICAL_MEM;
    core->num_exits = 0;

    PrintDebug(core->vm_info, core, "Machine reset to REAL/PHYSICAL\n");

    memset(V3_VAddr((void*)(host_vmcbs[V3_Get_CPU()])), 0, 4096*4); // good measure...

    // core zero will be restarted by the main execution loop
    core->core_run_state = CORE_STOPPED;

    if (core->vcpu_id == 0) { 
        core->vm_info->run_state = VM_RUNNING;
    } 

#ifdef V3_CONFIG_DEBUG_SVM
    PrintDebug(core->vm_info, core, "VMCB state at end of reset\n");
    PrintDebugVMCB((vmcb_t*)(core->vmm_data));
    PrintDebug(core->vm_info, core, "Guest state at end of reset\n");
    v3_print_guest_state(core);
#endif

    // wait until we are all ready to go
    v3_counting_barrier(&core->vm_info->reset_barrier);

    PrintDebug(core->vm_info, core, "Returning with request for recycle loop\n");

    return 1; // reboot is occurring

}

#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Special case saves of data we need immediate access to
  // in some cases
  V3_CHKPT_SAVE(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_SAVE(ctx, "STAR", guest_area->star, failout); 
  V3_CHKPT_SAVE(ctx, "CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_SAVE(ctx, "LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_SAVE(ctx, "SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_SAVE(ctx, "KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_SAVE(ctx, "SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_SAVE(ctx, "SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_SAVE(ctx, "SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we save the whole enchilada
  if (v3_chkpt_save(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not save SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to save SVM state for core\n");
  return -1;

}

int v3_svm_load_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Reload what we special cased, which we will overwrite in a minute
  V3_CHKPT_LOAD(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_LOAD(ctx, "STAR", guest_area->star, failout); 
  V3_CHKPT_LOAD(ctx, "CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_LOAD(ctx, "LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_LOAD(ctx, "SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_LOAD(ctx, "KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_LOAD(ctx, "SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_LOAD(ctx, "SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_LOAD(ctx, "SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we load the whole enchilada
  if (v3_chkpt_load(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not load SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to load SVM state for core\n");
  return -1;

}
#endif

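/* After an exit, EXITINTINFO tells us whether an event we injected was
 * actually delivered: if an injection was in progress and
 * exit_int_info.valid is clear, the guest took the interrupt fully
 * vectored; if it is still set, delivery was cut short by the exit and
 * the interrupt remains in flight.
 */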
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


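/* Event injection priority on entry, summarizing the logic below:
 * pending exceptions are injected first through EVENTINJ; otherwise,
 * if a previously injected IRQ is still in flight, V_IRQ is
 * re-asserted so the guest retries it; only then do we consider a new
 * external IRQ, NMI, software interrupt, or virtual IRQ.
 */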
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info, info, "Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;

        // We ignore the virtual TPR on this injection
        // TPR/PPR tests have already been done in the APIC.
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                int irq = v3_get_intr(info); 

                if (irq < 0) {
                  break;
                }

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;

                // We ignore the virtual TPR on this injection
                // TPR/PPR tests have already been done in the APIC.
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = irq >> 4 ;  // priority class of the new vector

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
                
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

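/* TSC handling has three modes here: trap RDTSC/RDTSCP and emulate
 * them, pass the hardware TSC through untouched (offset 0), or expose
 * the hardware TSC shifted by a per-core offset derived from the VM's
 * time state.
 */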
int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}



/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
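/* Rough flow of a single entry: synchronize guest state from the core
 * struct into the VMCB, inject pending events, launch the guest with
 * v3_svm_launch() between clgi/stgi, time the stay with rdtsc, pull
 * the resulting state back out of the VMCB, and finally dispatch to
 * v3_handle_svm_exit().
 */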
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;


    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_entry(info);
#endif 

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.

    v3_advance_time(info, NULL);

    v3_update_timers(info);


    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;

    // CR8 is now updated by read/writes and it contains the APIC TPR
    // the V_TPR should be just the class part of that.
    // This update is here just for completeness.  We currently
    // are ignoring V_TPR on all injections and doing the priority logic
    // in the APIC.
    // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf;

    //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    // 
    
    guest_state->rflags = info->ctrl_regs.rflags;

    // LMA, LME, SVE?

    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

    V3_FP_ENTRY_RESTORE(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 

    update_irq_entry_state(info);
#endif

#ifdef V3_CONFIG_TM_FUNC
    v3_tm_check_intr_state(info, guest_ctrl, guest_state);
#endif


    /* ** */

    /*
      PrintDebug(info->vm_info, info, "SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print(info->vm_info, info, "!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print(info->vm_info, info, "Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif


        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print(info->vm_info, info, "SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    //
    // We do not track this anymore
    // V_TPR is ignored and we do the logic in the APIC
    //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    //
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // This update timers is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);


    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError(info->vm_info, info, "Error in SVM exit handler (ret=%d)\n", ret);
            PrintError(info->vm_info, info, "  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);

            return -1;
        }
    }


    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_exit(info);
#endif 


    return 0;
}

int v3_start_svm_guest(struct guest_info * info) {

    int started = 0;

    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);


#ifdef V3_CONFIG_MULTIBOOT
    if (v3_setup_multiboot_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
        return -1;
    }
#endif

#ifdef V3_CONFIG_HVM
    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
        return -1;
    } 
#endif
 
    while (1) {

        if (info->core_run_state == CORE_STOPPED) {

            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else { 
                PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

                V3_NO_WORK(info);

                // Compiler must not optimize away this read
                while (*((volatile int *)(&info->core_run_state)) == CORE_STOPPED) {
                    
                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }
                    
                    V3_STILL_NO_WORK(info);

                    //PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);
                
                PrintDebug(info->vm_info, info, "SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
                
                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            } 
        }

        if (!started) {

            started = 1;
            
            PrintDebug(info->vm_info, info, "SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
                       info->vcpu_id, info->pcpu_id, 
                       info->segments.cs.selector, (void *)(info->segments.cs.base), 
                       info->segments.cs.limit, (void *)(info->rip));
            
            
            
            PrintDebug(info->vm_info, info, "SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
                       info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);

#ifdef V3_CONFIG_DEBUG_SVM
            PrintDebugVMCB((vmcb_t*)(info->vmm_data));
#endif
            
            v3_start_time(info);
        }
        
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        
#ifdef V3_CONFIG_HVM
        if (v3_handle_hvm_reset(info) > 0) { 
            continue;
        }
#endif
       
#ifdef V3_CONFIG_MULTIBOOT
        if (v3_handle_multiboot_reset(info) > 0) {
            continue;
        }
#endif
        
        if (svm_handle_standard_reset(info) > 0 ) {
            continue;
        }
        


#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif
        
        if (v3_svm_enter(info) == -1 ) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print(info->vm_info, info, "SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print(info->vm_info, info, "SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);
            
            break;
        }
        
        v3_wait_at_barrier(info);
        

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print(info->vm_info, info, "SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif
    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
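    // Example: a startup vector of 0x9a yields CS.selector = 0x9a00,
    // CS.base = 0x9a000, and RIP = 0, so execution begins at linear
    // address 0x9a000.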
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}






/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print(VM_NONE, VCORE_NONE,  "SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug(VM_NONE, VCORE_NONE, "SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // test the SVMDIS bit itself, not equality with 1
        if (vm_cr_low & SVM_VM_CR_MSR_svmdis) {
            V3_Print(VM_NONE, VCORE_NONE, "SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print(VM_NONE, VCORE_NONE,  "SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print(VM_NONE, VCORE_NONE,  "SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4); // need not be shadow-safe, not exposed to guest

    if (!host_vmcbs[cpu_id]) {
        PrintError(VM_NONE, VCORE_NONE,  "Failed to allocate VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug(VM_NONE, VCORE_NONE,  "Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print(VM_NONE, VCORE_NONE,  "Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


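/* Note: the disabled block below is scratch code for measuring
 * VMSAVE/VMLOAD latency; it assumes it is pasted into a context (such
 * as v3_init_svm_cpu) where cpu_id and core are in scope.
 */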
#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug(core->vm_info, core, "VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif