Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
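
For example, to work from a release branch instead (the branch name here is illustrative; list the real ones with "git branch -r"), execute

  git checkout --track -b Release-1.3 origin/Release-1.3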


Commit: SVM reset capability + integration with multiboot+hvm reset capability
File: palacios/src/palacios/svm.c (palacios.git)
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#include <palacios/vmm_perftune.h>

#include <palacios/vmm_bios.h>


#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

#ifdef V3_CONFIG_MEM_TRACK
#include <palacios/vmm_mem_track.h>
#endif 

#ifdef V3_CONFIG_TM_FUNC
#include <extensions/trans_mem.h>
#endif

#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif



uint32_t v3_last_exit;

// Per-CPU host state save areas (VMCBs)
// These are physical addresses
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);



static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);   // need not be shadow safe, not exposed to guest

    if ((void *)vmcb_pa == NULL) {
        PrintError(VM_NONE, VCORE_NONE, "Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);
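    // (The VMCB is architecturally a single 4 KB page, with the control area
    // at the start and the save-state area at offset 0x400, which is why one
    // page and a 4096-byte clear suffice.)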

    return vmcb_page;
}


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that the hardware-visible EFER.SVME bit is set (SVM Enable)
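        // (Background, from the AMD architecture rather than this file: a
        // guest EFER with SVME clear is illegal VMCB state and would make
        // VMRUN fail, so the generic handler above keeps the guest-visible
        // value while the hardware copy keeps SVME set.)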
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

/*
 * This is invoked both on an initial boot and on a reset
 * 
 * The difference is that on a reset we will not rehook anything
 *
 */

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    if (core->core_run_state != CORE_INVALID && core->core_run_state != CORE_RESETTING) { 
        PrintError(core->vm_info, core, "Attempt to Init_VMCB_BIOS in invalid state (%d)\n", core->core_run_state);
        return;
    }

    // need to invalidate any shadow page tables early
    if (core->shdw_pg_mode == SHADOW_PAGING && core->core_run_state == CORE_RESETTING) {
        if (v3_get_vm_cpu_mode(core) != REAL) {
            if (v3_invalidate_shadow_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not invalidate shadow page tables\n");
                return;
            }
        }
    }

    // Guarantee we are starting from a clean slate
    // even on a reset
    memset(vmcb, 0, 4096);

    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1; // secure startup... why?
    ctrl_area->svm_instrs.ICEBP = 1;  // in-circuit emulator breakpoint
    ctrl_area->svm_instrs.WBINVD = 1; // write back and invalidate caches... why?
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in ASID... why?
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;


#ifdef V3_CONFIG_TM_FUNC
    v3_tm_set_excp_intercepts(ctrl_area);
#endif
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;    // do not care as it does not halt
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    memset(&core->vm_regs, 0, sizeof(core->vm_regs));
    memset(&core->ctrl_regs, 0, sizeof(core->ctrl_regs));
    memset(&core->dbg_regs, 0, sizeof(core->dbg_regs));
    memset(&core->segments, 0, sizeof(core->segments));    
    memset(&core->msrs, 0, sizeof(core->msrs));    
    memset(&core->fp_state, 0, sizeof(core->fp_state));    

    // reset interrupts
    core->intr_core_state.irq_pending = 0; 
    core->intr_core_state.irq_started = 0; 
    core->intr_core_state.swintr_posted = 0; 

    // reset exceptions
    core->excp_state.excp_pending = 0;

    // reset the GPRs to the values expected at INIT
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;
    core->vm_regs.rdx = 0x00000f00;  // family/stepping/etc

    
    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1

    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real mode
    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;

    // cr3 zeroed above
    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
    // cr4 zeroed above
    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;

    core->ctrl_regs.efer |= EFER_MSR_svm_enable;
    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;

    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0xa;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x0;
    core->segments.cs.present = 1;
241
242
243     struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
244                                       &(core->segments.es), &(core->segments.fs), 
245                                       &(core->segments.gs), NULL};
246
247     for ( i = 0; segregs[i] != NULL; i++) {
248         struct v3_segment * seg = segregs[i];
249         
250         seg->selector = 0x0000;
251         //    seg->base = seg->selector << 4;
252         seg->base = 0x00000000;
253         seg->limit = 0xffff;
254
255         // (raw attributes = 0xf3)
256         seg->type = 0x2;
257         seg->system = 0x1;
258         seg->dpl = 0x0;
259         seg->present = 1;
260     }
261
262     core->segments.gdtr.selector = 0x0000;
263     core->segments.gdtr.limit = 0x0000ffff;
264     core->segments.gdtr.base = 0x0000000000000000LL;
265     core->segments.gdtr.dpl = 0x0;
266
267     core->segments.idtr.selector = 0x0000; 
268     core->segments.idtr.limit = 0x0000ffff;
269     core->segments.idtr.base = 0x0000000000000000LL;
270     core->segments.ldtr.limit = 0x0000ffff;
271     core->segments.ldtr.base = 0x0000000000000000LL;
272     core->segments.ldtr.system = 0;
273     core->segments.ldtr.type = 0x2;
274     core->segments.ldtr.dpl = 0x0;
275
276     core->segments.tr.selector = 0x0000;
277     core->segments.tr.limit = 0x0000ffff;
278     core->segments.tr.base = 0x0000000000000000LL;
279     core->segments.tr.system = 0;
280     core->segments.tr.type = 0x3;
281     core->segments.tr.dpl = 0x0;
282
283     core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
284     core->dbg_regs.dr7 = 0x0000000000000400LL;
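    // (DR6 = 0xffff0ff0 and DR7 = 0x400 are the architected x86 reset
    // values for the debug registers.)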


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;
    // The above also ensures that TPR changes (CR8) are only virtual


    // However, we need to see TPR writes since they will
    // affect the virtual APIC
    // we reflect CR8 out to ctrl_regs->apic_tpr
    ctrl_area->cr_reads.cr8 = 1;
    ctrl_area->cr_writes.cr8 = 1;
    // We will do all TPR comparisons in the virtual APIC
    // We also do not want the V_TPR to be able to mask the PIC
    ctrl_area->guest_ctrl.V_IGN_TPR = 1;

    

    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_svm_handle_efer_write, 
                    core);
    }
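    // (MSR hooks are installed only on first initialization, while the core
    // is still CORE_INVALID; on a reset the existing hooks are kept, which
    // is what the "will not rehook" note at the top of this function refers
    // to.)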

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        

        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
                return;
            }
            // the shadow page tables are OK since we have not initialized them yet
        } else {
            // CORE_RESETTING
            // invalidation of shadow page tables happened earlier in this function
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        
        core->ctrl_regs.cr0 |= 0x80000000;

        v3_activate_passthrough_pt(core);

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        // intercept CR4 reads so the shadow pager can use PAE independently of the guest
        ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;
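        // (0x0007040600070406 is the architected power-on default PAT:
        // entries WB, WT, UC-, UC, repeated in the upper half.)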


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug(core->vm_info, core, "NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize Nested page tables\n");
                return;
            }
        } else {
            // the existing nested page tables will work fine
        }

        ctrl_area->N_CR3 = core->direct_map_pt;
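        // (The passthrough tables built by v3_init_passthrough_pts double
        // as the nested page tables; N_CR3 simply points the hardware at
        // them.)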

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                    &v3_handle_vm_cr_read,
                    &v3_handle_vm_cr_write, 
                    core);
    }

    if (core->core_run_state == CORE_INVALID) { 
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }


}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug(core->vm_info, core, "Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError(core->vm_info, core, "Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t *)(core->vmm_data), core);
    } else {
        PrintError(core->vm_info, core, "Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    if (core->vmm_data) { 
        V3_FreePages(V3_PAddr(core->vmm_data), 1);
    }
    return 0;
}


static int svm_handle_standard_reset(struct guest_info * core)
{
    if (core->core_run_state != CORE_RESETTING) { 
        return 0;
    }

    PrintDebug(core->vm_info, core, "Handling standard reset (guest state before follows)\n");

#ifdef V3_CONFIG_DEBUG_SVM
    v3_print_guest_state(core);
#endif

    // wait until all resetting cores get here (ROS or whole VM)
    v3_counting_barrier(&core->vm_info->reset_barrier);
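    // (This is the first half of a two-phase barrier protocol: every
    // resetting core parks here, core 0 then rebuilds memory and the BIOS
    // images below, and a second v3_counting_barrier at the end of this
    // function releases all cores together.)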

    // I could be a ROS core, or I could be in a non-HVM VM
    // either way, if I'm core 0, I'm the leader
    if (core->vcpu_id == 0) {
        core->vm_info->run_state = VM_RESETTING;
        // copy the BIOSes again because some, like SEABIOS, assume a fresh
        // copy on each boot
        // this should also blow away the BDA and EBDA
        PrintDebug(core->vm_info, core, "Clear memory (%p bytes)\n", (void *)core->vm_info->mem_size);
        if (v3_set_gpa_memory(core, 0, core->vm_info->mem_size, 0) != core->vm_info->mem_size) { 
            PrintError(core->vm_info, core, "Clear of memory failed\n");
        }
        PrintDebug(core->vm_info, core, "Copying bioses\n");
        if (v3_setup_bioses(core->vm_info, core->vm_info->cfg_data->cfg)) { 
            PrintError(core->vm_info, core, "Setup of bioses failed\n");
        }
    }

    Init_VMCB_BIOS((vmcb_t *)(core->vmm_data), core);

    PrintDebug(core->vm_info, core, "InitVMCB done\n");

    core->cpl = 0;
    core->cpu_mode = REAL;
    core->mem_mode = PHYSICAL_MEM;
    core->num_exits = 0;

    PrintDebug(core->vm_info, core, "Machine reset to REAL/PHYSICAL\n");

    memset(V3_VAddr((void *)(host_vmcbs[V3_Get_CPU()])), 0, 4096 * 4); // good measure...

    // core zero will be restarted by the main execution loop
    core->core_run_state = CORE_STOPPED;

    if (core->vcpu_id == 0) { 
        core->vm_info->run_state = VM_RUNNING;
    } 

#ifdef V3_CONFIG_DEBUG_SVM
    PrintDebug(core->vm_info, core, "VMCB state at end of reset\n");
    PrintDebugVMCB((vmcb_t *)(core->vmm_data));
    PrintDebug(core->vm_info, core, "Guest state at end of reset\n");
    v3_print_guest_state(core);
#endif

    // wait until we are all ready to go
    v3_counting_barrier(&core->vm_info->reset_barrier);

    PrintDebug(core->vm_info, core, "Returning with request for recycle loop\n");

    return 1; // reboot is occurring

}

#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx) {

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Special-case saves of data we need immediate access to
  // in some cases
  V3_CHKPT_SAVE(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_SAVE(ctx, "STAR", guest_area->star, failout); 
  V3_CHKPT_SAVE(ctx, "CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_SAVE(ctx, "LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_SAVE(ctx, "SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_SAVE(ctx, "KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_SAVE(ctx, "SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_SAVE(ctx, "SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_SAVE(ctx, "SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we save the whole enchilada
  if (v3_chkpt_save(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not save SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to save SVM state for core\n");
  return -1;

}

int v3_svm_load_core(struct guest_info * core, void * ctx) {
    

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Reload what we special-cased, which we will overwrite in a minute
  V3_CHKPT_LOAD(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_LOAD(ctx, "STAR", guest_area->star, failout); 
  V3_CHKPT_LOAD(ctx, "CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_LOAD(ctx, "LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_LOAD(ctx, "SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_LOAD(ctx, "KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_LOAD(ctx, "SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_LOAD(ctx, "SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_LOAD(ctx, "SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we load the whole enchilada
  if (v3_chkpt_load(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not load SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to load SVM state for core\n");
  return -1;

}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

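// Note on injection priority in the function below: a pending exception is
// delivered first via EVENTINJ; otherwise an interrupt that was already
// started is re-asserted through V_IRQ; only then are newly pending events
// (external IRQ, NMI, software interrupt, virtual IRQ) considered.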
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info, info, "Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;

        // We ignore the virtual TPR on this injection
        // TPR/PPR tests have already been done in the APIC.
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4;  // 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                int irq = v3_get_intr(info); 

                if (irq < 0) {
                    break;
                }

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;

                // We ignore the virtual TPR on this injection
                // TPR/PPR tests have already been done in the APIC.
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4;  // 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break; // do not fall through into the NMI case
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


int v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
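
    // Three cases below: (1) trap RDTSC/RDTSCP so the VMM fully virtualizes
    // the TSC, (2) pass the host TSC through untouched (offset 0), or
    // (3) let the hardware add TSC_OFFSET to the guest's TSC reads.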

    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}



/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t *)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;
    struct Interrupt_Info exit_int_info;


    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_entry(info);
#endif 

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.

    v3_advance_time(info, NULL);

    v3_update_timers(info);


    // disable global interrupts for VM state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;

    // CR8 is now updated by reads/writes and it contains the APIC TPR;
    // the V_TPR should be just the class part of that.
    // This update is here just for completeness.  We currently
    // are ignoring V_TPR on all injections and doing the priority logic
    // in the APIC.
    // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf;

    //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    // 
    
    guest_state->rflags = info->ctrl_regs.rflags;

    // LMA, LME, SVE?

    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t *)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

    V3_FP_ENTRY_RESTORE(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 

    update_irq_entry_state(info);
#endif

#ifdef V3_CONFIG_TM_FUNC
    v3_tm_check_intr_state(info, guest_ctrl, guest_state);
#endif


    /* ** */

    /*
      PrintDebug(info->vm_info, info, "SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print(info->vm_info, info, "!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print(info->vm_info, info, "Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif


        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print(info->vm_info, info, "SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    // Save guest state from the VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    //
    // We do not track this anymore
    // V_TPR is ignored and we do the logic in the APIC
    //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    //
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star = guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t *)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;
    exit_int_info = guest_ctrl->exit_int_info;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after VM exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // This timer update is for time-dependent handlers
    // when we are slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);


    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError(info->vm_info, info, "Error in SVM exit handler (ret=%d)\n", ret);
            PrintError(info->vm_info, info, "  last exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t)exit_code);

            //V3_Sleep(5*1000000);
    
            return -1;
        }
    }


    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_exit(info);
#endif 


    return 0;
}

int v3_start_svm_guest(struct guest_info * info) {

    int started = 0;

    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);


#ifdef V3_CONFIG_MULTIBOOT
    if (v3_setup_multiboot_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
        return -1;
    }
#endif

#ifdef V3_CONFIG_HVM
    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
        return -1;
    } 
#endif
 
    while (1) {

        if (info->core_run_state == CORE_STOPPED) {

            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else { 
                PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

                V3_NO_WORK(info);

                // Compiler must not optimize away this read
                while (*((volatile int *)(&info->core_run_state)) == CORE_STOPPED) {
                    
                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }
                    
                    V3_STILL_NO_WORK(info);

                    //PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);
                
                PrintDebug(info->vm_info, info, "SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
                
                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            } 
        }

        if (!started) {

            started = 1;
            
            PrintDebug(info->vm_info, info, "SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
                       info->vcpu_id, info->pcpu_id, 
                       info->segments.cs.selector, (void *)(info->segments.cs.base), 
                       info->segments.cs.limit, (void *)(info->rip));
            
            
            
            PrintDebug(info->vm_info, info, "SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
                       info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);

#ifdef V3_CONFIG_DEBUG_SVM
            PrintDebugVMCB((vmcb_t *)(info->vmm_data));
#endif
            
            v3_start_time(info);
        }
        
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        
#ifdef V3_CONFIG_HVM
        if (v3_handle_hvm_reset(info) > 0) { 
            continue;
        }
#endif
       
#ifdef V3_CONFIG_MULTIBOOT
        if (v3_handle_multiboot_reset(info) > 0) {
            continue;
        }
#endif
        
        if (svm_handle_standard_reset(info) > 0) {
            continue;
        }
        


#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t *)&(guest_ctrl->exit_info1));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t *)&(guest_ctrl->exit_info2));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print(info->vm_info, info, "SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print(info->vm_info, info, "SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);
            
            break;
        }
        
        v3_wait_at_barrier(info);
        

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print(info->vm_info, info, "SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif
    // Need to take down the other cores on error... 

    return 0;
}


int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
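    // For example, a start vector of 0x9f gives CS.selector = 0x9f00 and
    // CS.base = 0x9f000 with RIP = 0, so execution begins at linear
    // address 0x9f000.
    //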
    V3_Print(core->vm_info, core, "SVM Reset to RIP %p\n", (void *)rip);
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug(VM_NONE, VCORE_NONE, "CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug(VM_NONE, VCORE_NONE, "SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print(VM_NONE, VCORE_NONE, "SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print(VM_NONE, VCORE_NONE, "SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print(VM_NONE, VCORE_NONE, "SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print(VM_NONE, VCORE_NONE, "SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug(VM_NONE, VCORE_NONE, "CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print(VM_NONE, VCORE_NONE, "SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4); // need not be shadow-safe, not exposed to guest

    if (!host_vmcbs[cpu_id]) {
        PrintError(VM_NONE, VCORE_NONE, "Failed to allocate VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug(VM_NONE, VCORE_NONE, "Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}
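
// (EFER.SVME and the VM_HSAVE_PA MSR are per-core state, so
// v3_init_svm_cpu must run on every CPU that will execute guest cores.)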


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print(VM_NONE, VCORE_NONE, "Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo), "=d"(end_hi)
                          : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif