Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches instead. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; an example follows.
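For example, to track a release branch instead (the branch name Release-1.3 below is illustrative; run "git branch -r" to list the branches that actually exist):

  git checkout --track -b Release-1.3 origin/Release-1.3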


VM Reset Bugfixes
File: palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (Reset)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */



#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#include <palacios/vmm_perftune.h>

#include <palacios/vmm_bios.h>


#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

#ifdef V3_CONFIG_MEM_TRACK
#include <palacios/vmm_mem_track.h>
#endif 

#ifdef V3_CONFIG_TM_FUNC
#include <extensions/trans_mem.h>
#endif

#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif



uint32_t v3_last_exit;

// Physical addresses of the per-CPU host save areas (host VMCBs)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);
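// (v3_svm_launch is the low-level entry stub: it is expected to save host
//  state into the host VMCB, VMRUN the guest VMCB, and return after #VMEXIT;
//  see the SVM low-level assembly for the authoritative details)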



static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);   // need not be shadow safe, not exposed to guest

    if ((void *)vmcb_pa == NULL) {
        PrintError(VM_NONE, VCORE_NONE, "Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}
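
// (note: the VMCB must be a single 4KB-aligned page of physical memory per
//  the AMD SVM spec; a one-page allocation satisfies this by construction)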


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

/*
 * This is invoked both on an initial boot and on a reset
 * 
 * The difference is that on a reset we will not rehook anything
 *
 */
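/*
 * ("rehook" refers to the MSR hooks installed further down: they are only
 *  registered when core_run_state == CORE_INVALID, i.e. on first boot)
 */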

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    if (core->core_run_state!=CORE_INVALID && core->core_run_state!=CORE_RESETTING) { 
        PrintError(core->vm_info, core, "Attempt to Init_VMCB_BIOS in invalid state (%d)\n",core->core_run_state);
        return;
    }

    // need to invalidate any shadow page tables early
    if (core->shdw_pg_mode == SHADOW_PAGING && core->core_run_state==CORE_RESETTING) {
        if (v3_get_vm_cpu_mode(core) != REAL) {
            if (v3_invalidate_shadow_pts(core) == -1) {
                PrintError(core->vm_info,core,"Could not invalidate shadow page tables\n");
                return;
            }
        }
    }

    // Guarantee we are starting from a clean slate
    // even on a reset
    memset(vmcb,0,4096);

    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1; // secure startup... why
    ctrl_area->svm_instrs.ICEBP = 1;  // in-circuit emulator breakpoint
    ctrl_area->svm_instrs.WBINVD = 1; // write back and invalidate caches... why?
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in asid... why?
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;


#ifdef V3_CONFIG_TM_FUNC
    v3_tm_set_excp_intercepts(ctrl_area);
#endif


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;    // do not care as does not halt
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    memset(&core->vm_regs,0,sizeof(core->vm_regs));
    memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
    memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
    memset(&core->segments,0,sizeof(core->segments));    
    memset(&core->msrs,0,sizeof(core->msrs));    
    memset(&core->fp_state,0,sizeof(core->fp_state));    

    // reset interrupts
    core->intr_core_state.irq_pending=0; 
    core->intr_core_state.irq_started=0; 
    core->intr_core_state.swintr_posted=0; 

    // reset exceptions
    core->excp_state.excp_pending=0;

    // reset of gprs to expected values at init
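    // (per the x86 reset architecture, execution begins at CS:IP = F000:FFF0,
    //  i.e. linear address 0xFFFF0; the matching CS selector/base are set below)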
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;
    core->vm_regs.rdx = 0x00000f00;  // family/stepping/etc

    
    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1

    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;

    // cr3 zeroed above
    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
    // cr4 zeroed above
    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;

    core->ctrl_regs.efer |= EFER_MSR_svm_enable ;
    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;

    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0xa;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x0;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = 0xffff;

        // (raw attributes = 0xf3)
        seg->type = 0x2;
        seg->system = 0x1;
        seg->dpl = 0x0;
        seg->present = 1;
    }

    core->segments.gdtr.selector = 0x0000;
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.gdtr.dpl = 0x0;

    core->segments.idtr.selector = 0x0000; 
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.ldtr.system = 0;
    core->segments.ldtr.type = 0x2;
    core->segments.ldtr.dpl = 0x0;

    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;
    core->segments.tr.system = 0;
    core->segments.tr.type = 0x3;
    core->segments.tr.dpl = 0x0;

    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;
    // The above also ensures that TPR changes (CR8) are only virtual


    // However, we need to see TPR writes since they will
    // affect the virtual apic
    // we reflect cr8 out to ctrl_regs->apic_tpr
    ctrl_area->cr_reads.cr8 = 1;
    ctrl_area->cr_writes.cr8 = 1;
    // We will do all TPR comparisons in the virtual apic
    // We also do not want the V_TPR to be able to mask the PIC
    ctrl_area->guest_ctrl.V_IGN_TPR = 1;


    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_svm_handle_efer_write, 
                    core);
    }

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        

        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
                return ;
            }
            // the shadow page tables are OK since we have not initialized them yet
        } else {
            // CORE_RESETTING
            // invalidation of shadow page tables happened earlier in this function
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        
        core->ctrl_regs.cr0 |= 0x80000000;

        v3_activate_passthrough_pt(core);

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        // intercept cr4 reads/writes so the shadow pager can use PAE independently of the guest
        ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug(core->vm_info, core, "NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize Nested page tables\n");
                return ;
            }
        } else {
            // the existing nested page tables will work fine
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                    &v3_handle_vm_cr_read,
                    &v3_handle_vm_cr_write, 
                    core);
    }

    if (core->core_run_state == CORE_INVALID) { 
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }


}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug(core->vm_info, core, "Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError(core->vm_info, core, "Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError(core->vm_info, core, "Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    if (core->vmm_data) { 
        V3_FreePages(V3_PAddr(core->vmm_data), 1);
    }
    return 0;
}


static int svm_handle_standard_reset(struct guest_info *core)
{
    if (core->core_run_state != CORE_RESETTING) { 
        return 0;
    }

    PrintDebug(core->vm_info,core,"Handling standard reset (guest state before follows)\n");

#ifdef V3_CONFIG_DEBUG_SVM
    v3_print_guest_state(core);
#endif

    // wait until all resetting cores get here (ROS or whole VM)
    v3_counting_barrier(&core->vm_info->reset_barrier);

    // I could be a ROS core, or I could be in a non-HVM VM
    // either way, if I'm core 0, I'm the leader
    if (core->vcpu_id==0) {
        uint64_t mem_size=core->vm_info->mem_size;

#ifdef V3_CONFIG_HVM
        // on a ROS reset, we should only 
        // manipulate the part of the memory seen by
        // the ROS
        if (core->vm_info->hvm_state.is_hvm) { 
            mem_size=v3_get_hvm_ros_memsize(core->vm_info);
        }
#endif
        core->vm_info->run_state = VM_RESETTING;
        // copy the BIOSes in again, because some, like SeaBIOS,
        // assume they start from a clean memory image;
        // this also blows away the BDA and EBDA
        PrintDebug(core->vm_info,core,"Clear memory (%p bytes)\n",(void*)mem_size);
        if (v3_set_gpa_memory(core, 0, mem_size, 0)!=mem_size) { 
            PrintError(core->vm_info,core,"Clear of memory failed\n");
        }
        PrintDebug(core->vm_info,core,"Copying bioses\n");
        if (v3_setup_bioses(core->vm_info, core->vm_info->cfg_data->cfg)) { 
            PrintError(core->vm_info,core,"Setup of bioses failed\n");
        }
    }

    Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);

    PrintDebug(core->vm_info,core,"InitVMCB done\n");

    core->cpl = 0;
    core->cpu_mode = REAL;
    core->mem_mode = PHYSICAL_MEM;
    //core->num_exits=0;

    PrintDebug(core->vm_info,core,"Machine reset to REAL/PHYSICAL\n");

    memset(V3_VAddr((void*)(host_vmcbs[V3_Get_CPU()])),0,4096*4); // good measure...

    // core zero will be restarted by the main execution loop
    core->core_run_state = CORE_STOPPED;

    if (core->vcpu_id==0) { 
        core->vm_info->run_state = VM_RUNNING;
    } 

#ifdef V3_CONFIG_DEBUG_SVM
    PrintDebug(core->vm_info,core,"VMCB state at end of reset\n");
    PrintDebugVMCB((vmcb_t*)(core->vmm_data));
    PrintDebug(core->vm_info,core,"Guest state at end of reset\n");
    v3_print_guest_state(core);
#endif

    // wait until we are all ready to go
    v3_counting_barrier(&core->vm_info->reset_barrier);

    PrintDebug(core->vm_info,core,"Returning with request for recycle loop\n");

    return 1; // reboot is occurring

}

#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Special case saves of data we need immediate access to
  // in some cases
  V3_CHKPT_SAVE(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_SAVE(ctx,"STAR", guest_area->star, failout); 
  V3_CHKPT_SAVE(ctx,"CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_SAVE(ctx,"LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_SAVE(ctx,"SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_SAVE(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we save the whole enchilada
  if (v3_chkpt_save(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not save SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to save SVM state for core\n");
  return -1;

}

int v3_svm_load_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Reload what we special cased, which we will overwrite in a minute
  V3_CHKPT_LOAD(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_LOAD(ctx,"STAR", guest_area->star, failout); 
  V3_CHKPT_LOAD(ctx,"CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_LOAD(ctx,"LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_LOAD(ctx,"SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_LOAD(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we load the whole enchilada
  if (v3_chkpt_load(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not load SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to load SVM state for core\n");
  return -1;

}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info, info, "Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;

        // We ignore the virtual TPR on this injection
        // TPR/PPR tests have already been done in the APIC.
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                int irq = v3_get_intr(info); 

                if (irq<0) {
                  break;
                }

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;

                // We ignore the virtual TPR on this injection
                // TPR/PPR tests have already been done in the APIC.
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
                
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}
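
// Summary of the TSC modes configured above:
//   VM_TIME_TRAP_RDTSC      -> intercept RDTSC/RDTSCP and emulate them
//   VM_TIME_TSC_PASSTHROUGH -> no intercept, guest reads the raw host TSC
//   otherwise               -> no intercept, hardware applies TSC_OFFSET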



/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;


    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_entry(info);
#endif 

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.

    v3_advance_time(info, NULL);

    v3_update_timers(info);


    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;

    // CR8 is now updated by read/writes and it contains the APIC TPR
    // the V_TPR should be just the class part of that.
    // This update is here just for completeness.  We currently
    // are ignoring V_TPR on all injections and doing the priority logic
    // in the APIC.
    // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf;

    //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    // 
    
    guest_state->rflags = info->ctrl_regs.rflags;

    // LMA, LME, SVE?

    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

    V3_FP_ENTRY_RESTORE(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 

    update_irq_entry_state(info);
#endif

#ifdef V3_CONFIG_TM_FUNC
    v3_tm_check_intr_state(info, guest_ctrl, guest_state);
#endif


    /* ** */

    /*
      PrintDebug(info->vm_info, info, "SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print(info->vm_info, info, "!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print(info->vm_info, info, "Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif


        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print(info->vm_info, info, "SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    //
    // We do not track this anymore
    // V_TPR is ignored and we do the logic in the APIC
    //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    //
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // This update timers is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);


    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError(info->vm_info, info, "Error in SVM exit handler (ret=%d)\n", ret);
            PrintError(info->vm_info, info, "  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);

            return -1;
        }
    }


    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_exit(info);
#endif 


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {

    int started=0;

    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);


#ifdef V3_CONFIG_MULTIBOOT
    if (v3_setup_multiboot_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
        return -1;
    }
#endif

#ifdef V3_CONFIG_HVM
    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
        return -1;
    } 
#endif
 
    while (1) {

        if (info->core_run_state == CORE_STOPPED) {

            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else  { 
                PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

                V3_NO_WORK(info);

                // Compiler must not optimize away this read
                while (*((volatile int *)(&info->core_run_state)) == CORE_STOPPED) {
                    
                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }
                    
                    V3_STILL_NO_WORK(info);

                    //PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);
                
                PrintDebug(info->vm_info, info, "SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
                
                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            } 
        }

        if (!started) {

            started=1;
            
            PrintDebug(info->vm_info, info, "SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
                       info->vcpu_id, info->pcpu_id, 
                       info->segments.cs.selector, (void *)(info->segments.cs.base), 
                       info->segments.cs.limit, (void *)(info->rip));
            
            
            
            PrintDebug(info->vm_info, info, "SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
                       info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);

#ifdef V3_CONFIG_DEBUG_SVM
            PrintDebugVMCB((vmcb_t*)(info->vmm_data));
#endif
            
            v3_start_time(info);
        }
        
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        
#ifdef V3_CONFIG_HVM
        if (v3_handle_hvm_reset(info) > 0) { 
            continue;
        }
#endif
       
#ifdef V3_CONFIG_MULTIBOOT
        if (v3_handle_multiboot_reset(info) > 0) {
            continue;
        }
#endif
        
        if (svm_handle_standard_reset(info) > 0 ) {
            continue;
        }
        


#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif
        
        if (v3_svm_enter(info) == -1 ) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print(info->vm_info, info, "SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print(info->vm_info, info, "SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);
            
            break;
        }
        
        v3_wait_at_barrier(info);
        

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print(info->vm_info, info, "SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif
    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
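    // (worked example: a hypothetical vector VV = 0x99 yields
    //  CS.selector = 0x9900, CS.base = 0x99000, and RIP = 0, so the
    //  first fetch is from linear address 0x99000)
    //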
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
      V3_Print(VM_NONE, VCORE_NONE,  "SVM Not Available\n");
      return 0;
    }  else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug(VM_NONE, VCORE_NONE, "SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        // SVMDIS is a single (non-LSB) bit, so test for nonzero rather than == 1
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print(VM_NONE, VCORE_NONE, "SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print(VM_NONE, VCORE_NONE,  "SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print(VM_NONE, VCORE_NONE,  "SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4); // need not be shadow-safe, not exposed to guest

    if (!host_vmcbs[cpu_id]) {
        PrintError(VM_NONE, VCORE_NONE,  "Failed to allocate host VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug(VM_NONE, VCORE_NONE,  "Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print(VM_NONE, VCORE_NONE,  "Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif