Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
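For example, to track a release branch instead (assuming a release branch named Release-1.3 exists on the remote), execute

  git checkout --track -b Release-1.3 origin/Release-1.3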


HVM capability enhancement: asynchronous upcalls to ROS userspace

palacios/src/palacios/svm.c:
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (Reset)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#include <palacios/vmm_perftune.h>

#include <palacios/vmm_bios.h>


#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

#ifdef V3_CONFIG_MEM_TRACK
#include <palacios/vmm_mem_track.h>
#endif 

#ifdef V3_CONFIG_TM_FUNC
#include <extensions/trans_mem.h>
#endif

#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif



uint32_t v3_last_exit;

// Per-physical-CPU host state save areas (one VMCB page per CPU)
// These are physical addresses
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


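/* Allocate and zero one page for a VMCB; returns its host virtual address,
 * or NULL if the underlying page allocation fails. */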
static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);   // need not be shadow safe, not exposed to guest

    if ((void *)vmcb_pa == NULL) {
        PrintError(VM_NONE, VCORE_NONE, "Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}


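/* MSR write hook for EFER: defers to the generic handler, then forces the
 * hardware-visible EFER.SVME bit, which must remain set while the guest
 * runs under SVM even if the guest attempts to clear it. */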
static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}

/*
 * This is invoked both on an initial boot and on a reset
 * 
 * The difference is that on a reset we will not rehook anything
 *
 */
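/*
 * Rough sketch of the distinction, based on the core_run_state checks below:
 * on first boot the core is CORE_INVALID, so MSR hooks and passthrough page
 * tables are set up; on a reset the core is CORE_RESETTING, so those steps
 * are skipped and only the VMCB and core register state are rebuilt.
 */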

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    if (core->core_run_state!=CORE_INVALID && core->core_run_state!=CORE_RESETTING) { 
        PrintError(core->vm_info, core, "Attempt to Init_VMCB_BIOS in invalid state (%d)\n",core->core_run_state);
        return;
    }

    // need to invalidate any shadow page tables early
    if (core->shdw_pg_mode == SHADOW_PAGING && core->core_run_state==CORE_RESETTING) {
        if (v3_get_vm_cpu_mode(core) != REAL) {
            if (v3_invalidate_shadow_pts(core) == -1) {
                PrintError(core->vm_info,core,"Could not invalidate shadow page tables\n");
                return;
            }
        }
    }

    // Guarantee we are starting from a clean slate
    // even on a reset
    memset(vmcb,0,4096);

    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1; // secure startup... why
    ctrl_area->svm_instrs.ICEBP = 1;  // in-circuit emulator breakpoint
    ctrl_area->svm_instrs.WBINVD = 1; // write back and invalidate caches... why?
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in ASID... AMD ERRATA
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;


#ifdef V3_CONFIG_TM_FUNC
    v3_tm_set_excp_intercepts(ctrl_area);
#endif
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;    // do not care as it does not halt
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    memset(&core->vm_regs,0,sizeof(core->vm_regs));
    memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
    memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
    memset(&core->segments,0,sizeof(core->segments));    
    memset(&core->msrs,0,sizeof(core->msrs));    
    memset(&core->fp_state,0,sizeof(core->fp_state));    

    // reset interrupts
    core->intr_core_state.irq_pending=0; 
    core->intr_core_state.irq_started=0; 
    core->intr_core_state.swintr_posted=0; 

    // reset exceptions
    core->excp_state.excp_pending=0;

    // reset of gprs to expected values at init
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;
    core->vm_regs.rdx = 0x00000f00;  // family/stepping/etc

    
    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1

    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;

    // cr3 zeroed above
    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
    // cr4 zeroed above
    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;

    core->ctrl_regs.efer |= EFER_MSR_svm_enable ;
    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;

    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0xa;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x0;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = 0xffff;

        // (raw attributes = 0xf3)
        seg->type = 0x2;
        seg->system = 0x1;
        seg->dpl = 0x0;
        seg->present = 1;
    }

    core->segments.gdtr.selector = 0x0000;
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.gdtr.dpl = 0x0;

    core->segments.idtr.selector = 0x0000; 
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.ldtr.system = 0;
    core->segments.ldtr.type = 0x2;
    core->segments.ldtr.dpl = 0x0;

    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;
    core->segments.tr.system = 0;
    core->segments.tr.type = 0x3;
    core->segments.tr.dpl = 0x0;

    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;
    // The above also ensures that TPR changes (CR8) are only virtual


    // However, we need to see TPR writes since they will
    // affect the virtual apic
    // we reflect CR8 out to ctrl_regs->apic_tpr
    ctrl_area->cr_reads.cr8 = 1;
    ctrl_area->cr_writes.cr8 = 1;
    // We will do all TPR comparisons in the virtual apic
    // We also do not want the V_TPR to be able to mask the PIC
    ctrl_area->guest_ctrl.V_IGN_TPR = 1;


    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_svm_handle_efer_write, 
                    core);
    }

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        

        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
                return ;
            }
            // the shadow page tables are OK since we have not initialized them yet
        } else {
            // CORE_RESETTING
            // invalidation of shadow page tables happened earlier in this function
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        
        core->ctrl_regs.cr0 |= 0x80000000;

        v3_activate_passthrough_pt(core);

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        // intercept cr4 reads so the shadow pager can use PAE independently of the guest
        ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;

    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        // Set the Nested Page Table pointer
        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize Nested page tables\n");
                return ;
            }
        } else {
            // the existing nested page tables will work fine
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                    &v3_handle_vm_cr_read,
                    &v3_handle_vm_cr_write, 
                    core);
    }

    if (core->core_run_state == CORE_INVALID) { 
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }


}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug(core->vm_info, core, "Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError(core->vm_info, core, "Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError(core->vm_info, core, "Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    if (core && core->vmm_data) { 
        V3_FreePages(V3_PAddr(core->vmm_data), 1);
    }
    return 0;
}

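/* Handle a standard (non-HVM-specific) reset request.  Returns 0 if the core
 * is not resetting, and 1 if a reset was performed, in which case the caller
 * should recycle its execution loop rather than enter the guest. */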
static int svm_handle_standard_reset(struct guest_info *core)
{
    if (core->core_run_state != CORE_RESETTING) { 
        return 0;
    }

    PrintDebug(core->vm_info,core,"Handling standard reset (guest state before follows)\n");

#ifdef V3_CONFIG_DEBUG_SVM
    v3_print_guest_state(core);
#endif

    // wait until all resetting cores get here (ROS or whole VM)
    v3_counting_barrier(&core->vm_info->reset_barrier);

    // I could be a ROS core, or I could be in a non-HVM VM
    // either way, if I'm core 0, I'm the leader
    if (core->vcpu_id==0) {
        uint64_t mem_size=core->vm_info->mem_size;

#ifdef V3_CONFIG_HVM
        // on a ROS reset, we should only 
        // manipulate the part of the memory seen by
        // the ROS
        if (core->vm_info->hvm_state.is_hvm) { 
            mem_size=v3_get_hvm_ros_memsize(core->vm_info);
        }
#endif
        core->vm_info->run_state = VM_RESETTING;
        // copy the bioses again because some, like seabios,
        // assume they start from a fresh copy;
        // this also blows away the BDA and EBDA
        PrintDebug(core->vm_info,core,"Clear memory (%p bytes)\n",(void*)mem_size);
        if (v3_set_gpa_memory(core, 0, mem_size, 0)!=mem_size) { 
            PrintError(core->vm_info,core,"Clear of memory failed\n");
        }
        PrintDebug(core->vm_info,core,"Copying bioses\n");
        if (v3_setup_bioses(core->vm_info, core->vm_info->cfg_data->cfg)) { 
            PrintError(core->vm_info,core,"Setup of bioses failed\n");
        }
    }

    Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);

    PrintDebug(core->vm_info,core,"InitVMCB done\n");

    core->cpl = 0;
    core->cpu_mode = REAL;
    core->mem_mode = PHYSICAL_MEM;
    //core->num_exits=0;

    PrintDebug(core->vm_info,core,"Machine reset to REAL/PHYSICAL\n");

    memset(V3_VAddr((void*)(host_vmcbs[V3_Get_CPU()])),0,4096*4); // good measure...

    // core zero will be restarted by the main execution loop
    core->core_run_state = CORE_STOPPED;

    if (core->vcpu_id==0) { 
        core->vm_info->run_state = VM_RUNNING;
    } 

#ifdef V3_CONFIG_DEBUG_SVM
    PrintDebug(core->vm_info,core,"VMCB state at end of reset\n");
    PrintDebugVMCB((vmcb_t*)(core->vmm_data));
    PrintDebug(core->vm_info,core,"Guest state at end of reset\n");
    v3_print_guest_state(core);
#endif

    // wait until we are all ready to go
    v3_counting_barrier(&core->vm_info->reset_barrier);

    PrintDebug(core->vm_info,core,"Returning with request for recycle loop\n");

    return 1; // reboot is occurring

}

#ifdef V3_CONFIG_CHECKPOINT
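/* Checkpoint/restore of per-core SVM state.  A few fields (CPL and the
 * syscall/sysenter MSRs) are saved by name for convenient access, and then
 * the entire VMCB page is saved verbatim under "VMCB_DATA". */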
int v3_svm_save_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Special case saves of data we need immediate access to
  // in some cases
  V3_CHKPT_SAVE(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_SAVE(ctx,"STAR", guest_area->star, failout); 
  V3_CHKPT_SAVE(ctx,"CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_SAVE(ctx,"LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_SAVE(ctx,"SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_SAVE(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we save the whole enchilada
  if (v3_chkpt_save(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not save SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to save SVM state for core\n");
  return -1;

}

int v3_svm_load_core(struct guest_info * core, void * ctx){
    

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Reload what we special cased, which we will overwrite in a minute
  V3_CHKPT_LOAD(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_LOAD(ctx,"STAR", guest_area->star, failout); 
  V3_CHKPT_LOAD(ctx,"CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_LOAD(ctx,"LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_LOAD(ctx,"SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_LOAD(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we load the whole enchilada
  if (v3_chkpt_load(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not load SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to load SVM state for core\n");
  return -1;

}
#endif

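/* Called after a VM exit to track interrupt delivery: if an injected IRQ
 * was acknowledged by the guest (V_IRQ cleared), mark it started; if the
 * exit interrupted an event delivery, exit_int_info remains valid and the
 * event is re-injected on the next entry. */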
static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}

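/* Called before VM entry: re-injects any event that was cut short by the
 * previous exit, then injects, in priority order, a pending exception, a
 * partially-delivered IRQ, or the next pending interrupt source
 * (external IRQ, NMI, software interrupt, or virtual IRQ). */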
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (guest_ctrl->exit_int_info.valid) {
        // We need to complete the previous injection
        guest_ctrl->EVENTINJ = guest_ctrl->exit_int_info;

        PrintDebug(info->vm_info,info,"Continuing injection of event - eventinj=0x%llx\n",*(uint64_t*)&guest_ctrl->EVENTINJ);

        return 0;
    }


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {

        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        guest_ctrl->EVENTINJ.vector = excp;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info, info, "Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        } else {
            guest_ctrl->EVENTINJ.error_code = 0;
            guest_ctrl->EVENTINJ.ev = 0;
        }

        guest_ctrl->EVENTINJ.rsvd = 0;
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);

    } else if (info->intr_core_state.irq_started == 1) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;

        // We ignore the virtual TPR on this injection
        // TPR/PPR tests have already been done in the APIC.
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                int irq = v3_get_intr(info); 

                if (irq<0) {
                    break;
                }

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;

                // We ignore the virtual TPR on this injection
                // TPR/PPR tests have already been done in the APIC.
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
                
            }
            case V3_NMI:
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting NMI\n");
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                guest_ctrl->EVENTINJ.ev = 0;
                guest_ctrl->EVENTINJ.error_code = 0;
                guest_ctrl->EVENTINJ.rsvd = 0;
                guest_ctrl->EVENTINJ.valid = 1;

                break;

            case V3_SOFTWARE_INTR:
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.ev = 0;
                guest_ctrl->EVENTINJ.error_code = 0;
                guest_ctrl->EVENTINJ.rsvd = 0;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}

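/* Configure TSC handling for the next entry: either intercept RDTSC/RDTSCP,
 * or let the guest read the hardware TSC directly, with TSC_OFFSET applied
 * unless the VM is in TSC passthrough mode. */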
int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}



/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
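/* Rough entry/exit sequence: (1) synchronize guest register state into the
 * VMCB, (2) inject any pending events, (3) clgi / v3_svm_launch / stgi,
 * (4) pull guest state back out of the VMCB, and (5) dispatch the exit to
 * v3_handle_svm_exit. */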
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;


    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_entry(info);
#endif 

#ifdef V3_CONFIG_HVM
    v3_handle_hvm_entry(info);
#endif

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.

    v3_advance_time(info, NULL);

    v3_update_timers(info);


    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;

    // CR8 is now updated by read/writes and it contains the APIC TPR
    // the V_TPR should be just the class part of that.
    // This update is here just for completeness.  We currently
    // are ignoring V_TPR on all injections and doing the priority logic
    // in the APIC.
    // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf;

    //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    // 
    
    guest_state->rflags = info->ctrl_regs.rflags;

    // LMA, LME, SVE?

    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

    V3_FP_ENTRY_RESTORE(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 

    update_irq_entry_state(info);
#endif

#ifdef V3_CONFIG_TM_FUNC
    v3_tm_check_intr_state(info, guest_ctrl, guest_state);
#endif


    /* ** */

    /*
      PrintDebug(info->vm_info, info, "SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print(info->vm_info, info, "!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print(info->vm_info, info, "Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif


        if (guest_ctrl->EVENTINJ.valid && guest_ctrl->interrupt_shadow) { 
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info,info,"Event injection during an interrupt shadow\n");
#endif
        }

        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print(info->vm_info, info, "SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    //
    // We do not track this anymore
    // V_TPR is ignored and we do the logic in the APIC
    //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    //
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // This timer update is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);


    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError(info->vm_info, info, "Error in SVM exit handler (ret=%d)\n", ret);
            PrintError(info->vm_info, info, "  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);

            return -1;
        }
    }


    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

#ifdef V3_CONFIG_HVM
    v3_handle_hvm_exit(info);
#endif 

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_exit(info);
#endif 


    return 0;
}

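/* Per-core execution loop.  Core 0 marks itself runnable immediately; other
 * cores spin until they leave CORE_STOPPED.  Each iteration handles any
 * pending reset, enters the guest via v3_svm_enter(), and breaks out when
 * the VM stops or an unrecoverable error occurs. */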
int v3_start_svm_guest(struct guest_info * info) {

    int started=0;

    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);


#ifdef V3_CONFIG_MULTIBOOT
    if (v3_setup_multiboot_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
        return -1;
    }
#endif

#ifdef V3_CONFIG_HVM
    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
        return -1;
    } 
#endif
 
    while (1) {

        if (info->core_run_state == CORE_STOPPED) {

            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else { 
                PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

                V3_NO_WORK(info);

                // Compiler must not optimize away this read
                while (*((volatile int *)(&info->core_run_state)) == CORE_STOPPED) {
                    
                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }
                    
                    V3_STILL_NO_WORK(info);

                    //PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);
                
                PrintDebug(info->vm_info, info, "SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
                
                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            } 
        }

        if (!started) {

            started=1;
            
            PrintDebug(info->vm_info, info, "SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
                       info->vcpu_id, info->pcpu_id, 
                       info->segments.cs.selector, (void *)(info->segments.cs.base), 
                       info->segments.cs.limit, (void *)(info->rip));
            
            
            
            PrintDebug(info->vm_info, info, "SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
                       info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);

#ifdef V3_CONFIG_DEBUG_SVM
            PrintDebugVMCB((vmcb_t*)(info->vmm_data));
#endif
            
            v3_start_time(info);
        }
        
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        
#ifdef V3_CONFIG_HVM
        if (v3_handle_hvm_reset(info) > 0) { 
            continue;
        }
#endif
       
#ifdef V3_CONFIG_MULTIBOOT
        if (v3_handle_multiboot_reset(info) > 0) {
            continue;
        }
#endif
        
        if (svm_handle_standard_reset(info) > 0 ) {
            continue;
        }
        


#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif
        
        if (v3_svm_enter(info) == -1 ) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                if (v3_gpa_to_hva(info, linear_addr, &host_addr)) {
                    PrintError(info->vm_info, info, "Cannot translate address\n");
                    break;
                }
            } else if (info->mem_mode == VIRTUAL_MEM) {
                if (v3_gva_to_hva(info, linear_addr, &host_addr)) {
                    PrintError(info->vm_info, info, "Cannot translate address\n");
                    break;
                }
            }
            
            V3_Print(info->vm_info, info, "SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print(info->vm_info, info, "SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);
            
            break;
        }
        
        v3_wait_at_barrier(info);
        

        if (info->vm_info->run_state == VM_STOPPED) {
            PrintDebug(info->vm_info,info,"Stopping core as VM is stopped\n");
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print(info->vm_info, info, "SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif
    // Need to take down the other cores on error... 

    return 0;
}



int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
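    // Worked example: a start vector of 0x9f gives CS.selector = 0x9f00,
    // CS.base = 0x9f000, and RIP = 0, so the first instruction is fetched
    // from linear address 0x9f000.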
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}



/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print(VM_NONE, VCORE_NONE,  "SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug(VM_NONE, VCORE_NONE, "SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if (vm_cr_low & SVM_VM_CR_MSR_svmdis) {
            V3_Print(VM_NONE, VCORE_NONE, "SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print(VM_NONE, VCORE_NONE,  "SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if (!(edx & 0x8)) { 
                PrintError(VM_NONE,VCORE_NONE, "WARNING: NO SVM SUPPORT FOR NRIP - SW INTR INJECTION WILL LIKELY FAIL\n");
            }

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging supported\n");
        return 1;
    }
}



void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print(VM_NONE, VCORE_NONE,  "SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4); // need not be shadow-safe, not exposed to guest

    if (!host_vmcbs[cpu_id]) {
        PrintError(VM_NONE, VCORE_NONE,  "Failed to allocate VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug(VM_NONE, VCORE_NONE,  "Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print(VM_NONE, VCORE_NONE,  "Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug(core->vm_info, core, "VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif