Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
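For example, to track a release branch (the branch name below is illustrative):

  git checkout --track -b release-1.2 origin/release-1.2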


palacios/src/palacios/svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *         Peter Dinda <pdinda@northwestern.edu> (Reset)
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */



#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#include <palacios/vmm_perftune.h>

#include <palacios/vmm_bios.h>


#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>

#ifdef V3_CONFIG_MEM_TRACK
#include <palacios/vmm_mem_track.h>
#endif 

#ifdef V3_CONFIG_TM_FUNC
#include <extensions/trans_mem.h>
#endif

#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif



uint32_t v3_last_exit;

// Per-CPU physical addresses of the host state save areas (VMCB pages)
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);



static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);   // need not be shadow safe, not exposed to guest

    if ((void *)vmcb_pa == NULL) {
        PrintError(VM_NONE, VCORE_NONE, "Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}
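// Added commentary: AMD requires the VMCB (and the host save area) to be a
// 4KB-aligned physical page, which is why the allocation above is a whole
// page; V3_AllocPages returns page-aligned physical memory.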


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}
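// Added commentary: the generic handler above updates the guest-visible
// (shadow) EFER, so a guest that clears EFER.SVME will see it cleared on a
// later read. The EFER actually loaded into hardware, however, must keep
// SVME set while we are running under SVM, which is what the fix-up above
// guarantees.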

/*
 * This is invoked both on an initial boot and on a reset
 * 
 * The difference is that on a reset we will not rehook anything
 *
 */

static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;

    if (core->core_run_state!=CORE_INVALID && core->core_run_state!=CORE_RESETTING) { 
        PrintError(core->vm_info, core, "Attempt to Init_VMCB_BIOS in invalid state (%d)\n",core->core_run_state);
        return;
    }

    // need to invalidate any shadow page tables early
    if (core->shdw_pg_mode == SHADOW_PAGING && core->core_run_state==CORE_RESETTING) {
        if (v3_get_vm_cpu_mode(core) != REAL) {
            if (v3_invalidate_shadow_pts(core) == -1) {
                PrintError(core->vm_info,core,"Could not invalidate shadow page tables\n");
                return;
            }
        }
    }

    // Guarantee we are starting from a clean slate
    // even on a reset
    memset(vmcb,0,4096);

    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1; // secure startup... why
    ctrl_area->svm_instrs.ICEBP = 1;  // in circuit emulator breakpoint
    ctrl_area->svm_instrs.WBINVD = 1; // write back and invalidate caches... why?
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;   // invalidate page in asid... AMD ERRATA
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

    /* Set at VMM launch as needed */
    ctrl_area->instrs.RDTSC = 0;
    ctrl_area->svm_instrs.RDTSCP = 0;


#ifdef V3_CONFIG_TM_FUNC
    v3_tm_set_excp_intercepts(ctrl_area);
#endif


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    //    ctrl_area->instrs.PAUSE = 1;    // do not care as does not halt
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    memset(&core->vm_regs,0,sizeof(core->vm_regs));
    memset(&core->ctrl_regs,0,sizeof(core->ctrl_regs));
    memset(&core->dbg_regs,0,sizeof(core->dbg_regs));
    memset(&core->segments,0,sizeof(core->segments));    
    memset(&core->msrs,0,sizeof(core->msrs));    
    memset(&core->fp_state,0,sizeof(core->fp_state));    

    // reset interrupts
    core->intr_core_state.irq_pending=0; 
    core->intr_core_state.irq_started=0; 
    core->intr_core_state.swintr_posted=0; 

    // reset exceptions
    core->excp_state.excp_pending=0;

    // reset of gprs to expected values at init
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;
    core->vm_regs.rdx = 0x00000f00;  // family/stepping/etc

    
    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1

    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->shdw_pg_state.guest_cr0 = core->ctrl_regs.cr0;

    // cr3 zeroed above
    core->shdw_pg_state.guest_cr3 = core->ctrl_regs.cr3;
    // cr4 zeroed above
    core->shdw_pg_state.guest_cr4 = core->ctrl_regs.cr4;

    core->ctrl_regs.efer |= EFER_MSR_svm_enable;
    core->shdw_pg_state.guest_efer.value = core->ctrl_regs.efer;

    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0xa;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x0;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = 0xffff;

        // (raw attributes = 0xf3)
        seg->type = 0x2;
        seg->system = 0x1;
        seg->dpl = 0x0;
        seg->present = 1;
    }

    core->segments.gdtr.selector = 0x0000;
    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.gdtr.dpl = 0x0;

    core->segments.idtr.selector = 0x0000; 
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.ldtr.system = 0;
    core->segments.ldtr.type = 0x2;
    core->segments.ldtr.dpl = 0x0;

    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;
    core->segments.tr.system = 0;
    core->segments.tr.type = 0x3;
    core->segments.tr.dpl = 0x0;

    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;

    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;
    // The above also assures the TPR changes (CR8) are only virtual


    // However, we need to see TPR writes since they will
    // affect the virtual apic
    // we reflect cr8 out to ctrl_regs->apic_tpr
    ctrl_area->cr_reads.cr8 = 1;
    ctrl_area->cr_writes.cr8 = 1;
    // We will do all TPR comparisons in the virtual apic
    // We also do not want the V_TPR to be able to mask the PIC
    ctrl_area->guest_ctrl.V_IGN_TPR = 1;



    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, EFER_MSR, 
                    &v3_handle_efer_read,
                    &v3_svm_handle_efer_write, 
                    core);
    }

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        

        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize passthrough page tables\n");
                return ;
            }
            // the shadow page tables are OK since we have not initialized them yet
        } else {
            // CORE_RESETTING
            // invalidation of shadow page tables happened earlier in this function
        }

        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        
        core->ctrl_regs.cr0 |= 0x80000000;

        v3_activate_passthrough_pt(core);

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //intercept cr4 read so shadow pager can use PAE independently of guest
        ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;


        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;


    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        // Set the Nested Page Table pointer
        if (core->core_run_state == CORE_INVALID) { 
            if (v3_init_passthrough_pts(core) == -1) {
                PrintError(core->vm_info, core, "Could not initialize Nested page tables\n");
                return ;
            }
        } else {
            // the existing nested page tables will work fine
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    if (core->core_run_state == CORE_INVALID) { 
        v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
                    &v3_handle_vm_cr_read,
                    &v3_handle_vm_cr_write, 
                    core);
    }

    if (core->core_run_state == CORE_INVALID) { 
#define INT_PENDING_AMD_MSR             0xc0010055

        v3_hook_msr(core->vm_info, IA32_STAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_LSTAR_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_FMASK_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_KERN_GS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, IA32_CSTAR_MSR, NULL, NULL, NULL);

        v3_hook_msr(core->vm_info, SYSENTER_CS_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_ESP_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, SYSENTER_EIP_MSR, NULL, NULL, NULL);


        v3_hook_msr(core->vm_info, FS_BASE_MSR, NULL, NULL, NULL);
        v3_hook_msr(core->vm_info, GS_BASE_MSR, NULL, NULL, NULL);

        // Passthrough read operations are ok.
        v3_hook_msr(core->vm_info, INT_PENDING_AMD_MSR, NULL, v3_msr_unhandled_write, NULL);
    }


}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug(core->vm_info, core, "Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError(core->vm_info, core, "Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug(core->vm_info, core, "Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError(core->vm_info, core, "Invalid VM class\n");
        return -1;
    }

    core->core_run_state = CORE_STOPPED;

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    if (core && core->vmm_data) { 
        V3_FreePages(V3_PAddr(core->vmm_data), 1);
    }
    return 0;
}


static int svm_handle_standard_reset(struct guest_info *core)
{
    if (core->core_run_state != CORE_RESETTING) { 
        return 0;
    }

    PrintDebug(core->vm_info,core,"Handling standard reset (guest state before follows)\n");

#ifdef V3_CONFIG_DEBUG_SVM
    v3_print_guest_state(core);
#endif

    // wait until all resetting cores get here (ROS or whole VM)
    v3_counting_barrier(&core->vm_info->reset_barrier);

    // I could be a ROS core, or I could be in a non-HVM 
    // either way, if I'm core 0, I'm the leader
    if (core->vcpu_id==0) {
        uint64_t mem_size=core->vm_info->mem_size;

#ifdef V3_CONFIG_HVM
        // on a ROS reset, we should only 
        // manipulate the part of the memory seen by
        // the ROS
        if (core->vm_info->hvm_state.is_hvm) { 
            mem_size=v3_get_hvm_ros_memsize(core->vm_info);
        }
#endif
        core->vm_info->run_state = VM_RESETTING;
        // copy bioses again because some, like seabios, assume a fresh copy;
        // this should also blow away the BDA and EBDA
        PrintDebug(core->vm_info,core,"Clear memory (%p bytes)\n",(void*)mem_size);
        if (v3_set_gpa_memory(core, 0, mem_size, 0)!=mem_size) { 
            PrintError(core->vm_info,core,"Clear of memory failed\n");
        }
        PrintDebug(core->vm_info,core,"Copying bioses\n");
        if (v3_setup_bioses(core->vm_info, core->vm_info->cfg_data->cfg)) { 
            PrintError(core->vm_info,core,"Setup of bioses failed\n");
        }
    }

    Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);

    PrintDebug(core->vm_info,core,"InitVMCB done\n");

    core->cpl = 0;
    core->cpu_mode = REAL;
    core->mem_mode = PHYSICAL_MEM;
    //core->num_exits=0;

    PrintDebug(core->vm_info,core,"Machine reset to REAL/PHYSICAL\n");

    memset(V3_VAddr((void*)(host_vmcbs[V3_Get_CPU()])),0,4096*4); // good measure...

    // core zero will be restarted by the main execution loop
    core->core_run_state = CORE_STOPPED;

    if (core->vcpu_id==0) { 
        core->vm_info->run_state = VM_RUNNING;
    } 

#ifdef V3_CONFIG_DEBUG_SVM
    PrintDebug(core->vm_info,core,"VMCB state at end of reset\n");
    PrintDebugVMCB((vmcb_t*)(core->vmm_data));
    PrintDebug(core->vm_info,core,"Guest state at end of reset\n");
    v3_print_guest_state(core);
#endif

    // wait until we are all ready to go
    v3_counting_barrier(&core->vm_info->reset_barrier);

    PrintDebug(core->vm_info,core,"Returning with request for recycle loop\n");

    return 1; // reboot is occurring

}

#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Special case saves of data we need immediate access to
  // in some cases
  V3_CHKPT_SAVE(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_SAVE(ctx,"STAR", guest_area->star, failout); 
  V3_CHKPT_SAVE(ctx,"CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_SAVE(ctx,"LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_SAVE(ctx,"SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_SAVE(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_SAVE(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we save the whole enchilada
  if (v3_chkpt_save(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not save SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to save SVM state for core\n");
  return -1;

}

int v3_svm_load_core(struct guest_info * core, void * ctx){

  vmcb_saved_state_t * guest_area = GET_VMCB_SAVE_STATE_AREA(core->vmm_data); 

  // Reload what we special cased, which we will overwrite in a minute
  V3_CHKPT_LOAD(ctx, "CPL", core->cpl, failout);
  V3_CHKPT_LOAD(ctx,"STAR", guest_area->star, failout); 
  V3_CHKPT_LOAD(ctx,"CSTAR", guest_area->cstar, failout); 
  V3_CHKPT_LOAD(ctx,"LSTAR", guest_area->lstar, failout); 
  V3_CHKPT_LOAD(ctx,"SFMASK", guest_area->sfmask, failout); 
  V3_CHKPT_LOAD(ctx,"KERNELGSBASE", guest_area->KernelGsBase, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_CS", guest_area->sysenter_cs, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_ESP", guest_area->sysenter_esp, failout); 
  V3_CHKPT_LOAD(ctx,"SYSENTER_EIP", guest_area->sysenter_eip, failout); 
  
  // and then we load the whole enchilada
  if (v3_chkpt_load(ctx, "VMCB_DATA", PAGE_SIZE, core->vmm_data)) { 
    PrintError(core->vm_info, core, "Could not load SVM vmcb\n");
    goto failout;
  }
  
  return 0;

 failout:
  PrintError(core->vm_info, core, "Failed to load SVM state for core\n");
  return -1;

}
#endif

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    if (guest_ctrl->exit_int_info.valid) {
        // We need to complete the previous injection
        guest_ctrl->EVENTINJ = guest_ctrl->exit_int_info;

        PrintDebug(info->vm_info,info,"Continuing injection of event - eventinj=0x%llx\n",*(uint64_t*)&guest_ctrl->EVENTINJ);

        return 0;
    }


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {

        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        guest_ctrl->EVENTINJ.vector = excp;

        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info, info, "Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        } else {
            guest_ctrl->EVENTINJ.error_code = 0;
            guest_ctrl->EVENTINJ.ev = 0;
        }

        guest_ctrl->EVENTINJ.rsvd = 0;
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);

    } else if (info->intr_core_state.irq_started == 1) {

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug(info->vm_info, info, "IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;

        // We ignore the virtual TPR on this injection
        // TPR/PPR tests have already been done in the APIC.
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                int irq = v3_get_intr(info); 

                if (irq<0) {
                  break;
                }

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;

                // We ignore the virtual TPR on this injection
                // TPR/PPR tests have already been done in the APIC.
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = info->intr_core_state.irq_vector >> 4 ;  // 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;

                break;
                
            }
            case V3_NMI:
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting NMI\n");
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                guest_ctrl->EVENTINJ.ev = 0;
                guest_ctrl->EVENTINJ.error_code = 0;
                guest_ctrl->EVENTINJ.rsvd = 0;
                guest_ctrl->EVENTINJ.valid = 1;

                break;

            case V3_SOFTWARE_INTR:
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug(info->vm_info, info, "Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.ev = 0;
                guest_ctrl->EVENTINJ.error_code = 0;
                guest_ctrl->EVENTINJ.rsvd = 0;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;

                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}
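// Added commentary: irq_pending/irq_started form a small handshake with the
// virtual APIC. update_irq_entry_state() offers an interrupt via V_IRQ (or
// EVENTINJ) before VMRUN and marks it pending; update_irq_exit_state() then
// observes, after the exit, whether the guest acknowledged it (V_IRQ cleared)
// and whether vectoring completed (EXITINTINFO invalid), advancing the state
// pending -> started -> done, or re-arming EVENTINJ if vectoring was cut short.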

int 
v3_svm_config_tsc_virtualization(struct guest_info * info) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->time_state.flags & VM_TIME_TRAP_RDTSC) {
        ctrl_area->instrs.RDTSC = 1;
        ctrl_area->svm_instrs.RDTSCP = 1;
    } else {
        ctrl_area->instrs.RDTSC = 0;
        ctrl_area->svm_instrs.RDTSCP = 0;

        if (info->time_state.flags & VM_TIME_TSC_PASSTHROUGH) {
            ctrl_area->TSC_OFFSET = 0;
        } else {
            ctrl_area->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
        }
    }
    return 0;
}
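// Added commentary: three TSC modes fall out of the flags above. Trapping
// (VM_TIME_TRAP_RDTSC) makes every guest rdtsc/rdtscp exit to the VMM;
// passthrough exposes the raw host TSC (offset 0); otherwise the hardware
// applies TSC_OFFSET to each guest read, so virtual time can run behind the
// host without taking exits.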



/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
    uint64_t guest_cycles = 0;


    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_entry(info);
#endif 

    // Update timer devices after being in the VM before doing 
    // IRQ updates, so that any interrupts they raise get seen 
    // immediately.

    v3_advance_time(info, NULL);

    v3_update_timers(info);


    // disable global interrupts for vm state transition
    v3_clgi();

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;

    // CR8 is now updated by read/writes and it contains the APIC TPR
    // the V_TPR should be just the class part of that.
    // This update is here just for completeness.  We currently
    // are ignoring V_TPR on all injections and doing the priority logic
    // in the APIC.
    // guest_ctrl->guest_ctrl.V_TPR = ((info->ctrl_regs.apic_tpr) >> 4) & 0xf;

    //guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    // 
    
    guest_state->rflags = info->ctrl_regs.rflags;

    // LMA ,LME, SVE?

    guest_state->efer = info->ctrl_regs.efer;
    
    /* Synchronize MSRs */
    guest_state->star = info->msrs.star;
    guest_state->lstar = info->msrs.lstar;
    guest_state->sfmask = info->msrs.sfmask;
    guest_state->KernelGsBase = info->msrs.kern_gs_base;

    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

    V3_FP_ENTRY_RESTORE(info);

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 

    update_irq_entry_state(info);
#endif

#ifdef V3_CONFIG_TM_FUNC
    v3_tm_check_intr_state(info, guest_ctrl, guest_state);
#endif


    /* ** */

    /*
      PrintDebug(info->vm_info, info, "SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print(info->vm_info, info, "!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_svm_config_tsc_virtualization(info);

    //V3_Print(info->vm_info, info, "Calling v3_svm_launch\n");
    {   
        uint64_t entry_tsc = 0;
        uint64_t exit_tsc = 0;
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_enter(info);
#endif

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_enter(info);
#endif


        if (guest_ctrl->EVENTINJ.valid && guest_ctrl->interrupt_shadow) { 
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug(info->vm_info,info,"Event injection during an interrupt shadow\n");
#endif
        }

        rdtscll(entry_tsc);

        v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

        rdtscll(exit_tsc);

#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_exit(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_exit(info);
#endif

        guest_cycles = exit_tsc - entry_tsc;
    }


    //V3_Print(info->vm_info, info, "SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    v3_advance_time(info, &guest_cycles);

    info->num_exits++;

    V3_FP_EXIT_SAVE(info);

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    //
    // We do not track this anymore
    // V_TPR is ignored and we do the logic in the APIC
    //info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    //
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    /* Synchronize MSRs */
    info->msrs.star =  guest_state->star;
    info->msrs.lstar = guest_state->lstar;
    info->msrs.sfmask = guest_state->sfmask;
    info->msrs.kern_gs_base = guest_state->KernelGsBase;

    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // reenable global interrupts after vm exit
    v3_stgi();

    // Conditionally yield the CPU if the timeslice has expired
    v3_schedule(info);

    // This update timers is for time-dependent handlers
    // if we're slaved to host time
    v3_advance_time(info, NULL);
    v3_update_timers(info);


    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError(info->vm_info, info, "Error in SVM exit handler (ret=%d)\n", ret);
            PrintError(info->vm_info, info, "  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);

            return -1;
        }
    }


    if (info->timeouts.timeout_active) {
        /* Check to see if any timeouts have expired */
        v3_handle_timeouts(info, guest_cycles);
    }

#ifdef V3_CONFIG_MEM_TRACK
    v3_mem_track_exit(info);
#endif 


    return 0;
}

int v3_start_svm_guest(struct guest_info * info) {

    int started=0;

    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug(info->vm_info, info, "Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);


#ifdef V3_CONFIG_MULTIBOOT
    if (v3_setup_multiboot_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup Multiboot core...\n");
        return -1;
    }
#endif

#ifdef V3_CONFIG_HVM
    if (v3_setup_hvm_hrt_core_for_boot(info)) { 
        PrintError(info->vm_info, info, "Failed to setup HRT core...\n");
        return -1;
    } 
#endif
 
    while (1) {

        if (info->core_run_state == CORE_STOPPED) {

            if (info->vcpu_id == 0) {
                info->core_run_state = CORE_RUNNING;
            } else { 
                PrintDebug(info->vm_info, info, "SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

                V3_NO_WORK(info);

                // Compiler must not optimize away this read
                while (*((volatile int *)(&info->core_run_state)) == CORE_STOPPED) {
                    
                    if (info->vm_info->run_state == VM_STOPPED) {
                        // The VM was stopped before this core was initialized. 
                        return 0;
                    }
                    
                    V3_STILL_NO_WORK(info);

                    //PrintDebug(info->vm_info, info, "SVM core %u: still waiting for INIT\n", info->vcpu_id);
                }

                V3_HAVE_WORK_AGAIN(info);
                
                PrintDebug(info->vm_info, info, "SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
                
                // We'll be paranoid about race conditions here
                v3_wait_at_barrier(info);
            } 
        }

        if (!started) {

            started=1;
            
            PrintDebug(info->vm_info, info, "SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
                       info->vcpu_id, info->pcpu_id, 
                       info->segments.cs.selector, (void *)(info->segments.cs.base), 
                       info->segments.cs.limit, (void *)(info->rip));
            
            
            
            PrintDebug(info->vm_info, info, "SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
                       info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);

#ifdef V3_CONFIG_DEBUG_SVM
            PrintDebugVMCB((vmcb_t*)(info->vmm_data));
#endif
            
            v3_start_time(info);
        }
        
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        
#ifdef V3_CONFIG_HVM
        if (v3_handle_hvm_reset(info) > 0) { 
            continue;
        }
#endif
       
#ifdef V3_CONFIG_MULTIBOOT
        if (v3_handle_multiboot_reset(info) > 0) {
            continue;
        }
#endif
        
        if (svm_handle_standard_reset(info) > 0 ) {
            continue;
        }
        


#ifdef V3_CONFIG_PMU_TELEMETRY
        v3_pmu_telemetry_start(info);
#endif
        
#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
        v3_pwrstat_telemetry_start(info);
#endif
        
        if (v3_svm_enter(info) == -1 ) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print(info->vm_info, info, "SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print(info->vm_info, info, "SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                if (v3_gpa_to_hva(info, linear_addr, &host_addr)) {
                    PrintError(info->vm_info, info, "Cannot translate address\n");
                    break;
                }
            } else if (info->mem_mode == VIRTUAL_MEM) {
                if (v3_gva_to_hva(info, linear_addr, &host_addr)) {
                    PrintError(info->vm_info, info, "Cannot translate address\n");
                    break;
                }
            }
            
            V3_Print(info->vm_info, info, "SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print(info->vm_info, info, "SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);
            
            break;
        }
        
        v3_wait_at_barrier(info);
        

        if (info->vm_info->run_state == VM_STOPPED) {
            PrintDebug(info->vm_info,info,"Stopping core as VM is stopped\n");
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print(info->vm_info, info, "SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

#ifdef V3_CONFIG_PMU_TELEMETRY
    v3_pmu_telemetry_end(info);
#endif

#ifdef V3_CONFIG_PWRSTAT_TELEMETRY
    v3_pwrstat_telemetry_end(info);
#endif
    // Need to take down the other cores on error... 

    return 0;
}




int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}
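// Added commentary, worked example: v3_reset_svm_vm_core(core, 0x9f) yields
// CS.selector = 0x9f00 and CS.base = 0x9f000, so with RIP = 0 the core resumes
// in real mode at linear address 0x9f000, following the usual start-vector
// convention described above (the vector value 0x9f is just an illustration).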




/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
      V3_Print(VM_NONE, VCORE_NONE,  "SVM Not Available\n");
      return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug(VM_NONE, VCORE_NONE, "SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if (vm_cr_low & SVM_VM_CR_MSR_svmdis) {
            V3_Print(VM_NONE, VCORE_NONE, "SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print(VM_NONE, VCORE_NONE,  "SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print(VM_NONE, VCORE_NONE,  "SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug(VM_NONE, VCORE_NONE, "CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            if (!(edx & 0x8)) {  // CPUID Fn8000_000A EDX[3] = NRIPS (next-RIP save)
              PrintError(VM_NONE,VCORE_NONE, "WARNING: NO SVM SUPPORT FOR NRIP - SW INTR INJECTION WILL LIKELY FAIL\n");
            }

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug(VM_NONE, VCORE_NONE,  "CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print(VM_NONE, VCORE_NONE, "SVM Nested Paging supported\n");
        return 1;
    }
}
 


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print(VM_NONE, VCORE_NONE,  "SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4); // need not be shadow-safe, not exposed to guest

    if (!host_vmcbs[cpu_id]) {
        PrintError(VM_NONE, VCORE_NONE,  "Failed to allocate VMCB\n");
        return;
    }

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug(VM_NONE, VCORE_NONE,  "Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print(VM_NONE, VCORE_NONE,  "Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug(core->vm_info, core, "VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug(core->vm_info, core, "VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif