Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are similar.
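For example, to work from a release branch the same pattern applies (the branch name "release-1.3" below is only illustrative; run "git branch -r" to see which branches actually exist):

  git checkout --track -b release-1.3 origin/release-1.3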


palacios/src/palacios/svm.c (commit a6a6879e96f20c969422724a1bb44adfb163608d)
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>
#include <palacios/vmm_barrier.h>

#ifdef V3_CONFIG_CHECKPOINT
#include <palacios/vmm_checkpoint.h>
#endif

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef V3_CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Physical addresses of the per-core host state save areas
static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}
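
/* The VMCB must be a 4KB, page-aligned, physically contiguous region.
 * V3_AllocPages() returns its physical address; the virtual mapping from
 * V3_VAddr() is what the VMM uses for field accesses.  VMRUN itself
 * consumes the physical address, which is why v3_svm_enter() below
 * translates vmm_data back with V3_PAddr() at launch time. */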


static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data)
{
    int status;

    // Call arch-independent handler
    if ((status = v3_handle_efer_write(core, msr, src, priv_data)) != 0) {
        return status;
    }

    // SVM-specific code
    {
        // Ensure that hardware visible EFER.SVME bit is set (SVM Enable)
        struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer);
        hw_efer->svme = 1;
    }

    return 0;
}
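
/* The guest sees a virtualized EFER, but the value that reaches hardware
 * must keep SVME set while Palacios runs under SVM: per the AMD manual,
 * VMRUN raises #UD if executed with EFER.SVME clear. */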


static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */


    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;
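
    /* (The cr0 value 0x60010010 is CD | NW | WP | ET: the architectural
     * power-on value 0x60000010 plus write-protect, which is what makes
     * read-only shadow/hook pages fault even for ring-0 guest code.) */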



    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_svm_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}
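
/* The reset state above places the first instruction fetch at linear
 * address 0xffff0 (CS base 0xf0000 + RIP 0xfff0), the traditional
 * real-mode BIOS entry point, with a CPU identification value in RDX
 * as real hardware provides at reset. */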


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


#ifdef V3_CONFIG_CHECKPOINT
int v3_svm_save_core(struct guest_info * core, void * ctx){

    v3_chkpt_save(ctx, "vmcb_data", PAGE_SIZE, core->vmm_data);

    return 0;
}

int v3_svm_patch_core(struct guest_info * core, void * chkpt_ctx){
    struct cr0_32 * shadow_cr0;
    vmcb_saved_state_t * guest_state; 
    vmcb_ctrl_t * guest_ctrl;



    if (v3_chkpt_load(chkpt_ctx, "vmcb_data", PAGE_SIZE, core->vmm_data) == -1){
        return -1;
    }

    guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t *)(core->vmm_data));
    guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t *)(core->vmm_data));

        
    core->rip = guest_state->rip;
    core->vm_regs.rsp = guest_state->rsp;
    core->vm_regs.rax = guest_state->rax;

    core->cpl = guest_state->cpl;

    core->ctrl_regs.cr0 = guest_state->cr0;
    core->ctrl_regs.cr2 = guest_state->cr2;
    core->ctrl_regs.cr4 = guest_state->cr4;
    core->dbg_regs.dr6 = guest_state->dr6;
    core->dbg_regs.dr7 = guest_state->dr7;
    core->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    core->ctrl_regs.rflags = guest_state->rflags;
    core->ctrl_regs.efer = guest_state->efer;

                
    shadow_cr0 = (struct cr0_32 *)&(core->ctrl_regs.cr0);


    if (core->shdw_pg_mode == SHADOW_PAGING) {
        if (shadow_cr0->pg){
            if (v3_activate_passthrough_pt(core) == -1) {
                PrintError("Failed to activate passthrough page tables\n");
                return -1;
            }
        }
    }


    v3_get_vmcb_segments((vmcb_t*)(core->vmm_data), &(core->segments));
    return 0;
}
#endif
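
/* Checkpointing note: the VMCB is saved and restored as a raw 4KB page
 * ("vmcb_data"), so v3_svm_patch_core() has to copy the restored register
 * state back into struct guest_info, and re-activate the passthrough page
 * tables if the checkpointed guest had paging enabled. */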

static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}
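
/* EXITINTINFO semantics: when its valid bit is set at #VMEXIT, the exit
 * interrupted an event delivery that was still in flight, so irq_started
 * is deliberately left set and the interrupt gets re-injected on the
 * next entry. */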


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting software interrupt --  type: %d, vector: %d\n", 
                           SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;
            
                /* reset swintr state */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}
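
/* Two injection mechanisms are in play above: external IRQs go through the
 * virtual interrupt fields (V_IRQ/V_INTR_VECTOR), which hold the interrupt
 * until the guest can actually take it (e.g. once it sets RFLAGS.IF),
 * while exceptions, NMIs, and software interrupts go through EVENTINJ,
 * which delivers the event unconditionally on the next VMRUN. */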


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices after being in the VM, with interrupts
    // disabled, but before doing IRQ updates, so that any interrupts they
    // raise get seen immediately.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);


    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */

    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;

#ifdef V3_CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif

    // re-enable global interrupts after vm exit
    v3_stgi();
 
    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    {
        int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
        
        if (ret != 0) {
            PrintError("Error in SVM exit handler (ret=%d)\n", ret);
            PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
            return -1;
        }
    }


    return 0;
}
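
/* The exit information (exit_code, exit_info1, exit_info2) is latched into
 * locals before v3_stgi() above: per the warning preceding v3_svm_enter(),
 * the VMCB contents are only stable between clgi/stgi, so the exit handler
 * is handed the saved copies rather than the live VMCB fields. */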


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else {
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u (on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    }

    PrintDebug("SVM core %u (on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x), RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }

        v3_wait_at_barrier(info);


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}



int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
    // init vmcb_bios

    // Write the RIP, CS, and descriptor
    // assume the rest is already good to go
    //
    // vector VV -> rip at 0
    //              CS = VV00
    //  This means we start executing at linear address VV000
    //
    // So the selector needs to be VV00
    // and the base needs to be VV000
    //
    core->rip = 0;
    core->segments.cs.selector = rip << 8;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = rip << 12;

    return 0;
}
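
/* Worked example (illustrative): an INIT-SIPI startup vector of 0x9a
 * yields CS.selector = 0x9a00, CS.base = 0x9a000, and RIP = 0, so the
 * core begins execution at linear address 0x9a000, matching the hardware
 * SIPI convention of entry at VV000h. */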





/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
    
    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
    
    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
    
    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}
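
/* Per-core SVM bring-up thus has two hardware prerequisites (AMD APM
 * vol. 2): EFER.SVME must be set before VMRUN will execute, and
 * VM_HSAVE_PA must hold the page-aligned physical address of a region
 * where the CPU spills host state across VMRUN/#VMEXIT. */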


void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}



#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif



#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    // set every bitmap bit so all I/O ports are intercepted
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n", idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}




#endif
