Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Merge branch 'devel' of palacios@newskysaw.cs.northwestern.edu:/home/palacios/palacio...
[palacios.git] / palacios / src / palacios / svm.c
1
2 /* 
3  * This file is part of the Palacios Virtual Machine Monitor developed
4  * by the V3VEE Project with funding from the United States National 
5  * Science Foundation and the Department of Energy.  
6  *
7  * The V3VEE Project is a joint project between Northwestern University
8  * and the University of New Mexico.  You can find out more at 
9  * http://www.v3vee.org
10  *
11  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
12  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved.
14  *
15  * Author: Jack Lange <jarusl@cs.northwestern.edu>
16  *
17  * This is free software.  You are permitted to use,
18  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
19  */
20
21
22 #include <palacios/svm.h>
23 #include <palacios/vmm.h>
24
25 #include <palacios/vmcb.h>
26 #include <palacios/vmm_mem.h>
27 #include <palacios/vmm_paging.h>
28 #include <palacios/svm_handler.h>
29
30 #include <palacios/vmm_debug.h>
31 #include <palacios/vm_guest_mem.h>
32
33 #include <palacios/vmm_decoder.h>
34 #include <palacios/vmm_string.h>
35 #include <palacios/vmm_lowlevel.h>
36 #include <palacios/svm_msr.h>
37
38 #include <palacios/vmm_rbtree.h>
39
40 #include <palacios/vmm_direct_paging.h>
41
42 #include <palacios/vmm_ctrl_regs.h>
43 #include <palacios/svm_io.h>
44
45 #include <palacios/vmm_sprintf.h>
46
47
48 #ifndef V3_CONFIG_DEBUG_SVM
49 #undef PrintDebug
50 #define PrintDebug(fmt, args...)
51 #endif
52
53
54 uint32_t v3_last_exit;
55
56 // This is a global pointer to the host's VMCB
57 static addr_t host_vmcbs[V3_CONFIG_MAX_CPUS] = { [0 ... V3_CONFIG_MAX_CPUS - 1] = 0};
58
59
60
61 extern void v3_stgi();
62 extern void v3_clgi();
63 //extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
64 extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);
65
66
67 static vmcb_t * Allocate_VMCB() {
68     vmcb_t * vmcb_page = NULL;
69     addr_t vmcb_pa = (addr_t)V3_AllocPages(1);
70
71     if ((void *)vmcb_pa == NULL) {
72         PrintError("Error allocating VMCB\n");
73         return NULL;
74     }
75
76     vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);
77
78     memset(vmcb_page, 0, 4096);
79
80     return vmcb_page;
81 }
82
83
84
/* Initialize a VMCB and the core's architectural state so the guest begins
 * execution at the x86 reset vector (as if entering the BIOS).
 *
 * Configures: instruction/exception intercepts, real-mode reset register and
 * segment state, the IO and MSR permission maps, interrupt exiting, the EFER
 * and VM_CR MSR hooks, and either shadow- or nested-paging state depending on
 * core->shdw_pg_mode.
 *
 * @param vmcb  the (zeroed) VMCB to populate
 * @param core  the guest core whose register state is being reset
 */
static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    // Intercept all SVM-specific instructions: the guest must never execute
    // them directly while virtualized.
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef V3_CONFIG_TIME_VIRTUALIZE_TSC
    // When virtualizing the TSC, timestamp reads must exit to the VMM
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    // x86 reset state: execution starts at the reset vector F000:FFF0
    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    // RDX at reset holds processor identification info
    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    // Keep SVME set in the guest-visible EFER while virtualized
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    // CS mirrors the reset vector: selector 0xf000, base 0xf0000
    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;



    // All data segments get identical flat real-mode state
    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    // Architectural reset values for the debug status/control registers
    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;


    // Point the VMCB at the VM-wide IO and MSR permission bitmaps and turn
    // on selective intercepting for both
    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    // EFER writes must be mediated (e.g. to control paging mode switches)
    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        // Enable paging (PG) and point CR3 at the passthrough tables
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        // Shadow paging requires intercepting CR accesses, INVLPG, and #PF
        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}
292
293
294 int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {
295
296     PrintDebug("Allocating VMCB\n");
297     core->vmm_data = (void *)Allocate_VMCB();
298     
299     if (core->vmm_data == NULL) {
300         PrintError("Could not allocate VMCB, Exiting...\n");
301         return -1;
302     }
303
304     if (vm_class == V3_PC_VM) {
305         PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
306         Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
307     } else {
308         PrintError("Invalid VM class\n");
309         return -1;
310     }
311
312     return 0;
313 }
314
315
/* Release the single VMCB page allocated for this core by v3_init_svm_vmcb().
 * Always returns 0. */
int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}
320
321
322 static int update_irq_exit_state(struct guest_info * info) {
323     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
324
325     // Fix for QEMU bug using EVENTINJ as an internal cache
326     guest_ctrl->EVENTINJ.valid = 0;
327
328     if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
329         
330 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
331         PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
332 #endif
333
334         info->intr_core_state.irq_started = 1;
335         info->intr_core_state.irq_pending = 0;
336
337         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
338     }
339
340     if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
341 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
342         PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
343 #endif
344
345         // Interrupt was taken fully vectored
346         info->intr_core_state.irq_started = 0;
347
348     } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
349 #ifdef V3_CONFIG_DEBUG_INTERRUPTS
350         PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
351 #endif
352     }
353
354     return 0;
355 }
356
357
/* Program the VMCB's event-injection and virtual-interrupt fields before a
 * VM entry.  Priority order: pending exceptions first, then re-assertion of
 * a previously started (but not yet completed) IRQ, then newly pending
 * interrupts.  Always returns 0. */
static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    // No IRQ in flight: make sure no stale virtual interrupt is asserted
    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        // Some exceptions push an error code; mark it valid (ev) if present
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
        // An earlier injection did not complete: re-assert the same vector
#ifdef V3_CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                // Assert a virtual interrupt; ignore the TPR, max priority
                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef V3_CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            // NOTE(review): the cases below set EVENTINJ.type but never set
            // EVENTINJ.valid or a vector — confirm whether these injections
            // are actually expected to fire as written.
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                break;
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}
442
443
444 /* 
445  * CAUTION and DANGER!!! 
446  * 
447  * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
448  * When exectuing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
449  * on its contents will cause things to break. The contents at the time of the exit WILL 
450  * change before the exit handler is executed.
451  */
452 int v3_svm_enter(struct guest_info * info) {
453     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
454     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
455     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
456
457     // Conditionally yield the CPU if the timeslice has expired
458     v3_yield_cond(info);
459
460     // Perform any additional yielding needed for time adjustment
461     v3_adjust_time(info);
462
463     // disable global interrupts for vm state transition
464     v3_clgi();
465
466     // Update timer devices after being in the VM, with interupts
467     // disabled, but before doing IRQ updates, so that any interrupts they 
468     //raise get seen immediately.
469     v3_update_timers(info);
470
471     // Synchronize the guest state to the VMCB
472     guest_state->cr0 = info->ctrl_regs.cr0;
473     guest_state->cr2 = info->ctrl_regs.cr2;
474     guest_state->cr3 = info->ctrl_regs.cr3;
475     guest_state->cr4 = info->ctrl_regs.cr4;
476     guest_state->dr6 = info->dbg_regs.dr6;
477     guest_state->dr7 = info->dbg_regs.dr7;
478     guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
479     guest_state->rflags = info->ctrl_regs.rflags;
480     guest_state->efer = info->ctrl_regs.efer;
481     
482     guest_state->cpl = info->cpl;
483
484     v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
485
486     guest_state->rax = info->vm_regs.rax;
487     guest_state->rip = info->rip;
488     guest_state->rsp = info->vm_regs.rsp;
489
490 #ifdef V3_CONFIG_SYMCALL
491     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
492         update_irq_entry_state(info);
493     }
494 #else 
495     update_irq_entry_state(info);
496 #endif
497
498
499     /* ** */
500
501     /*
502       PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
503       (void *)(addr_t)info->segments.cs.base, 
504       (void *)(addr_t)info->rip);
505     */
506
507 #ifdef V3_CONFIG_SYMCALL
508     if (info->sym_core_state.symcall_state.sym_call_active == 1) {
509         if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
510             V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
511         }
512     }
513 #endif
514
515     v3_time_enter_vm(info);
516     guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
517
518     //V3_Print("Calling v3_svm_launch\n");
519
520     v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
521
522     //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
523
524     v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
525
526     // Immediate exit from VM time bookkeeping
527     v3_time_exit_vm(info);
528
529     info->num_exits++;
530
531     // Save Guest state from VMCB
532     info->rip = guest_state->rip;
533     info->vm_regs.rsp = guest_state->rsp;
534     info->vm_regs.rax = guest_state->rax;
535
536     info->cpl = guest_state->cpl;
537
538     info->ctrl_regs.cr0 = guest_state->cr0;
539     info->ctrl_regs.cr2 = guest_state->cr2;
540     info->ctrl_regs.cr3 = guest_state->cr3;
541     info->ctrl_regs.cr4 = guest_state->cr4;
542     info->dbg_regs.dr6 = guest_state->dr6;
543     info->dbg_regs.dr7 = guest_state->dr7;
544     info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
545     info->ctrl_regs.rflags = guest_state->rflags;
546     info->ctrl_regs.efer = guest_state->efer;
547     
548     v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
549     info->cpu_mode = v3_get_vm_cpu_mode(info);
550     info->mem_mode = v3_get_vm_mem_mode(info);
551     /* ** */
552
553     // save exit info here
554     exit_code = guest_ctrl->exit_code;
555     exit_info1 = guest_ctrl->exit_info1;
556     exit_info2 = guest_ctrl->exit_info2;
557
558 #ifdef V3_CONFIG_SYMCALL
559     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
560         update_irq_exit_state(info);
561     }
562 #else
563     update_irq_exit_state(info);
564 #endif
565
566     // reenable global interrupts after vm exit
567     v3_stgi();
568  
569     // Conditionally yield the CPU if the timeslice has expired
570     v3_yield_cond(info);
571
572     {
573         int ret = v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2);
574         
575         if (ret != 0) {
576             PrintError("Error in SVM exit handler (ret=%d)\n", ret);
577             PrintError("  last Exit was %d (exit code=0x%llx)\n", v3_last_exit, (uint64_t) exit_code);
578             return -1;
579         }
580     if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
581         PrintError("Error in SVM exit handler\n");
582         PrintError("  last exit was %d\n", v3_last_exit);
583         return -1;
584     }
585
586
587     return 0;
588 }
589
590
/* Main execution loop for an SVM guest core.
 *
 * Core 0 starts the whole VM (flips the VM and its own core to RUNNING);
 * every other core spins until its core_run_state leaves CORE_STOPPED.
 * The loop then repeatedly calls v3_svm_enter() until the VM is stopped or
 * an entry fails.  On failure the VM is marked VM_ERROR and extensive
 * diagnostic state (exit info, guest registers, faulting instruction bytes,
 * stack) is dumped before breaking out.
 *
 * Returns 0 when the core's loop ends (both on clean stop and after an
 * error dump). */
int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u (on logical core %u)\n", info->vcpu_id, info->pcpu_id);

    if (info->vcpu_id == 0) {
        // Boot core: releases the secondary cores and marks the VM running
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else  { 
        PrintDebug("SVM core %u (on %u): Waiting for core initialization\n", info->vcpu_id, info->pcpu_id);

        // Secondary cores wait (yielding) until released by the boot core
        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n", info->vcpu_id);
        }

        PrintDebug("SVM core %u(on %u) initialized\n", info->vcpu_id, info->pcpu_id);
    } 

    PrintDebug("SVM core %u(on %u): I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->vcpu_id, info->pcpu_id, 
               info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p) (on cpu %u)\n", 
               info->vcpu_id, (void *)info->vmm_data, info->pcpu_id);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        // Checked both before and after each entry so a stop request is
        // honored promptly
        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->vcpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->vcpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            // Dump exit_info1/2 a 32-bit half at a time
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->vcpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->vcpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            // Translate the guest RIP to a host address so the faulting
            // instruction bytes can be dumped
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->vcpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->vcpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        

/*
        if ((info->num_exits % 50000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
            v3_print_guest_state(info);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}
689
690
691
692
693 int v3_reset_svm_vm_core(struct guest_info * core, addr_t rip) {
694     // init vmcb_bios
695
696     // Write the RIP, CS, and descriptor
697     // assume the rest is already good to go
698     //
699     // vector VV -> rip at 0
700     //              CS = VV00
701     //  This means we start executing at linear address VV000
702     //
703     // So the selector needs to be VV00
704     // and the base needs to be VV000
705     //
706     core->rip = 0;
707     core->segments.cs.selector = rip << 8;
708     core->segments.cs.limit = 0xffff;
709     core->segments.cs.base = rip << 12;
710
711     return 0;
712 }
713
714
715
716
717
718
719 /* Checks machine SVM capability */
720 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
721 int v3_is_svm_capable() {
722     uint_t vm_cr_low = 0, vm_cr_high = 0;
723     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
724
725     v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
726   
727     PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);
728
729     if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
730       V3_Print("SVM Not Available\n");
731       return 0;
732     }  else {
733         v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
734         
735         PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
736         
737         if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 1) {
738             V3_Print("SVM is available but is disabled.\n");
739             
740             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
741             
742             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
743             
744             if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
745                 V3_Print("SVM BIOS Disabled, not unlockable\n");
746             } else {
747                 V3_Print("SVM is locked with a key\n");
748             }
749             return 0;
750
751         } else {
752             V3_Print("SVM is available and  enabled.\n");
753
754             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
755             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
756             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
757             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
758             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
759
760             return 1;
761         }
762     }
763 }
764
765 static int has_svm_nested_paging() {
766     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
767
768     v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
769
770     //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
771
772     if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
773         V3_Print("SVM Nested Paging not supported\n");
774         return 0;
775     } else {
776         V3_Print("SVM Nested Paging supported\n");
777         return 1;
778     }
779 }
780
781
782
783 void v3_init_svm_cpu(int cpu_id) {
784     reg_ex_t msr;
785     extern v3_cpu_arch_t v3_cpu_types[];
786
787     // Enable SVM on the CPU
788     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
789     msr.e_reg.low |= EFER_MSR_svm_enable;
790     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
791
792     V3_Print("SVM Enabled\n");
793
794     // Setup the host state save area
795     host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
796
797     /* 64-BIT-ISSUE */
798     //  msr.e_reg.high = 0;
799     //msr.e_reg.low = (uint_t)host_vmcb;
800     msr.r_reg = host_vmcbs[cpu_id];
801
802     PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
803     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
804
805
806     if (has_svm_nested_paging() == 1) {
807         v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
808     } else {
809         v3_cpu_types[cpu_id] = V3_SVM_CPU;
810     }
811 }
812
813
814
815 void v3_deinit_svm_cpu(int cpu_id) {
816     reg_ex_t msr;
817     extern v3_cpu_arch_t v3_cpu_types[];
818
819     // reset SVM_VM_HSAVE_PA_MSR
820     // Does setting it to NULL disable??
821     msr.r_reg = 0;
822     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
823
824     // Disable SVM?
825     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
826     msr.e_reg.low &= ~EFER_MSR_svm_enable;
827     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
828
829     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
830
831     V3_FreePages((void *)host_vmcbs[cpu_id], 4);
832
833     V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
834     return;
835 }
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886 #if 0
887 /* 
888  * Test VMSAVE/VMLOAD Latency 
889  */
890 #define vmsave ".byte 0x0F,0x01,0xDB ; "
891 #define vmload ".byte 0x0F,0x01,0xDA ; "
892 {
893     uint32_t start_lo, start_hi;
894     uint32_t end_lo, end_hi;
895     uint64_t start, end;
896     
897     __asm__ __volatile__ (
898                           "rdtsc ; "
899                           "movl %%eax, %%esi ; "
900                           "movl %%edx, %%edi ; "
901                           "movq  %%rcx, %%rax ; "
902                           vmsave
903                           "rdtsc ; "
904                           : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
905                           : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
906                           );
907     
908     start = start_hi;
909     start <<= 32;
910     start += start_lo;
911     
912     end = end_hi;
913     end <<= 32;
914     end += end_lo;
915     
916     PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
917     
918     __asm__ __volatile__ (
919                           "rdtsc ; "
920                           "movl %%eax, %%esi ; "
921                           "movl %%edx, %%edi ; "
922                           "movq  %%rcx, %%rax ; "
923                           vmload
924                           "rdtsc ; "
925                           : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
926                               : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
927                               );
928         
929         start = start_hi;
930         start <<= 32;
931         start += start_lo;
932
933         end = end_hi;
934         end <<= 32;
935         end += end_lo;
936
937
938         PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
939     }
940     /* End Latency Test */
941
942 #endif
943
944
945
946
947
948
949
950 #if 0
/* NOTE(review): dead code — this entire function sits inside an "#if 0"
 * region.  It is a legacy protected-mode VMCB initializer ("gutted from
 * TVMM" per the comment below) kept only for reference; it references
 * host-state helpers (GetGDTR/GetIDTR) and a calling convention that the
 * live code no longer uses.  Do not assume it compiles against the current
 * headers. */
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means its probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}
1109
1110
1111
1112
1113
1114 #endif
1115
1116