Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way; see the example below.
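For example, to list the available remote branches and then track one of the release branches (the branch name below is illustrative; check the actual list):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2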


added ability to inject an env variable into guest user process
[palacios.git] / palacios / src / palacios / svm.c
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */


#include <palacios/svm.h>
#include <palacios/vmm.h>

#include <palacios/vmcb.h>
#include <palacios/vmm_mem.h>
#include <palacios/vmm_paging.h>
#include <palacios/svm_handler.h>

#include <palacios/vmm_debug.h>
#include <palacios/vm_guest_mem.h>

#include <palacios/vmm_decoder.h>
#include <palacios/vmm_string.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/svm_msr.h>

#include <palacios/vmm_rbtree.h>

#include <palacios/vmm_direct_paging.h>

#include <palacios/vmm_ctrl_regs.h>
#include <palacios/svm_io.h>

#include <palacios/vmm_sprintf.h>


#ifndef CONFIG_DEBUG_SVM
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


uint32_t v3_last_exit;

// Per-CPU physical addresses of the host save state areas (host VMCBs)
static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};



extern void v3_stgi();
extern void v3_clgi();
//extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);


static vmcb_t * Allocate_VMCB() {
    vmcb_t * vmcb_page = NULL;
    addr_t vmcb_pa = (addr_t)V3_AllocPages(1);

    if ((void *)vmcb_pa == NULL) {
        PrintError("Error allocating VMCB\n");
        return NULL;
    }

    vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);

    memset(vmcb_page, 0, 4096);

    return vmcb_page;
}
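
/* Note: the VMCB must be a 4KB-aligned region of physically contiguous
 * memory (AMD APM vol. 2). V3_AllocPages returns a page-aligned physical
 * address, which is why the page is cleared above through its
 * host-virtual mapping rather than directly. */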



static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
    vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
    uint_t i;


    //
    ctrl_area->svm_instrs.VMRUN = 1;
    ctrl_area->svm_instrs.VMMCALL = 1;
    ctrl_area->svm_instrs.VMLOAD = 1;
    ctrl_area->svm_instrs.VMSAVE = 1;
    ctrl_area->svm_instrs.STGI = 1;
    ctrl_area->svm_instrs.CLGI = 1;
    ctrl_area->svm_instrs.SKINIT = 1;
    ctrl_area->svm_instrs.ICEBP = 1;
    ctrl_area->svm_instrs.WBINVD = 1;
    ctrl_area->svm_instrs.MONITOR = 1;
    ctrl_area->svm_instrs.MWAIT_always = 1;
    ctrl_area->svm_instrs.MWAIT_if_armed = 1;
    ctrl_area->instrs.INVLPGA = 1;
    ctrl_area->instrs.CPUID = 1;

    ctrl_area->instrs.HLT = 1;

#ifdef CONFIG_TIME_VIRTUALIZE_TSC
    ctrl_area->instrs.RDTSC = 1;
    ctrl_area->svm_instrs.RDTSCP = 1;
#endif

    // guest_state->cr0 = 0x00000001;    // PE 
  
    /*
      ctrl_area->exceptions.de = 1;
      ctrl_area->exceptions.df = 1;
      
      ctrl_area->exceptions.ts = 1;
      ctrl_area->exceptions.ss = 1;
      ctrl_area->exceptions.ac = 1;
      ctrl_area->exceptions.mc = 1;
      ctrl_area->exceptions.gp = 1;
      ctrl_area->exceptions.ud = 1;
      ctrl_area->exceptions.np = 1;
      ctrl_area->exceptions.of = 1;
      
      ctrl_area->exceptions.nmi = 1;
    */
    

    ctrl_area->instrs.NMI = 1;
    ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
    ctrl_area->instrs.INIT = 1;
    ctrl_area->instrs.PAUSE = 1;
    ctrl_area->instrs.shutdown_evts = 1;

    /* KCH: intercept SW Interrupts (INT instr) */
#ifdef CONFIG_SW_INTERRUPTS
    ctrl_area->instrs.INTn = 1;
#endif


    /* DEBUG FOR RETURN CODE */
    ctrl_area->exit_code = 1;


    /* Setup Guest Machine state */

    core->vm_regs.rsp = 0x00;
    core->rip = 0xfff0;

    core->vm_regs.rdx = 0x00000f00;


    core->cpl = 0;

    core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
    core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
    core->ctrl_regs.efer |= EFER_MSR_svm_enable;




    core->segments.cs.selector = 0xf000;
    core->segments.cs.limit = 0xffff;
    core->segments.cs.base = 0x0000000f0000LL;
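    /* Note: CS selector 0xf000 with base 0xf0000 and RIP 0xfff0 puts the
     * first guest fetch at linear address 0xffff0, the legacy BIOS entry
     * point at the top of the first megabyte. */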

    // (raw attributes = 0xf3)
    core->segments.cs.type = 0x3;
    core->segments.cs.system = 0x1;
    core->segments.cs.dpl = 0x3;
    core->segments.cs.present = 1;

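    /* A sketch of how those fields pack into the 8-bit attribute byte,
     * following the usual descriptor layout: type (bits 3:0) = 0x3,
     * S (bit 4) = 1, DPL (bits 6:5) = 3, P (bit 7) = 1, giving
     * 0x3 | (1 << 4) | (3 << 5) | (1 << 7) = 0xf3. */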


    struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
                                      &(core->segments.es), &(core->segments.fs), 
                                      &(core->segments.gs), NULL};

    for ( i = 0; segregs[i] != NULL; i++) {
        struct v3_segment * seg = segregs[i];
        
        seg->selector = 0x0000;
        //    seg->base = seg->selector << 4;
        seg->base = 0x00000000;
        seg->limit = ~0u;

        // (raw attributes = 0xf3)
        seg->type = 0x3;
        seg->system = 0x1;
        seg->dpl = 0x3;
        seg->present = 1;
    }

    core->segments.gdtr.limit = 0x0000ffff;
    core->segments.gdtr.base = 0x0000000000000000LL;
    core->segments.idtr.limit = 0x0000ffff;
    core->segments.idtr.base = 0x0000000000000000LL;

    core->segments.ldtr.selector = 0x0000;
    core->segments.ldtr.limit = 0x0000ffff;
    core->segments.ldtr.base = 0x0000000000000000LL;
    core->segments.tr.selector = 0x0000;
    core->segments.tr.limit = 0x0000ffff;
    core->segments.tr.base = 0x0000000000000000LL;


    core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
    core->dbg_regs.dr7 = 0x0000000000000400LL;
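    /* Note: 0xffff0ff0 and 0x400 are the architectural reset values of
     * DR6 and DR7, so the guest sees the debug registers in their
     * power-on state. */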


    ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
    ctrl_area->instrs.IOIO_PROT = 1;
            
    ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
    ctrl_area->instrs.MSR_PROT = 1;   


    PrintDebug("Exiting on interrupts\n");
    ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
    ctrl_area->instrs.INTR = 1;


    v3_hook_msr(core->vm_info, EFER_MSR, 
                &v3_handle_efer_read,
                &v3_handle_efer_write, 
                core);

#ifdef CONFIG_HIJACK_MSR
    /* KCH: for syscall hijacking */
    v3_hook_msr(core->vm_info, STAR_MSR,
        &v3_handle_star_read,
        &v3_handle_star_write,
        core);
    v3_hook_msr(core->vm_info, LSTAR_MSR,
        &v3_handle_lstar_read,
        &v3_handle_lstar_write,
        core);
    v3_hook_msr(core->vm_info, CSTAR_MSR,
        &v3_handle_cstar_read,
        &v3_handle_cstar_write,
        core);
#endif

    if (core->shdw_pg_mode == SHADOW_PAGING) {
        PrintDebug("Creating initial shadow page table\n");
        
        /* JRL: This is a performance killer, and a simplistic solution */
        /* We need to fix this */
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;
        
        
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize passthrough page tables\n");
            return ;
        }


        core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
        PrintDebug("Created\n");
        
        core->ctrl_regs.cr0 |= 0x80000000;
        core->ctrl_regs.cr3 = core->direct_map_pt;

        ctrl_area->cr_reads.cr0 = 1;
        ctrl_area->cr_writes.cr0 = 1;
        //ctrl_area->cr_reads.cr4 = 1;
        ctrl_area->cr_writes.cr4 = 1;
        ctrl_area->cr_reads.cr3 = 1;
        ctrl_area->cr_writes.cr3 = 1;



        ctrl_area->instrs.INVLPG = 1;

        ctrl_area->exceptions.pf = 1;

        guest_state->g_pat = 0x7040600070406ULL;
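        /* A sketch of that PAT value, byte by byte from the low end
         * (0x0007040600070406): entries 0-3 are WB (0x06), WT (0x04),
         * UC- (0x07), UC (0x00), repeated for entries 4-7 -- the
         * architectural power-on default for the PAT MSR. */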



    } else if (core->shdw_pg_mode == NESTED_PAGING) {
        // Flush the TLB on entries/exits
        ctrl_area->TLB_CONTROL = 1;
        ctrl_area->guest_ASID = 1;

        // Enable Nested Paging
        ctrl_area->NP_ENABLE = 1;

        PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));

        // Set the Nested Page Table pointer
        if (v3_init_passthrough_pts(core) == -1) {
            PrintError("Could not initialize Nested page tables\n");
            return ;
        }

        ctrl_area->N_CR3 = core->direct_map_pt;

        guest_state->g_pat = 0x7040600070406ULL;
    }
    
    /* tell the guest that we don't support SVM */
    v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
        &v3_handle_vm_cr_read,
        &v3_handle_vm_cr_write, 
        core);
}


int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {

    PrintDebug("Allocating VMCB\n");
    core->vmm_data = (void *)Allocate_VMCB();
    
    if (core->vmm_data == NULL) {
        PrintError("Could not allocate VMCB, Exiting...\n");
        return -1;
    }

    if (vm_class == V3_PC_VM) {
        PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
        Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
    } else {
        PrintError("Invalid VM class\n");
        return -1;
    }

    return 0;
}


int v3_deinit_svm_vmcb(struct guest_info * core) {
    V3_FreePages(V3_PAddr(core->vmm_data), 1);
    return 0;
}


static int update_irq_exit_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    // Fix for QEMU bug using EVENTINJ as an internal cache
    guest_ctrl->EVENTINJ.valid = 0;

    if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
        
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
#endif

        info->intr_core_state.irq_started = 1;
        info->intr_core_state.irq_pending = 0;

        v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
    }

    if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
#endif

        // Interrupt was taken fully vectored
        info->intr_core_state.irq_started = 0;

    } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
#endif
    }

    return 0;
}


static int update_irq_entry_state(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));


    if (info->intr_core_state.irq_pending == 0) {
        guest_ctrl->guest_ctrl.V_IRQ = 0;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
    }
    
    if (v3_excp_pending(info)) {
        uint_t excp = v3_get_excp_number(info);
        
        guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
        
        if (info->excp_state.excp_error_code_valid) {
            guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
            guest_ctrl->EVENTINJ.ev = 1;
#ifdef CONFIG_DEBUG_INTERRUPTS
            PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
#endif
        }
        
        guest_ctrl->EVENTINJ.vector = excp;
        
        guest_ctrl->EVENTINJ.valid = 1;
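        /* For reference, the EVENTINJ field layout (AMD APM vol. 2):
         * bits 7:0 vector, bits 10:8 type (0 = external interrupt,
         * 2 = NMI, 3 = exception, 4 = software interrupt), bit 11 EV
         * (error code valid), bit 31 V (valid), bits 63:32 error code. */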

#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
                   (int)info->num_exits, 
                   guest_ctrl->EVENTINJ.vector, 
                   (void *)(addr_t)info->ctrl_regs.cr2,
                   (void *)(addr_t)info->rip);
#endif

        v3_injecting_excp(info, excp);
    } else if (info->intr_core_state.irq_started == 1) {
#ifdef CONFIG_DEBUG_INTERRUPTS
        PrintDebug("IRQ pending from previous injection\n");
#endif
        guest_ctrl->guest_ctrl.V_IRQ = 1;
        guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
        guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
        guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

    } else {
        switch (v3_intr_pending(info)) {
            case V3_EXTERNAL_IRQ: {
                uint32_t irq = v3_get_intr(info);

                guest_ctrl->guest_ctrl.V_IRQ = 1;
                guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
                guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
                guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;

#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
                           guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
                           (void *)(addr_t)info->rip);
#endif

                info->intr_core_state.irq_pending = 1;
                info->intr_core_state.irq_vector = irq;
                
                break;
            }
            case V3_NMI:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
                break;
            case V3_SOFTWARE_INTR: {
#ifdef CONFIG_DEBUG_INTERRUPTS
                PrintDebug("Caught an injected software interrupt\n");
                PrintDebug("\ttype: %d, vector: %d\n", SVM_INJECTION_SOFT_INTR, info->intr_core_state.swintr_vector);
#endif
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
                guest_ctrl->EVENTINJ.vector = info->intr_core_state.swintr_vector;
                guest_ctrl->EVENTINJ.valid = 1;

                /* Reset the software interrupt state. We can do this
                   because we know only one sw int can be posted at a
                   time on a given core, unlike irqs. */
                info->intr_core_state.swintr_posted = 0;
                info->intr_core_state.swintr_vector = 0;
                break;
            }
            case V3_VIRTUAL_IRQ:
                guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
                break;

            case V3_INVALID_INTR:
            default:
                break;
        }
        
    }

    return 0;
}


/* 
 * CAUTION and DANGER!!! 
 * 
 * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function.
 * When executing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
 * on its contents will cause things to break. The contents at the time of the exit WILL 
 * change before the exit handler is executed.
 */
int v3_svm_enter(struct guest_info * info) {
    vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
    addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;

    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);

    // Perform any additional yielding needed for time adjustment
    v3_adjust_time(info);

    // disable global interrupts for vm state transition
    v3_clgi();

    // Update timer devices prior to entering VM.
    v3_update_timers(info);

    // Synchronize the guest state to the VMCB
    guest_state->cr0 = info->ctrl_regs.cr0;
    guest_state->cr2 = info->ctrl_regs.cr2;
    guest_state->cr3 = info->ctrl_regs.cr3;
    guest_state->cr4 = info->ctrl_regs.cr4;
    guest_state->dr6 = info->dbg_regs.dr6;
    guest_state->dr7 = info->dbg_regs.dr7;
    guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
    guest_state->rflags = info->ctrl_regs.rflags;
    guest_state->efer = info->ctrl_regs.efer;
    
    guest_state->cpl = info->cpl;

    v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));

    guest_state->rax = info->vm_regs.rax;
    guest_state->rip = info->rip;
    guest_state->rsp = info->vm_regs.rsp;

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_entry_state(info);
    }
#else 
    update_irq_entry_state(info);
#endif


    /* ** */

    /*
      PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
      (void *)(addr_t)info->segments.cs.base, 
      (void *)(addr_t)info->rip);
    */

#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 1) {
        if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
            V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
        }
    }
#endif

    v3_time_enter_vm(info);
    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);

    //V3_Print("Calling v3_svm_launch\n");

    v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
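    /* A sketch of what happens here, assuming the usual SVM world-switch
     * pattern in the v3_svm_launch assembly stub: host GPRs are saved,
     * guest GPRs are loaded from vm_regs, VMRUN executes with RAX holding
     * the guest VMCB physical address, and on #VMEXIT the guest GPRs are
     * stored back to vm_regs before the stub returns. */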

    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);

    v3_last_exit = (uint32_t)(guest_ctrl->exit_code);

    // Immediate exit from VM time bookkeeping
    v3_time_exit_vm(info);

    info->num_exits++;

    // Save Guest state from VMCB
    info->rip = guest_state->rip;
    info->vm_regs.rsp = guest_state->rsp;
    info->vm_regs.rax = guest_state->rax;

    info->cpl = guest_state->cpl;

    info->ctrl_regs.cr0 = guest_state->cr0;
    info->ctrl_regs.cr2 = guest_state->cr2;
    info->ctrl_regs.cr3 = guest_state->cr3;
    info->ctrl_regs.cr4 = guest_state->cr4;
    info->dbg_regs.dr6 = guest_state->dr6;
    info->dbg_regs.dr7 = guest_state->dr7;
    info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
    info->ctrl_regs.rflags = guest_state->rflags;
    info->ctrl_regs.efer = guest_state->efer;
    
    v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
    info->cpu_mode = v3_get_vm_cpu_mode(info);
    info->mem_mode = v3_get_vm_mem_mode(info);
    /* ** */


    // save exit info here
    exit_code = guest_ctrl->exit_code;
    exit_info1 = guest_ctrl->exit_info1;
    exit_info2 = guest_ctrl->exit_info2;


#ifdef CONFIG_SYMCALL
    if (info->sym_core_state.symcall_state.sym_call_active == 0) {
        update_irq_exit_state(info);
    }
#else
    update_irq_exit_state(info);
#endif


    // reenable global interrupts after vm exit
    v3_stgi();


    // Conditionally yield the CPU if the timeslice has expired
    v3_yield_cond(info);



    if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
        PrintError("Error in SVM exit handler\n");
        PrintError("  last exit was %d\n", v3_last_exit);
        return -1;
    }


    return 0;
}


int v3_start_svm_guest(struct guest_info * info) {
    //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
    //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));

    PrintDebug("Starting SVM core %u\n", info->cpu_id);

    if (info->cpu_id == 0) {
        info->core_run_state = CORE_RUNNING;
        info->vm_info->run_state = VM_RUNNING;
    } else  { 
        PrintDebug("SVM core %u: Waiting for core initialization\n", info->cpu_id);

        while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n",info->cpu_id);
        }

        PrintDebug("SVM core %u initialized\n", info->cpu_id);
    } 

    PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
               info->segments.cs.limit, (void *)(info->rip));



    PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
    //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
    
    v3_start_time(info);

    while (1) {

        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }
        
        if (v3_svm_enter(info) == -1) {
            vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
            addr_t host_addr;
            addr_t linear_addr = 0;
            
            info->vm_info->run_state = VM_ERROR;
            
            V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id); 
            
            v3_print_guest_state(info);
            
            V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
            
            V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
            V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
            
            V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
            V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
            
            linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
            
            if (info->mem_mode == PHYSICAL_MEM) {
                v3_gpa_to_hva(info, linear_addr, &host_addr);
            } else if (info->mem_mode == VIRTUAL_MEM) {
                v3_gva_to_hva(info, linear_addr, &host_addr);
            }
            
            V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);
            
            V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
            v3_dump_mem((uint8_t *)host_addr, 15);
            
            v3_print_stack(info);

            break;
        }


        if (info->vm_info->run_state == VM_STOPPED) {
            info->core_run_state = CORE_STOPPED;
            break;
        }

        
/*
        if ((info->num_exits % 5000) == 0) {
            V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
        }
*/
        
    }

    // Need to take down the other cores on error... 

    return 0;
}





/* Checks machine SVM capability */
/* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
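/* For reference, the probe sequence below (per the AMD manual): CPUID
 * Fn8000_0001 ECX bit 2 reports SVM support; the VM_CR MSR's SVMDIS bit
 * shows whether the BIOS has disabled it; and CPUID Fn8000_000A EDX bit 2
 * (SVML) tells whether BIOS-disabled SVM can be unlocked with a key. */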
int v3_is_svm_capable() {
    uint_t vm_cr_low = 0, vm_cr_high = 0;
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
  
    PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);

    if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
        V3_Print("SVM Not Available\n");
        return 0;
    } else {
        v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
        
        PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
        
        if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) != 0) {
            V3_Print("SVM is available but is disabled.\n");
            
            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
            
            if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
                V3_Print("SVM BIOS Disabled, not unlockable\n");
            } else {
                V3_Print("SVM is locked with a key\n");
            }
            return 0;

        } else {
            V3_Print("SVM is available and enabled.\n");

            v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
            PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);

            return 1;
        }
    }
}

static int has_svm_nested_paging() {
    uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;

    v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);

    //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);

    if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
        V3_Print("SVM Nested Paging not supported\n");
        return 0;
    } else {
        V3_Print("SVM Nested Paging supported\n");
        return 1;
    }
}


void v3_init_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // Enable SVM on the CPU
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low |= EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    V3_Print("SVM Enabled\n");

    // Setup the host state save area
    host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);

    /* 64-BIT-ISSUE */
    //  msr.e_reg.high = 0;
    //msr.e_reg.low = (uint_t)host_vmcb;
    msr.r_reg = host_vmcbs[cpu_id];

    PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
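    /* Note: VM_HSAVE_PA (MSR 0xC001_0117) must hold the physical address
     * of a page-aligned host save area; VMRUN saves host state there and
     * #VMEXIT restores it. One 4KB page is architecturally sufficient,
     * so the 4-page allocation above is generous. */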


    if (has_svm_nested_paging() == 1) {
        v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
    } else {
        v3_cpu_types[cpu_id] = V3_SVM_CPU;
    }
}



void v3_deinit_svm_cpu(int cpu_id) {
    reg_ex_t msr;
    extern v3_cpu_arch_t v3_cpu_types[];

    // reset SVM_VM_HSAVE_PA_MSR
    // Does setting it to NULL disable??
    msr.r_reg = 0;
    v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);

    // Disable SVM?
    v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
    msr.e_reg.low &= ~EFER_MSR_svm_enable;
    v3_set_msr(EFER_MSR, 0, msr.e_reg.low);

    v3_cpu_types[cpu_id] = V3_INVALID_CPU;

    V3_FreePages((void *)host_vmcbs[cpu_id], 4);

    V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
    return;
}


#if 0
/* 
 * Test VMSAVE/VMLOAD Latency 
 */
#define vmsave ".byte 0x0F,0x01,0xDB ; "
#define vmload ".byte 0x0F,0x01,0xDA ; "
{
    uint32_t start_lo, start_hi;
    uint32_t end_lo, end_hi;
    uint64_t start, end;
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmsave
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;
    
    end = end_hi;
    end <<= 32;
    end += end_lo;
    
    PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
    
    __asm__ __volatile__ (
                          "rdtsc ; "
                          "movl %%eax, %%esi ; "
                          "movl %%edx, %%edi ; "
                          "movq  %%rcx, %%rax ; "
                          vmload
                          "rdtsc ; "
                          : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
                          : "c"(host_vmcbs[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
                          );
    
    start = start_hi;
    start <<= 32;
    start += start_lo;

    end = end_hi;
    end <<= 32;
    end += end_lo;


    PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
}
/* End Latency Test */

#endif


#if 0
void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
  vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
  vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
  uint_t i = 0;


  guest_state->rsp = vm_info.vm_regs.rsp;
  guest_state->rip = vm_info.rip;


  /* I pretty much just gutted this from TVMM */
  /* Note: That means it's probably wrong */

  // set the segment registers to mirror ours
  guest_state->cs.selector = 1<<3;
  guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
  guest_state->cs.attrib.fields.S = 1;
  guest_state->cs.attrib.fields.P = 1;
  guest_state->cs.attrib.fields.db = 1;
  guest_state->cs.attrib.fields.G = 1;
  guest_state->cs.limit = 0xfffff;
  guest_state->cs.base = 0;
  
  struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
  for ( i = 0; segregs[i] != NULL; i++) {
    struct vmcb_selector * seg = segregs[i];
    
    seg->selector = 2<<3;
    seg->attrib.fields.type = 0x2; // Data Segment+read/write
    seg->attrib.fields.S = 1;
    seg->attrib.fields.P = 1;
    seg->attrib.fields.db = 1;
    seg->attrib.fields.G = 1;
    seg->limit = 0xfffff;
    seg->base = 0;
  }


  {
    /* JRL THIS HAS TO GO */
    
    //    guest_state->tr.selector = GetTR_Selector();
    guest_state->tr.attrib.fields.type = 0x9; 
    guest_state->tr.attrib.fields.P = 1;
    // guest_state->tr.limit = GetTR_Limit();
    //guest_state->tr.base = GetTR_Base();// - 0x2000;
    /* ** */
  }


  /* ** */


  guest_state->efer |= EFER_MSR_svm_enable;
  guest_state->rflags = 0x00000002; // The reserved bit is always 1
  ctrl_area->svm_instrs.VMRUN = 1;
  guest_state->cr0 = 0x00000001;    // PE 
  ctrl_area->guest_ASID = 1;


  //  guest_state->cpl = 0;



  // Setup exits

  ctrl_area->cr_writes.cr4 = 1;
  
  ctrl_area->exceptions.de = 1;
  ctrl_area->exceptions.df = 1;
  ctrl_area->exceptions.pf = 1;
  ctrl_area->exceptions.ts = 1;
  ctrl_area->exceptions.ss = 1;
  ctrl_area->exceptions.ac = 1;
  ctrl_area->exceptions.mc = 1;
  ctrl_area->exceptions.gp = 1;
  ctrl_area->exceptions.ud = 1;
  ctrl_area->exceptions.np = 1;
  ctrl_area->exceptions.of = 1;
  ctrl_area->exceptions.nmi = 1;

  

  ctrl_area->instrs.IOIO_PROT = 1;
  ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
  
  {
    reg_ex_t tmp_reg;
    tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
    memset((void*)(tmp_reg.e_reg.low), 0xff, PAGE_SIZE * 2);
  }

  ctrl_area->instrs.INTR = 1;

  
  {
    char gdt_buf[6];
    char idt_buf[6];

    memset(gdt_buf, 0, 6);
    memset(idt_buf, 0, 6);


    uint_t gdt_base, idt_base;
    ushort_t gdt_limit, idt_limit;
    
    GetGDTR(gdt_buf);
    gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
    gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
    PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);

    GetIDTR(idt_buf);
    idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
    idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
    PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);


    // gdt_base -= 0x2000;
    //idt_base -= 0x2000;

    guest_state->gdtr.base = gdt_base;
    guest_state->gdtr.limit = gdt_limit;
    guest_state->idtr.base = idt_base;
    guest_state->idtr.limit = idt_limit;


  }
  
  
  // also determine if CPU supports nested paging
  /*
  if (vm_info.page_tables) {
    //   if (0) {
    // Flush the TLB on entries/exits
    ctrl_area->TLB_CONTROL = 1;

    // Enable Nested Paging
    ctrl_area->NP_ENABLE = 1;

    PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));

    // Set the Nested Page Table pointer
    ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);


    //   ctrl_area->N_CR3 = Get_CR3();
    // guest_state->cr3 |= (Get_CR3() & 0xfffff000);

    guest_state->g_pat = 0x7040600070406ULL;

    PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
    PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
    // Enable Paging
    //    guest_state->cr0 |= 0x80000000;
  }
  */

}

#endif