Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


additions for syscall hijacking
[palacios.git] / palacios / src / palacios / svm.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20
21 #include <palacios/svm.h>
22 #include <palacios/vmm.h>
23
24 #include <palacios/vmcb.h>
25 #include <palacios/vmm_mem.h>
26 #include <palacios/vmm_paging.h>
27 #include <palacios/svm_handler.h>
28
29 #include <palacios/vmm_debug.h>
30 #include <palacios/vm_guest_mem.h>
31
32 #include <palacios/vmm_decoder.h>
33 #include <palacios/vmm_string.h>
34 #include <palacios/vmm_lowlevel.h>
35 #include <palacios/svm_msr.h>
36
37 #include <palacios/vmm_rbtree.h>
38
39 #include <palacios/vmm_direct_paging.h>
40
41 #include <palacios/vmm_ctrl_regs.h>
42 #include <palacios/svm_io.h>
43
44 #include <palacios/vmm_sprintf.h>
45
46
47 #ifndef CONFIG_DEBUG_SVM
48 #undef PrintDebug
49 #define PrintDebug(fmt, args...)
50 #endif
51
52
53 uint32_t v3_last_exit;
54
55 // This is a global pointer to the host's VMCB
56 static addr_t host_vmcbs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
57
58
59
60 extern void v3_stgi();
61 extern void v3_clgi();
62 //extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, uint64_t * fs, uint64_t * gs);
63 extern int v3_svm_launch(vmcb_t * vmcb, struct v3_gprs * vm_regs, vmcb_t * host_vmcb);
64
65
66 static vmcb_t * Allocate_VMCB() {
67     vmcb_t * vmcb_page = NULL;
68     addr_t vmcb_pa = (addr_t)V3_AllocPages(1);
69
70     if ((void *)vmcb_pa == NULL) {
71         PrintError("Error allocating VMCB\n");
72         return NULL;
73     }
74
75     vmcb_page = (vmcb_t *)V3_VAddr((void *)vmcb_pa);
76
77     memset(vmcb_page, 0, 4096);
78
79     return vmcb_page;
80 }
81
82
83
84 static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
85     vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
86     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
87     uint_t i;
88
89
90     //
91     ctrl_area->svm_instrs.VMRUN = 1;
92     ctrl_area->svm_instrs.VMMCALL = 1;
93     ctrl_area->svm_instrs.VMLOAD = 1;
94     ctrl_area->svm_instrs.VMSAVE = 1;
95     ctrl_area->svm_instrs.STGI = 1;
96     ctrl_area->svm_instrs.CLGI = 1;
97     ctrl_area->svm_instrs.SKINIT = 1;
98     ctrl_area->svm_instrs.ICEBP = 1;
99     ctrl_area->svm_instrs.WBINVD = 1;
100     ctrl_area->svm_instrs.MONITOR = 1;
101     ctrl_area->svm_instrs.MWAIT_always = 1;
102     ctrl_area->svm_instrs.MWAIT_if_armed = 1;
103     ctrl_area->instrs.INVLPGA = 1;
104     ctrl_area->instrs.CPUID = 1;
105
106     ctrl_area->instrs.HLT = 1;
107
108 #ifdef CONFIG_TIME_VIRTUALIZE_TSC
109     ctrl_area->instrs.RDTSC = 1;
110     ctrl_area->svm_instrs.RDTSCP = 1;
111 #endif
112
113     // guest_state->cr0 = 0x00000001;    // PE 
114   
115     /*
116       ctrl_area->exceptions.de = 1;
117       ctrl_area->exceptions.df = 1;
118       
119       ctrl_area->exceptions.ts = 1;
120       ctrl_area->exceptions.ss = 1;
121       ctrl_area->exceptions.ac = 1;
122       ctrl_area->exceptions.mc = 1;
123       ctrl_area->exceptions.gp = 1;
124       ctrl_area->exceptions.ud = 1;
125       ctrl_area->exceptions.np = 1;
126       ctrl_area->exceptions.of = 1;
127       
128       ctrl_area->exceptions.nmi = 1;
129     */
130     
131
132     ctrl_area->instrs.NMI = 1;
133     ctrl_area->instrs.SMI = 0; // allow SMIs to run in guest
134     ctrl_area->instrs.INIT = 1;
135     ctrl_area->instrs.PAUSE = 1;
136     ctrl_area->instrs.shutdown_evts = 1;
137
138     /* KCH: intercept writes to IDTR and SW Interrupts (INT) */
139 #ifdef CONFIG_SYSCALL_HIJACK
140     ctrl_area->instrs.WR_IDTR = 0;
141     ctrl_area->instrs.INTn = 1;
142 #endif
143
144
145     /* DEBUG FOR RETURN CODE */
146     ctrl_area->exit_code = 1;
147
148
149     /* Setup Guest Machine state */
150
151     core->vm_regs.rsp = 0x00;
152     core->rip = 0xfff0;
153
154     core->vm_regs.rdx = 0x00000f00;
155
156
157     core->cpl = 0;
158
159     core->ctrl_regs.rflags = 0x00000002; // The reserved bit is always 1
160     core->ctrl_regs.cr0 = 0x60010010; // Set the WP flag so the memory hooks work in real-mode
161     core->ctrl_regs.efer |= EFER_MSR_svm_enable;
162
163
164
165
166
167     core->segments.cs.selector = 0xf000;
168     core->segments.cs.limit = 0xffff;
169     core->segments.cs.base = 0x0000000f0000LL;
170
171     // (raw attributes = 0xf3)
172     core->segments.cs.type = 0x3;
173     core->segments.cs.system = 0x1;
174     core->segments.cs.dpl = 0x3;
175     core->segments.cs.present = 1;
176
177
178
179     struct v3_segment * segregs [] = {&(core->segments.ss), &(core->segments.ds), 
180                                       &(core->segments.es), &(core->segments.fs), 
181                                       &(core->segments.gs), NULL};
182
183     for ( i = 0; segregs[i] != NULL; i++) {
184         struct v3_segment * seg = segregs[i];
185         
186         seg->selector = 0x0000;
187         //    seg->base = seg->selector << 4;
188         seg->base = 0x00000000;
189         seg->limit = ~0u;
190
191         // (raw attributes = 0xf3)
192         seg->type = 0x3;
193         seg->system = 0x1;
194         seg->dpl = 0x3;
195         seg->present = 1;
196     }
197
198     core->segments.gdtr.limit = 0x0000ffff;
199     core->segments.gdtr.base = 0x0000000000000000LL;
200     core->segments.idtr.limit = 0x0000ffff;
201     core->segments.idtr.base = 0x0000000000000000LL;
202
203     core->segments.ldtr.selector = 0x0000;
204     core->segments.ldtr.limit = 0x0000ffff;
205     core->segments.ldtr.base = 0x0000000000000000LL;
206     core->segments.tr.selector = 0x0000;
207     core->segments.tr.limit = 0x0000ffff;
208     core->segments.tr.base = 0x0000000000000000LL;
209
210
211     core->dbg_regs.dr6 = 0x00000000ffff0ff0LL;
212     core->dbg_regs.dr7 = 0x0000000000000400LL;
213
214
215     ctrl_area->IOPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->io_map.arch_data);
216     ctrl_area->instrs.IOIO_PROT = 1;
217             
218     ctrl_area->MSRPM_BASE_PA = (addr_t)V3_PAddr(core->vm_info->msr_map.arch_data);
219     ctrl_area->instrs.MSR_PROT = 1;   
220
221
222     PrintDebug("Exiting on interrupts\n");
223     ctrl_area->guest_ctrl.V_INTR_MASKING = 1;
224     ctrl_area->instrs.INTR = 1;
225
226
227     v3_hook_msr(core->vm_info, EFER_MSR, 
228                 &v3_handle_efer_read,
229                 &v3_handle_efer_write, 
230                 core);
231
232 #ifdef CONFIG_HIJACK_MSR
233     /* KCH: for syscall interposition */
234     v3_hook_msr(core->vm_info, STAR_MSR,
235         &v3_handle_star_read,
236         &v3_handle_star_write,
237         core);
238     v3_hook_msr(core->vm_info, LSTAR_MSR,
239         &v3_handle_lstar_read,
240         &v3_handle_lstar_write,
241         core);
242     v3_hook_msr(core->vm_info, CSTAR_MSR,
243         &v3_handle_cstar_read,
244         &v3_handle_cstar_write,
245         core);
246 #endif
247
248     if (core->shdw_pg_mode == SHADOW_PAGING) {
249         PrintDebug("Creating initial shadow page table\n");
250         
251         /* JRL: This is a performance killer, and a simplistic solution */
252         /* We need to fix this */
253         ctrl_area->TLB_CONTROL = 1;
254         ctrl_area->guest_ASID = 1;
255         
256         
257         if (v3_init_passthrough_pts(core) == -1) {
258             PrintError("Could not initialize passthrough page tables\n");
259             return ;
260         }
261
262
263         core->shdw_pg_state.guest_cr0 = 0x0000000000000010LL;
264         PrintDebug("Created\n");
265         
266         core->ctrl_regs.cr0 |= 0x80000000;
267         core->ctrl_regs.cr3 = core->direct_map_pt;
268
269         ctrl_area->cr_reads.cr0 = 1;
270         ctrl_area->cr_writes.cr0 = 1;
271         //ctrl_area->cr_reads.cr4 = 1;
272         ctrl_area->cr_writes.cr4 = 1;
273         ctrl_area->cr_reads.cr3 = 1;
274         ctrl_area->cr_writes.cr3 = 1;
275
276
277
278         ctrl_area->instrs.INVLPG = 1;
279
280         ctrl_area->exceptions.pf = 1;
281
282         guest_state->g_pat = 0x7040600070406ULL;
283
284
285
286     } else if (core->shdw_pg_mode == NESTED_PAGING) {
287         // Flush the TLB on entries/exits
288         ctrl_area->TLB_CONTROL = 1;
289         ctrl_area->guest_ASID = 1;
290
291         // Enable Nested Paging
292         ctrl_area->NP_ENABLE = 1;
293
294         PrintDebug("NP_Enable at 0x%p\n", (void *)&(ctrl_area->NP_ENABLE));
295
296         // Set the Nested Page Table pointer
297         if (v3_init_passthrough_pts(core) == -1) {
298             PrintError("Could not initialize Nested page tables\n");
299             return ;
300         }
301
302         ctrl_area->N_CR3 = core->direct_map_pt;
303
304         guest_state->g_pat = 0x7040600070406ULL;
305     }
306     
307     /* tell the guest that we don't support SVM */
308     v3_hook_msr(core->vm_info, SVM_VM_CR_MSR, 
309         &v3_handle_vm_cr_read,
310         &v3_handle_vm_cr_write, 
311         core);
312 }
313
314
315 int v3_init_svm_vmcb(struct guest_info * core, v3_vm_class_t vm_class) {
316
317     PrintDebug("Allocating VMCB\n");
318     core->vmm_data = (void *)Allocate_VMCB();
319     
320     if (core->vmm_data == NULL) {
321         PrintError("Could not allocate VMCB, Exiting...\n");
322         return -1;
323     }
324
325     if (vm_class == V3_PC_VM) {
326         PrintDebug("Initializing VMCB (addr=%p)\n", (void *)core->vmm_data);
327         Init_VMCB_BIOS((vmcb_t*)(core->vmm_data), core);
328     } else {
329         PrintError("Invalid VM class\n");
330         return -1;
331     }
332
333     return 0;
334 }
335
336
337 int v3_deinit_svm_vmcb(struct guest_info * core) {
338     V3_FreePages(V3_PAddr(core->vmm_data), 1);
339     return 0;
340 }
341
342
343 static int update_irq_exit_state(struct guest_info * info) {
344     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
345
346     // Fix for QEMU bug using EVENTINJ as an internal cache
347     guest_ctrl->EVENTINJ.valid = 0;
348
349     if ((info->intr_core_state.irq_pending == 1) && (guest_ctrl->guest_ctrl.V_IRQ == 0)) {
350         
351 #ifdef CONFIG_DEBUG_INTERRUPTS
352         PrintDebug("INTAK cycle completed for irq %d\n", info->intr_core_state.irq_vector);
353 #endif
354
355         info->intr_core_state.irq_started = 1;
356         info->intr_core_state.irq_pending = 0;
357
358         v3_injecting_intr(info, info->intr_core_state.irq_vector, V3_EXTERNAL_IRQ);
359     }
360
361     if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 0)) {
362 #ifdef CONFIG_DEBUG_INTERRUPTS
363         PrintDebug("Interrupt %d taken by guest\n", info->intr_core_state.irq_vector);
364 #endif
365
366         // Interrupt was taken fully vectored
367         info->intr_core_state.irq_started = 0;
368
369     } else if ((info->intr_core_state.irq_started == 1) && (guest_ctrl->exit_int_info.valid == 1)) {
370 #ifdef CONFIG_DEBUG_INTERRUPTS
371         PrintDebug("EXIT INT INFO is set (vec=%d)\n", guest_ctrl->exit_int_info.vector);
372 #endif
373     }
374
375     return 0;
376 }
377
378
379 static int update_irq_entry_state(struct guest_info * info) {
380     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
381
382
383     if (info->intr_core_state.irq_pending == 0) {
384         guest_ctrl->guest_ctrl.V_IRQ = 0;
385         guest_ctrl->guest_ctrl.V_INTR_VECTOR = 0;
386     }
387     
388     if (v3_excp_pending(info)) {
389         uint_t excp = v3_get_excp_number(info);
390         
391         guest_ctrl->EVENTINJ.type = SVM_INJECTION_EXCEPTION;
392         
393         if (info->excp_state.excp_error_code_valid) {
394             guest_ctrl->EVENTINJ.error_code = info->excp_state.excp_error_code;
395             guest_ctrl->EVENTINJ.ev = 1;
396 #ifdef CONFIG_DEBUG_INTERRUPTS
397             PrintDebug("Injecting exception %d with error code %x\n", excp, guest_ctrl->EVENTINJ.error_code);
398 #endif
399         }
400         
401         guest_ctrl->EVENTINJ.vector = excp;
402         
403         guest_ctrl->EVENTINJ.valid = 1;
404
405 #ifdef CONFIG_DEBUG_INTERRUPTS
406         PrintDebug("<%d> Injecting Exception %d (CR2=%p) (EIP=%p)\n", 
407                    (int)info->num_exits, 
408                    guest_ctrl->EVENTINJ.vector, 
409                    (void *)(addr_t)info->ctrl_regs.cr2,
410                    (void *)(addr_t)info->rip);
411 #endif
412
413         v3_injecting_excp(info, excp);
414     } else if (info->intr_core_state.irq_started == 1) {
415 #ifdef CONFIG_DEBUG_INTERRUPTS
416         PrintDebug("IRQ pending from previous injection\n");
417 #endif
418         guest_ctrl->guest_ctrl.V_IRQ = 1;
419         guest_ctrl->guest_ctrl.V_INTR_VECTOR = info->intr_core_state.irq_vector;
420         guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
421         guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;
422
423     } else {
424         switch (v3_intr_pending(info)) {
425             case V3_EXTERNAL_IRQ: {
426                 uint32_t irq = v3_get_intr(info);
427
428                 guest_ctrl->guest_ctrl.V_IRQ = 1;
429                 guest_ctrl->guest_ctrl.V_INTR_VECTOR = irq;
430                 guest_ctrl->guest_ctrl.V_IGN_TPR = 1;
431                 guest_ctrl->guest_ctrl.V_INTR_PRIO = 0xf;
432
433 #ifdef CONFIG_DEBUG_INTERRUPTS
434                 PrintDebug("Injecting Interrupt %d (EIP=%p)\n", 
435                            guest_ctrl->guest_ctrl.V_INTR_VECTOR, 
436                            (void *)(addr_t)info->rip);
437 #endif
438
439                 info->intr_core_state.irq_pending = 1;
440                 info->intr_core_state.irq_vector = irq;
441                 
442                 break;
443             }
444             case V3_NMI:
445                 guest_ctrl->EVENTINJ.type = SVM_INJECTION_NMI;
446                 break;
447             case V3_SOFTWARE_INTR: {
448             PrintDebug("KCH: Caught an injected software interrupt\n");
449             PrintDebug("\ttype: %d, vector: %d\n", SVM_INJECTION_SOFT_INTR, info->intr_core_state.sw_intr_vector);
450             guest_ctrl->EVENTINJ.type = SVM_INJECTION_SOFT_INTR;
451             guest_ctrl->EVENTINJ.vector = info->intr_core_state.sw_intr_vector;
452             guest_ctrl->EVENTINJ.valid = 1;
453             
454             // clear out stuff?
455             info->intr_core_state.sw_intr_pending = 0;
456             info->intr_core_state.sw_intr_vector = 0;
457             break;
458         }
459             case V3_VIRTUAL_IRQ:
460                 guest_ctrl->EVENTINJ.type = SVM_INJECTION_IRQ;
461                 break;
462
463             case V3_INVALID_INTR:
464             default:
465                 break;
466         }
467         
468     }
469
470     return 0;
471 }
472
473
474 /* 
475  * CAUTION and DANGER!!! 
476  * 
477  * The VMCB CANNOT(!!) be accessed outside of the clgi/stgi calls inside this function
478  * When exectuing a symbiotic call, the VMCB WILL be overwritten, so any dependencies 
479  * on its contents will cause things to break. The contents at the time of the exit WILL 
480  * change before the exit handler is executed.
481  */
482 int v3_svm_enter(struct guest_info * info) {
483     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
484     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
485     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
486
487     // Conditionally yield the CPU if the timeslice has expired
488     v3_yield_cond(info);
489
490     // Perform any additional yielding needed for time adjustment
491     v3_adjust_time(info);
492
493     // disable global interrupts for vm state transition
494     v3_clgi();
495
496     // Update timer devices prior to entering VM.
497     v3_update_timers(info);
498
499     // Synchronize the guest state to the VMCB
500     guest_state->cr0 = info->ctrl_regs.cr0;
501     guest_state->cr2 = info->ctrl_regs.cr2;
502     guest_state->cr3 = info->ctrl_regs.cr3;
503     guest_state->cr4 = info->ctrl_regs.cr4;
504     guest_state->dr6 = info->dbg_regs.dr6;
505     guest_state->dr7 = info->dbg_regs.dr7;
506     guest_ctrl->guest_ctrl.V_TPR = info->ctrl_regs.cr8 & 0xff;
507     guest_state->rflags = info->ctrl_regs.rflags;
508     guest_state->efer = info->ctrl_regs.efer;
509     
510     guest_state->cpl = info->cpl;
511
512     v3_set_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
513
514     guest_state->rax = info->vm_regs.rax;
515     guest_state->rip = info->rip;
516     guest_state->rsp = info->vm_regs.rsp;
517
518 #ifdef CONFIG_SYMCALL
519     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
520         update_irq_entry_state(info);
521     }
522 #else 
523     update_irq_entry_state(info);
524 #endif
525
526
527     /* ** */
528
529     /*
530       PrintDebug("SVM Entry to CS=%p  rip=%p...\n", 
531       (void *)(addr_t)info->segments.cs.base, 
532       (void *)(addr_t)info->rip);
533     */
534
535 #ifdef CONFIG_SYMCALL
536     if (info->sym_core_state.symcall_state.sym_call_active == 1) {
537         if (guest_ctrl->guest_ctrl.V_IRQ == 1) {
538             V3_Print("!!! Injecting Interrupt during Sym call !!!\n");
539         }
540     }
541 #endif
542
543     v3_time_enter_vm(info);
544     guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
545
546     //V3_Print("Calling v3_svm_launch\n");
547
548     v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[V3_Get_CPU()]);
549
550     //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
551
552     v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
553
554     // Immediate exit from VM time bookkeeping
555     v3_time_exit_vm(info);
556
557     info->num_exits++;
558
559     // Save Guest state from VMCB
560     info->rip = guest_state->rip;
561     info->vm_regs.rsp = guest_state->rsp;
562     info->vm_regs.rax = guest_state->rax;
563
564     info->cpl = guest_state->cpl;
565
566     info->ctrl_regs.cr0 = guest_state->cr0;
567     info->ctrl_regs.cr2 = guest_state->cr2;
568     info->ctrl_regs.cr3 = guest_state->cr3;
569     info->ctrl_regs.cr4 = guest_state->cr4;
570     info->dbg_regs.dr6 = guest_state->dr6;
571     info->dbg_regs.dr7 = guest_state->dr7;
572     info->ctrl_regs.cr8 = guest_ctrl->guest_ctrl.V_TPR;
573     info->ctrl_regs.rflags = guest_state->rflags;
574     info->ctrl_regs.efer = guest_state->efer;
575     
576     v3_get_vmcb_segments((vmcb_t*)(info->vmm_data), &(info->segments));
577     info->cpu_mode = v3_get_vm_cpu_mode(info);
578     info->mem_mode = v3_get_vm_mem_mode(info);
579     /* ** */
580
581
582     // save exit info here
583     exit_code = guest_ctrl->exit_code;
584     exit_info1 = guest_ctrl->exit_info1;
585     exit_info2 = guest_ctrl->exit_info2;
586
587
588 #ifdef CONFIG_SYMCALL
589     if (info->sym_core_state.symcall_state.sym_call_active == 0) {
590         update_irq_exit_state(info);
591     }
592 #else
593     update_irq_exit_state(info);
594 #endif
595
596
597     // reenable global interrupts after vm exit
598     v3_stgi();
599
600  
601     // Conditionally yield the CPU if the timeslice has expired
602     v3_yield_cond(info);
603
604
605
606     if (v3_handle_svm_exit(info, exit_code, exit_info1, exit_info2) != 0) {
607         PrintError("Error in SVM exit handler\n");
608         PrintError("  last exit was %d\n", v3_last_exit);
609         return -1;
610     }
611
612
613     return 0;
614 }
615
616
617 int v3_start_svm_guest(struct guest_info * info) {
618     //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
619     //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
620
621     PrintDebug("Starting SVM core %u\n", info->cpu_id);
622
623     if (info->cpu_id == 0) {
624         info->core_run_state = CORE_RUNNING;
625         info->vm_info->run_state = VM_RUNNING;
626     } else  { 
627         PrintDebug("SVM core %u: Waiting for core initialization\n", info->cpu_id);
628
629         while (info->core_run_state == CORE_STOPPED) {
630             v3_yield(info);
631             //PrintDebug("SVM core %u: still waiting for INIT\n",info->cpu_id);
632         }
633
634         PrintDebug("SVM core %u initialized\n", info->cpu_id);
635     } 
636
637     PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
638                info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
639                info->segments.cs.limit, (void *)(info->rip));
640
641
642
643     PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
644     //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
645     
646     v3_start_time(info);
647
648     while (1) {
649
650         if (info->vm_info->run_state == VM_STOPPED) {
651             info->core_run_state = CORE_STOPPED;
652             break;
653         }
654         
655         if (v3_svm_enter(info) == -1) {
656             vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
657             addr_t host_addr;
658             addr_t linear_addr = 0;
659             
660             info->vm_info->run_state = VM_ERROR;
661             
662             V3_Print("SVM core %u: SVM ERROR!!\n", info->cpu_id); 
663             
664             v3_print_guest_state(info);
665             
666             V3_Print("SVM core %u: SVM Exit Code: %p\n", info->cpu_id, (void *)(addr_t)guest_ctrl->exit_code); 
667             
668             V3_Print("SVM core %u: exit_info1 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info1));
669             V3_Print("SVM core %u: exit_info1 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info1)) + 4));
670             
671             V3_Print("SVM core %u: exit_info2 low = 0x%.8x\n", info->cpu_id, *(uint_t*)&(guest_ctrl->exit_info2));
672             V3_Print("SVM core %u: exit_info2 high = 0x%.8x\n", info->cpu_id, *(uint_t *)(((uchar_t *)&(guest_ctrl->exit_info2)) + 4));
673             
674             linear_addr = get_addr_linear(info, info->rip, &(info->segments.cs));
675             
676             if (info->mem_mode == PHYSICAL_MEM) {
677                 v3_gpa_to_hva(info, linear_addr, &host_addr);
678             } else if (info->mem_mode == VIRTUAL_MEM) {
679                 v3_gva_to_hva(info, linear_addr, &host_addr);
680             }
681             
682             V3_Print("SVM core %u: Host Address of rip = 0x%p\n", info->cpu_id, (void *)host_addr);
683             
684             V3_Print("SVM core %u: Instr (15 bytes) at %p:\n", info->cpu_id, (void *)host_addr);
685             v3_dump_mem((uint8_t *)host_addr, 15);
686             
687             v3_print_stack(info);
688
689             break;
690         }
691
692
693         if (info->vm_info->run_state == VM_STOPPED) {
694             info->core_run_state = CORE_STOPPED;
695             break;
696         }
697
698         
699 /*
700         if ((info->num_exits % 5000) == 0) {
701             V3_Print("SVM Exit number %d\n", (uint32_t)info->num_exits);
702         }
703 */
704         
705     }
706
707     // Need to take down the other cores on error... 
708
709     return 0;
710 }
711
712
713
714
715
716 /* Checks machine SVM capability */
717 /* Implemented from: AMD Arch Manual 3, sect 15.4 */ 
718 int v3_is_svm_capable() {
719     uint_t vm_cr_low = 0, vm_cr_high = 0;
720     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
721
722     v3_cpuid(CPUID_EXT_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
723   
724     PrintDebug("CPUID_EXT_FEATURE_IDS_ecx=0x%x\n", ecx);
725
726     if ((ecx & CPUID_EXT_FEATURE_IDS_ecx_svm_avail) == 0) {
727       V3_Print("SVM Not Available\n");
728       return 0;
729     }  else {
730         v3_get_msr(SVM_VM_CR_MSR, &vm_cr_high, &vm_cr_low);
731         
732         PrintDebug("SVM_VM_CR_MSR = 0x%x 0x%x\n", vm_cr_high, vm_cr_low);
733         
734         if ((vm_cr_low & SVM_VM_CR_MSR_svmdis) == 1) {
735             V3_Print("SVM is available but is disabled.\n");
736             
737             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
738             
739             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
740             
741             if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_svml) == 0) {
742                 V3_Print("SVM BIOS Disabled, not unlockable\n");
743             } else {
744                 V3_Print("SVM is locked with a key\n");
745             }
746             return 0;
747
748         } else {
749             V3_Print("SVM is available and  enabled.\n");
750
751             v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
752             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_eax=0x%x\n", eax);
753             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ebx=0x%x\n", ebx);
754             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_ecx=0x%x\n", ecx);
755             PrintDebug("CPUID_SVM_REV_AND_FEATURE_IDS_edx=0x%x\n", edx);
756
757             return 1;
758         }
759     }
760 }
761
762 static int has_svm_nested_paging() {
763     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
764
765     v3_cpuid(CPUID_SVM_REV_AND_FEATURE_IDS, &eax, &ebx, &ecx, &edx);
766
767     //PrintDebug("CPUID_EXT_FEATURE_IDS_edx=0x%x\n", edx);
768
769     if ((edx & CPUID_SVM_REV_AND_FEATURE_IDS_edx_np) == 0) {
770         V3_Print("SVM Nested Paging not supported\n");
771         return 0;
772     } else {
773         V3_Print("SVM Nested Paging supported\n");
774         return 1;
775     }
776 }
777
778
779 void v3_init_svm_cpu(int cpu_id) {
780     reg_ex_t msr;
781     extern v3_cpu_arch_t v3_cpu_types[];
782
783     // Enable SVM on the CPU
784     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
785     msr.e_reg.low |= EFER_MSR_svm_enable;
786     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
787
788     V3_Print("SVM Enabled\n");
789
790     // Setup the host state save area
791     host_vmcbs[cpu_id] = (addr_t)V3_AllocPages(4);
792
793     /* 64-BIT-ISSUE */
794     //  msr.e_reg.high = 0;
795     //msr.e_reg.low = (uint_t)host_vmcb;
796     msr.r_reg = host_vmcbs[cpu_id];
797
798     PrintDebug("Host State being saved at %p\n", (void *)host_vmcbs[cpu_id]);
799     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
800
801
802     if (has_svm_nested_paging() == 1) {
803         v3_cpu_types[cpu_id] = V3_SVM_REV3_CPU;
804     } else {
805         v3_cpu_types[cpu_id] = V3_SVM_CPU;
806     }
807 }
808
809
810
811 void v3_deinit_svm_cpu(int cpu_id) {
812     reg_ex_t msr;
813     extern v3_cpu_arch_t v3_cpu_types[];
814
815     // reset SVM_VM_HSAVE_PA_MSR
816     // Does setting it to NULL disable??
817     msr.r_reg = 0;
818     v3_set_msr(SVM_VM_HSAVE_PA_MSR, msr.e_reg.high, msr.e_reg.low);
819
820     // Disable SVM?
821     v3_get_msr(EFER_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
822     msr.e_reg.low &= ~EFER_MSR_svm_enable;
823     v3_set_msr(EFER_MSR, 0, msr.e_reg.low);
824
825     v3_cpu_types[cpu_id] = V3_INVALID_CPU;
826
827     V3_FreePages((void *)host_vmcbs[cpu_id], 4);
828
829     V3_Print("Host CPU %d host area freed, and SVM disabled\n", cpu_id);
830     return;
831 }
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882 #if 0
883 /* 
884  * Test VMSAVE/VMLOAD Latency 
885  */
886 #define vmsave ".byte 0x0F,0x01,0xDB ; "
887 #define vmload ".byte 0x0F,0x01,0xDA ; "
888 {
889     uint32_t start_lo, start_hi;
890     uint32_t end_lo, end_hi;
891     uint64_t start, end;
892     
893     __asm__ __volatile__ (
894                           "rdtsc ; "
895                           "movl %%eax, %%esi ; "
896                           "movl %%edx, %%edi ; "
897                           "movq  %%rcx, %%rax ; "
898                           vmsave
899                           "rdtsc ; "
900                           : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
901                           : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
902                           );
903     
904     start = start_hi;
905     start <<= 32;
906     start += start_lo;
907     
908     end = end_hi;
909     end <<= 32;
910     end += end_lo;
911     
912     PrintDebug("VMSave Cycle Latency: %d\n", (uint32_t)(end - start));
913     
914     __asm__ __volatile__ (
915                           "rdtsc ; "
916                           "movl %%eax, %%esi ; "
917                           "movl %%edx, %%edi ; "
918                           "movq  %%rcx, %%rax ; "
919                           vmload
920                           "rdtsc ; "
921                           : "=D"(start_hi), "=S"(start_lo), "=a"(end_lo),"=d"(end_hi)
922                               : "c"(host_vmcb[cpu_id]), "0"(0), "1"(0), "2"(0), "3"(0)
923                               );
924         
925         start = start_hi;
926         start <<= 32;
927         start += start_lo;
928
929         end = end_hi;
930         end <<= 32;
931         end += end_lo;
932
933
934         PrintDebug("VMLoad Cycle Latency: %d\n", (uint32_t)(end - start));
935     }
936     /* End Latency Test */
937
938 #endif
939
940
941
942
943
944
945
946 #if 0
947 void Init_VMCB_pe(vmcb_t *vmcb, struct guest_info vm_info) {
948   vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb);
949   vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA(vmcb);
950   uint_t i = 0;
951
952
953   guest_state->rsp = vm_info.vm_regs.rsp;
954   guest_state->rip = vm_info.rip;
955
956
957   /* I pretty much just gutted this from TVMM */
958   /* Note: That means its probably wrong */
959
960   // set the segment registers to mirror ours
961   guest_state->cs.selector = 1<<3;
962   guest_state->cs.attrib.fields.type = 0xa; // Code segment+read
963   guest_state->cs.attrib.fields.S = 1;
964   guest_state->cs.attrib.fields.P = 1;
965   guest_state->cs.attrib.fields.db = 1;
966   guest_state->cs.attrib.fields.G = 1;
967   guest_state->cs.limit = 0xfffff;
968   guest_state->cs.base = 0;
969   
970   struct vmcb_selector *segregs [] = {&(guest_state->ss), &(guest_state->ds), &(guest_state->es), &(guest_state->fs), &(guest_state->gs), NULL};
971   for ( i = 0; segregs[i] != NULL; i++) {
972     struct vmcb_selector * seg = segregs[i];
973     
974     seg->selector = 2<<3;
975     seg->attrib.fields.type = 0x2; // Data Segment+read/write
976     seg->attrib.fields.S = 1;
977     seg->attrib.fields.P = 1;
978     seg->attrib.fields.db = 1;
979     seg->attrib.fields.G = 1;
980     seg->limit = 0xfffff;
981     seg->base = 0;
982   }
983
984
985   {
986     /* JRL THIS HAS TO GO */
987     
988     //    guest_state->tr.selector = GetTR_Selector();
989     guest_state->tr.attrib.fields.type = 0x9; 
990     guest_state->tr.attrib.fields.P = 1;
991     // guest_state->tr.limit = GetTR_Limit();
992     //guest_state->tr.base = GetTR_Base();// - 0x2000;
993     /* ** */
994   }
995
996
997   /* ** */
998
999
1000   guest_state->efer |= EFER_MSR_svm_enable;
1001   guest_state->rflags = 0x00000002; // The reserved bit is always 1
1002   ctrl_area->svm_instrs.VMRUN = 1;
1003   guest_state->cr0 = 0x00000001;    // PE 
1004   ctrl_area->guest_ASID = 1;
1005
1006
1007   //  guest_state->cpl = 0;
1008
1009
1010
1011   // Setup exits
1012
1013   ctrl_area->cr_writes.cr4 = 1;
1014   
1015   ctrl_area->exceptions.de = 1;
1016   ctrl_area->exceptions.df = 1;
1017   ctrl_area->exceptions.pf = 1;
1018   ctrl_area->exceptions.ts = 1;
1019   ctrl_area->exceptions.ss = 1;
1020   ctrl_area->exceptions.ac = 1;
1021   ctrl_area->exceptions.mc = 1;
1022   ctrl_area->exceptions.gp = 1;
1023   ctrl_area->exceptions.ud = 1;
1024   ctrl_area->exceptions.np = 1;
1025   ctrl_area->exceptions.of = 1;
1026   ctrl_area->exceptions.nmi = 1;
1027
1028   
1029
1030   ctrl_area->instrs.IOIO_PROT = 1;
1031   ctrl_area->IOPM_BASE_PA = (uint_t)V3_AllocPages(3);
1032   
1033   {
1034     reg_ex_t tmp_reg;
1035     tmp_reg.r_reg = ctrl_area->IOPM_BASE_PA;
1036     memset((void*)(tmp_reg.e_reg.low), 0xffffffff, PAGE_SIZE * 2);
1037   }
1038
1039   ctrl_area->instrs.INTR = 1;
1040
1041   
1042   {
1043     char gdt_buf[6];
1044     char idt_buf[6];
1045
1046     memset(gdt_buf, 0, 6);
1047     memset(idt_buf, 0, 6);
1048
1049
1050     uint_t gdt_base, idt_base;
1051     ushort_t gdt_limit, idt_limit;
1052     
1053     GetGDTR(gdt_buf);
1054     gdt_base = *(ulong_t*)((uchar_t*)gdt_buf + 2) & 0xffffffff;
1055     gdt_limit = *(ushort_t*)(gdt_buf) & 0xffff;
1056     PrintDebug("GDT: base: %x, limit: %x\n", gdt_base, gdt_limit);
1057
1058     GetIDTR(idt_buf);
1059     idt_base = *(ulong_t*)(idt_buf + 2) & 0xffffffff;
1060     idt_limit = *(ushort_t*)(idt_buf) & 0xffff;
1061     PrintDebug("IDT: base: %x, limit: %x\n",idt_base, idt_limit);
1062
1063
1064     // gdt_base -= 0x2000;
1065     //idt_base -= 0x2000;
1066
1067     guest_state->gdtr.base = gdt_base;
1068     guest_state->gdtr.limit = gdt_limit;
1069     guest_state->idtr.base = idt_base;
1070     guest_state->idtr.limit = idt_limit;
1071
1072
1073   }
1074   
1075   
1076   // also determine if CPU supports nested paging
1077   /*
1078   if (vm_info.page_tables) {
1079     //   if (0) {
1080     // Flush the TLB on entries/exits
1081     ctrl_area->TLB_CONTROL = 1;
1082
1083     // Enable Nested Paging
1084     ctrl_area->NP_ENABLE = 1;
1085
1086     PrintDebug("NP_Enable at 0x%x\n", &(ctrl_area->NP_ENABLE));
1087
1088         // Set the Nested Page Table pointer
1089     ctrl_area->N_CR3 |= ((addr_t)vm_info.page_tables & 0xfffff000);
1090
1091
1092     //   ctrl_area->N_CR3 = Get_CR3();
1093     // guest_state->cr3 |= (Get_CR3() & 0xfffff000);
1094
1095     guest_state->g_pat = 0x7040600070406ULL;
1096
1097     PrintDebug("Set Nested CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(ctrl_area->N_CR3)), (uint_t)*((unsigned char *)&(ctrl_area->N_CR3) + 4));
1098     PrintDebug("Set Guest CR3: lo: 0x%x  hi: 0x%x\n", (uint_t)*(&(guest_state->cr3)), (uint_t)*((unsigned char *)&(guest_state->cr3) + 4));
1099     // Enable Paging
1100     //    guest_state->cr0 |= 0x80000000;
1101   }
1102   */
1103
1104 }
1105
1106
1107
1108
1109
1110 #endif
1111
1112