1 #include <nautilus/nautilus.h>
2 #include <nautilus/thread.h>
3 #include <nautilus/printk.h>
4 #include <nautilus/cpu.h>
5 #include <nautilus/mm.h>
6 #include <nautilus/vc.h>
7 #include <nautilus/timer.h>
10 #include <palacios/vmm.h>
17 This is a simple proof-of-concept implementation of the Palacios
18 host interface on top of Nautilus. It is sufficient to allow
19 us to boot a guest OS running Linux. A few things to note:
21 - Nautilus currently has a grand-unified allocator designed to help
22 support parallel run-time integration. All of alloc/valloc/page
23 allocation are built on top of that.
24 - For page allocation, constraints, NUMA, and filter expressions are
26 - thread migration is not supported currently
27 - hooking of host interrupts is not supported currently.
28 - Palacios can sleep, yield, wakeup, etc, but be aware
29 that Nautilus threads operate differently than those of
33 - Do Nautilus regular startup to bring all cores to idle
34 - From a kernel thread, ideally the init thread on core 0,
35 do palacios_vmm_init(options)
36 - You can now use the Palacios v3_* functions, which are
38 - You need to keep the Nautilus VM state in sync with
39 the Palacios VM state. The protocol for this is:
40 1. before doing a VM creation, call
41 palacios_inform_new_vm_pre(name)
42 this will also select the new vm for
43 the creation and going forward
44 then, once v3_create is done, call
45 palacios_inform_new_vm_post(name, vm)
46 2. during execution, whenever you want to
47 manage a different VM, call
48 palacios_select_vm(vm)
50 palacios_select_vm_by_name(name)
51 It is OK to select repeatedly, etc.
52 3. after doing a VM free, call
53 palacios_inform_free_vm(name)
55 palacios_inform_free_selected_vm()
56 - After you are done, do a palacios_vmm_exit();
60 // The following can be used to track memory bugs
61 // zero memory after allocation (now applies to valloc and page alloc as well)
62 #define ALLOC_ZERO_MEM 1
63 // pad allocations by this many bytes on both ends of block (heap only)
65 #define MAX_THREAD_NAME 32
/* Not referenced in the visible part of this file; its purpose cannot be determined from here. */
67 int run_nk_thread = 0;
/* Table of VM records; zeroed in palacios_vmm_init(). A slot whose name is empty is treated as free by palacios_inform_new_vm_pre(). */
69 static struct nk_vm_state vms[NR_VMS];
/* Currently selected entry of vms[]; assigned (at least) by palacios_inform_new_vm_pre(). */
71 static struct nk_vm_state *selected_vm;
/* Per-vector target VM consulted by palacios_dispatch_interrupt(); zeroed in palacios_vmm_init(). */
73 static struct v3_vm_info * irq_to_guest_map[256];
/* Assigned from nk_detect_cpu_freq() in palacios_get_cpu_khz(); the -1 initializer converts to UINT_MAX. */
75 static unsigned int cpu_khz=-1;
/* One formatting buffer per CPU slot, managed by init_print_buffers() / deinit_print_buffers(). */
77 static char *print_buffer[NR_CPUS];
/* Walks all NR_CPUS slots and releases every non-NULL print buffer with palacios_free(). */
79 static void deinit_print_buffers(void)
83 for (i=0;i<NR_CPUS;i++) {
84 if (print_buffer[i]) {
85 palacios_free(print_buffer[i]);
/*
 * Clears the print_buffer table, then allocates and zeroes one
 * V3_PRINTK_BUF_SIZE-byte buffer for each of the NR_CPUS slots.
 * If an allocation fails, the error is logged and deinit_print_buffers()
 * releases whatever was allocated so far.
 * palacios_vmm_init() treats a nonzero return value as failure.
 */
91 static int init_print_buffers(void)
95 memset(print_buffer,0,sizeof(char*)*NR_CPUS);
97 for (i=0;i<NR_CPUS;i++) {
98 print_buffer[i] = palacios_alloc(V3_PRINTK_BUF_SIZE);
99 if (!print_buffer[i]) {
100 ERROR("Cannot allocate print buffer for cpu %d\n",i);
101 deinit_print_buffers();
104 memset(print_buffer[i],0,V3_PRINTK_BUF_SIZE);
115 * Prints a message to the console.
/*
 * Formats fmt and its variadic arguments into the calling CPU's print
 * buffer (bounded by V3_PRINTK_BUF_SIZE) and emits the result through
 * INFO_PRINT, prefixed with the physical core and, in some variants, the
 * VM name and virtual core.
 * A CPU index >= NR_CPUS yields a null buffer; a notice is printed when no
 * buffer is available.
 * NOTE(review): two of the three output calls prepend KERN_INFO to the
 * format string and the first does not - confirm which form is intended.
 */
117 void palacios_print_scoped(void * vm, int vcore, const char *fmt, ...)
121 unsigned int cpu = palacios_get_cpu();
// Out-of-range CPU indices fall back to a null buffer instead of indexing past the table.
122 char *buf = cpu < NR_CPUS ? print_buffer[cpu] : 0;
125 INFO_PRINT("palacios (pcore %u): output skipped - no allocated buffer\n",cpu);
131 vsnprintf(buf,V3_PRINTK_BUF_SIZE, fmt, ap);
136 INFO_PRINT("palacios (pcore %u vm %s vcore %u): %s",
142 INFO_PRINT(KERN_INFO "palacios (pcore %u vm %s): %s",
148 INFO_PRINT(KERN_INFO "palacios (pcore %u): %s",
159 * Allocates a contiguous region of pages of the requested size.
160 * Returns the physical address of the first page in the region.
/*
 * Obtains num_pages * 4096 bytes from malloc() and zeroes them.
 * In the visible lines, alignment, node_id, filter_func and filter_state
 * appear only in log messages and do not influence the allocation.
 * The result is checked for a nonzero offset within a 4096-byte page
 * (low 12 address bits).
 * NOTE(review): num_pages * 4096 is computed in int; a large page count
 * could overflow before reaching malloc()/memset() - confirm the callers' range.
 * NOTE(review): the memset is presumably guarded by ALLOC_ZERO_MEM; the
 * guard is not visible here.
 */
162 void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state)
164 void * pg_addr = NULL;
167 ERROR("ALERT ALERT Attempt to allocate zero or fewer pages (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state);
171 // malloc currently guarantees alignment to the size of
173 pg_addr = (void *)malloc(num_pages*4096);
176 ERROR("ALERT ALERT Page allocation has FAILED Warning (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state);
// Low 12 bits set means the block does not start on a 4096-byte boundary.
180 if ((uint64_t)pg_addr & 0xfff) {
181 ERROR("ALERT ALERT Page allocation has surprise offset\n");
186 memset(pg_addr,0,num_pages*4096);
189 //INFO("allocpages: %p (%llu pages) alignment=%u\n", pg_addr, num_pages, alignment);
196 * Frees pages previously allocated via palacios_allocate_pages().
197 * Note that palacios_allocate_pages() can allocate multiple pages with
198 * a single call; palacios_free_pages() is passed the page count (num_pages).
/*
 * Releases a region obtained from palacios_allocate_pages().
 * Some requests are rejected with an "Ignoring free pages" log; the guarding
 * condition is not visible here (presumably a null page_paddr - confirm).
 */
201 void palacios_free_pages(void * page_paddr, int num_pages)
203 //INFO("freepages: %p (%llu pages) alignment=%u\n", page_paddr, num_pages);
206 ERROR("Ignoring free pages: 0x%p (0x%lx)for %d pages\n", page_paddr, (uintptr_t)page_paddr, num_pages);
/*
 * Allocates size + 2*ALLOC_PAD bytes with malloc(), zeroes the whole block,
 * and returns a pointer ALLOC_PAD bytes past its start, so the caller's
 * region is padded on both sides.
 * A zero-byte request is reported as an error.
 * Both visible malloc() calls are identical, so node does not change the
 * allocation; flags is not used in the visible lines.
 */
216 palacios_alloc_extended(unsigned int size, unsigned int flags, int node) {
220 ERROR("ALERT ALERT attempt to kmalloc zero bytes rejected\n");
225 addr = malloc(size+2*ALLOC_PAD);
227 // currently no numa-zone specific kmalloc
228 addr = malloc(size+2*ALLOC_PAD);
232 ERROR("ALERT ALERT kmalloc has FAILED FAILED FAILED\n");
237 memset(addr,0,size+2*ALLOC_PAD);
240 //INFO("malloc: 0x%p (%llu bytes)\n",addr+ALLOC_PAD,size);
// Skip the leading pad; palacios_free() subtracts the same offset.
242 return addr+ALLOC_PAD;
/*
 * Allocation hook registered as .vmalloc in palacios_os_hooks.
 * A zero-byte request is reported as an error, and so is a failed allocation.
 * The allocation call itself is not visible here; the comment below
 * indicates that no separate vmalloc facility exists.
 */
246 palacios_valloc(unsigned int size)
252 ERROR("ALERT ALERT attempt to vmalloc zero bytes rejected\n");
256 // currently no vmalloc
260 ERROR("ALERT ALERT vmalloc has FAILED FAILED FAILED\n");
268 //INFO("valloc: 0x%p (%llu bytes)\n",addr,size);
/*
 * Release hook registered as .vfree in palacios_os_hooks.
 * Some pointers are rejected with an "Ignoring vfree" log; the guarding
 * condition is not visible here (presumably p == NULL - confirm).
 */
273 void palacios_vfree(void *p)
275 //INFO("vfree: 0x%p\n",p);
278 ERROR("Ignoring vfree: 0x%p\n",p);
287 * Allocates 'size' bytes of kernel memory.
288 * Returns the kernel virtual address of the memory allocated.
/* Convenience wrapper: palacios_alloc_extended() with flags 0 and node -1. */
291 palacios_alloc(unsigned int size)
293 return palacios_alloc_extended(size,0,-1);
297 * Frees memory that was previously allocated by palacios_alloc().
/*
 * Releases a block returned by palacios_alloc()/palacios_alloc_extended().
 * The ALLOC_PAD offset applied at allocation time is subtracted before the
 * pointer is handed to free().
 * Some pointers are rejected with an "Ignoring free" log; the guarding
 * condition is not visible here (presumably addr == NULL - confirm).
 * NOTE(review): addr-ALLOC_PAD is arithmetic on void *, a GNU C extension.
 */
300 palacios_free(void *addr)
302 //INFO("free: %p\n",addr-ALLOC_PAD);
305 ERROR("Ignoring free : 0x%p\n", addr);
309 free(addr-ALLOC_PAD);
313 * Converts a kernel virtual address to the corresponding physical address.
/*
 * Translation hook registered as .vaddr_to_paddr in palacios_os_hooks.
 * Per the comments below the mapping is identity; the parameter list and
 * return statement are not visible here.
 */
316 palacios_vaddr_to_paddr(
320 // our memory mapping is identity
321 // this currently does not include Nautilus PA offsetting
322 // as in Multiverse, but we don't envision running a VM
323 // within an HRT either, so we should be fine
328 * Converts a physical address to the corresponding kernel virtual address.
/* Translation hook registered as .paddr_to_vaddr; returns paddr unchanged because the mapping is identity. */
331 palacios_paddr_to_vaddr(
335 return paddr; // our memory mapping is identity, see v->p comment
339 * Runs a function on the specified CPU.
// Fragment of the cross-CPU call helper (presumably palacios_xcall, the .call_on_cpu hook); its header line is not visible here.
344 void (*fn)(void *arg),
// NOTE(review): the trailing 1 is presumably a "wait for completion" flag - confirm against smp_xcall().
349 smp_xcall(cpu_id,fn,arg,1);
/*
 * Heap-allocated bundle handed to nautilus_thread_target(): the Palacios
 * entry point, its argument (the arg member is used by this file but its
 * declaration is not visible here), and a NUL-terminated thread name.
 */
356 struct nautilus_thread_arg {
357 int (*fn)(void * arg);
359 char name[MAX_THREAD_NAME];
/*
 * Thread entry trampoline: unpacks the nautilus_thread_arg passed as 'in',
 * runs fn(arg), logs the thread name with the return code, and then frees
 * the nautilus_thread_arg.
 * 'out' is not used in the visible lines.
 */
362 static void nautilus_thread_target(void * in, void ** out)
364 struct nautilus_thread_arg * thread_info = (struct nautilus_thread_arg *)in;
367 ret = thread_info->fn(thread_info->arg);
369 INFO("Palacios Thread (%s) EXITING with return code %d\n", thread_info->name, ret);
// The trampoline owns thread_info; the creating function must not free it after a successful start.
371 palacios_free(thread_info);
375 * Creates a kernel thread.
/*
 * Packages fn/arg/thread_name into a heap-allocated nautilus_thread_arg and
 * starts nautilus_thread_target() on any CPU (CPU_ANY) via nk_thread_start().
 * The name is truncated to MAX_THREAD_NAME-1 characters and explicitly
 * terminated.
 * rctl is not used in the visible lines.
 * NOTE(review): the result of nk_thread_start() is not checked in the
 * visible lines; if it can fail, thread_info would leak - confirm.
 */
378 palacios_create_and_start_kernel_thread(
379 int (*fn) (void * arg),
382 v3_resource_control_t *rctl)
385 struct nautilus_thread_arg * thread_info = palacios_alloc(sizeof(struct nautilus_thread_arg));
386 nk_thread_id_t tid = 0;
389 ERROR("ALERT ALERT Unable to allocate thread\n");
393 thread_info->fn = fn;
394 thread_info->arg = arg;
// strncpy does not terminate on truncation, hence the explicit terminator on the next line.
395 strncpy(thread_info->name,thread_name,MAX_THREAD_NAME);
396 thread_info->name[MAX_THREAD_NAME-1] =0;
398 nk_thread_start(nautilus_thread_target, thread_info, 0, 0, 0, &tid, CPU_ANY);
405 * Starts a kernel thread on the specified CPU.
/*
 * Creates (without starting) a thread bound to cpu_id that will run
 * nautilus_thread_target() with a heap-allocated nautilus_thread_arg built
 * from fn/arg/thread_name.
 * The name is truncated to MAX_THREAD_NAME-1 characters and explicitly
 * terminated. Failure of nk_thread_create() is logged.
 * rctl is not used in the visible lines.
 * NOTE(review): unlike the two create-and-start variants in this file, no
 * NULL check on thread_info is visible between the palacios_alloc() call and
 * the first store through it - confirm and add one if it is indeed missing.
 * NOTE(review): whether thread_info is released when nk_thread_create()
 * fails is not visible here.
 */
408 palacios_create_thread_on_cpu(int cpu_id,
409 int (*fn)(void * arg),
412 v3_resource_control_t *rctl)
414 nk_thread_id_t newtid;
415 nk_thread_t * newthread = NULL;
416 struct nautilus_thread_arg * thread_info = palacios_alloc(sizeof(struct nautilus_thread_arg));
418 thread_info->fn = fn;
419 thread_info->arg = arg;
420 strncpy(thread_info->name, thread_name, MAX_THREAD_NAME);
421 thread_info->name[MAX_THREAD_NAME-1] = 0;
423 //INFO("CREATING A THREAD ON CPU ID: %d\n", cpu_id);
425 if (nk_thread_create(nautilus_thread_target, thread_info, 0, 0, 0, &newtid, cpu_id) < 0) {
426 ERROR("COULD NOT CREATE THREAD\n");
429 //INFO("newtid: %lu\n", newtid);
/* Registered as the .start_thread hook; takes an opaque thread handle. The body is not visible here. */
435 palacios_start_thread(void * th)
/*
 * Same packaging as palacios_create_and_start_kernel_thread(), but the
 * thread is started on the specific CPU cpu_id instead of CPU_ANY.
 * Allocation failure is logged. rctl is not used in the visible lines.
 * NOTE(review): the result of nk_thread_start() is not checked in the
 * visible lines - confirm.
 */
444 palacios_create_and_start_thread_on_cpu(int cpu_id,
445 int (*fn)(void * arg),
448 v3_resource_control_t *rctl )
453 struct nautilus_thread_arg * thread_info = palacios_alloc(sizeof(struct nautilus_thread_arg));
456 ERROR("ALERT ALERT Unable to allocate thread to start on cpu\n");
460 thread_info->fn = fn;
461 thread_info->arg = arg;
462 strncpy(thread_info->name,thread_name,MAX_THREAD_NAME);
463 thread_info->name[MAX_THREAD_NAME-1] =0;
465 nk_thread_start(nautilus_thread_target, thread_info, 0, 0, 0,&tid,cpu_id); //
473 * Rebind a kernel thread to the specified CPU
474 * The thread will be running on target CPU on return
475 * non-zero return means failure
/* Migration is not implemented: the request is logged and then reported as unsupported. The return value is not visible here. */
478 palacios_move_thread_to_cpu(int new_cpu_id,
482 INFO("Moving thread (%p) to cpu %d\n", thread_ptr, new_cpu_id);
483 ERROR("NOT CURRENTLY SUPPORTED\n");
489 * Returns the CPU ID that the caller is running on.
/* Registered as the .get_cpu hook; palacios_print_scoped() uses its result to index print_buffer. The body is not visible here. */
492 palacios_get_cpu(void)
/*
 * Sends an inter-processor interrupt with the given vector through the
 * calling CPU's APIC device.
 * cpu_id is passed straight through as the destination; the inline comment
 * records that the CPU-to-APIC-id mapping is still an open question.
 * vm is not used in the visible lines.
 */
498 palacios_interrupt_cpu( struct v3_vm_info * vm,
502 apic_ipi(per_cpu_get(apic),cpu_id,vector); // find out apic_dev * and cpu to apic id mapping
509 * Dispatches an interrupt to Palacios for handling.
/*
 * Builds a v3_interrupt record and, if a VM is registered for this vector in
 * irq_to_guest_map, delivers it with v3_deliver_irq().
 * The error field is fixed at 0. dev and regs are not used in the visible lines.
 * NOTE(review): vector is a signed int indexing a 256-entry table and no
 * range check is visible - confirm callers only pass 0..255.
 */
512 palacios_dispatch_interrupt( int vector, void * dev, struct pt_regs * regs ) {
513 struct v3_interrupt intr = {
515 .error = 0, //regs->orig_ax, /* TODO fix this */
519 if (irq_to_guest_map[vector]) {
520 v3_deliver_irq(irq_to_guest_map[vector], &intr);
526 * Instructs the kernel to forward the specified IRQ to Palacios.
/* Interrupt hooking is not implemented: the call only logs that it is unsupported. Registered as .hook_interrupt. */
529 palacios_hook_interrupt(struct v3_vm_info * vm,
530 unsigned int vector )
532 ERROR("UNSUPPORTED: PALACIOS_HOOK_INTERRUPT\n");
538 * Acknowledges an interrupt.
/* Interrupt acknowledgement is not implemented: the call only logs that it is unsupported. Registered as .ack_irq. */
541 palacios_ack_interrupt(
545 ERROR("UNSUPPORTED: PALACIOS_ACK_INTERRUPT\n");
550 * Returns the CPU frequency in kilohertz.
/*
 * Queries nk_detect_cpu_freq() for the calling CPU, stores the result in the
 * file-scope cpu_khz, and logs it.
 * A fallback path logs that the frequency is being faked as 1000000; the
 * condition and the assignment for that path are not visible here.
 */
553 palacios_get_cpu_khz(void)
556 uint32_t cpu = (uint32_t)my_cpu_id();
558 cpu_khz = nk_detect_cpu_freq(cpu);
560 INFO("CANNOT GET THE CPU FREQUENCY. FAKING TO 1000000\n");
564 INFO("Nautilus frequency at %u KHz\n",cpu_khz);
569 * Yield the CPU so other host OS tasks can run.
570 * This will return immediately if there is no other thread that is runnable
571 * And there is no real bound on how long it will yield
/* Registered as the .yield_cpu hook. The body is not visible here. */
574 palacios_yield_cpu(void)
581 * Yield the CPU so other host OS tasks can run.
582 * This will return immediately if there is no other thread that is runnable
583 * And there is no real bound on how long it will yield
/* Registered as the .sleep_cpu hook. Timed sleep is not available (see the comment below), so 'us' is presumably ignored - the body is not visible here. */
585 void palacios_sleep_cpu(unsigned int us)
587 // sleep not supported on Nautilus
/*
 * Registered as the .wakeup_cpu hook. Because threads never sleep here, a
 * call is treated as unexpected and only logged.
 * NOTE(review): this log message has no trailing newline, unlike the other
 * ERROR() messages in this file.
 */
593 void palacios_wakeup_cpu(void *thread)
595 // threads never go to sleep, so shouldn't happen
596 ERROR("ERROR ERROR: WAKEUP_CPU CALLED. THREADS ARE NEVER ASLEEP");
602 * Returns NULL on failure.
/* Allocates storage for one spinlock_t with palacios_alloc() and logs on failure. Mutexes in this interface are spinlocks. */
605 palacios_mutex_alloc(void)
607 spinlock_t *lock = palacios_alloc(sizeof(spinlock_t));
612 ERROR("ALERT ALERT Unable to allocate lock\n");
/* Treats the opaque mutex as a spinlock_t and registers it with the lock checker. The spinlock initialization call is not visible here. */
619 void palacios_mutex_init(void *mutex)
621 spinlock_t *lock = (spinlock_t*)mutex;
625 LOCKCHECK_ALLOC(lock);
/* Tears down the spinlock behind the opaque mutex and reports it to the lock checker as freed. The storage itself is not released here. */
630 void palacios_mutex_deinit(void *mutex)
632 spinlock_t *lock = (spinlock_t*)mutex;
635 spinlock_deinit(lock);
636 LOCKCHECK_FREE(lock);
/*
 * Releases the mutex storage and reports it to the lock checker.
 * NOTE(review): LOCKCHECK_FREE receives the pointer after palacios_free()
 * has already released it; if the macro dereferences its argument this is a
 * use-after-free - consider swapping the two statements.
 * NOTE(review): palacios_mutex_deinit() also calls LOCKCHECK_FREE, so a
 * deinit-then-free sequence reports the same lock twice - confirm intended.
 */
645 palacios_mutex_free(void * mutex) {
646 palacios_free(mutex);
647 LOCKCHECK_FREE(mutex);
/* Acquires the spinlock behind the opaque mutex, bracketed by lock-checker hooks. must_spin is not used in the visible lines. */
654 palacios_mutex_lock(void * mutex, int must_spin) {
655 LOCKCHECK_LOCK_PRE(mutex);
656 spin_lock((spinlock_t *)mutex);
657 LOCKCHECK_LOCK_POST(mutex);
662 * Locks a mutex, disabling interrupts on this core
/*
 * Acquires the spinlock with spin_lock_irq_save() and returns the saved
 * flags packed into a void * for the caller to pass back to
 * palacios_mutex_unlock_irqrestore().
 * must_spin is not used in the visible lines.
 * NOTE(review): flags is handed to LOCKCHECK_LOCK_IRQSAVE_PRE before it is
 * assigned by spin_lock_irq_save(); unless its declaration (not visible
 * here) initializes it, the macro sees an indeterminate value.
 */
665 palacios_mutex_lock_irqsave(void * mutex, int must_spin) {
669 LOCKCHECK_LOCK_IRQSAVE_PRE(mutex,flags);
670 flags = spin_lock_irq_save((spinlock_t *)mutex);
671 LOCKCHECK_LOCK_IRQSAVE_POST(mutex,flags);
673 //INFO("lock irqsave flags=%lu\n",flags);
674 return (void *)flags;
/* Releases the spinlock behind the opaque mutex, bracketed by lock-checker hooks. */
682 palacios_mutex_unlock(
686 LOCKCHECK_UNLOCK_PRE(mutex);
687 spin_unlock((spinlock_t *)mutex);
688 LOCKCHECK_UNLOCK_POST(mutex);
693 * Unlocks a mutex and restores previous interrupt state on this core
/*
 * Releases the spinlock and restores the interrupt state captured by
 * palacios_mutex_lock_irqsave().
 * flags is the opaque value that function returned; it is unpacked through
 * unsigned long and narrowed to uint8_t for spin_unlock_irq_restore().
 */
696 palacios_mutex_unlock_irqrestore(void *mutex, void *flags)
698 //INFO("unlock irqrestore flags=%lu\n",(unsigned long)flags);
699 LOCKCHECK_UNLOCK_IRQRESTORE_PRE(mutex,(unsigned long)flags);
700 // This is correct, flags is opaque
701 spin_unlock_irq_restore((spinlock_t *)mutex,(uint8_t) (unsigned long)flags);
702 LOCKCHECK_UNLOCK_IRQRESTORE_POST(mutex,(unsigned long)flags);
707 * Structure used by the Palacios hypervisor to interface with the host kernel.
/*
 * Host-interface callback table passed to Init_V3() in palacios_vmm_init().
 * Each member is bound to the corresponding palacios_* function in this
 * file. Interrupt hooking/acknowledgement and thread migration are bound to
 * functions that only log that the operation is unsupported.
 */
709 static struct v3_os_hooks palacios_os_hooks = {
710 .print = palacios_print_scoped,
711 .allocate_pages = palacios_allocate_pages,
712 .free_pages = palacios_free_pages,
713 .vmalloc = palacios_valloc,
714 .vfree = palacios_vfree,
715 .malloc = palacios_alloc,
716 .free = palacios_free,
717 .vaddr_to_paddr = palacios_vaddr_to_paddr,
718 .paddr_to_vaddr = palacios_paddr_to_vaddr,
719 .hook_interrupt = palacios_hook_interrupt,
720 .ack_irq = palacios_ack_interrupt,
721 .get_cpu_khz = palacios_get_cpu_khz,
722 .start_kernel_thread = palacios_create_and_start_kernel_thread,
723 .yield_cpu = palacios_yield_cpu,
724 .sleep_cpu = palacios_sleep_cpu,
725 .wakeup_cpu = palacios_wakeup_cpu,
726 .mutex_alloc = palacios_mutex_alloc,
727 .mutex_free = palacios_mutex_free,
728 .mutex_lock = palacios_mutex_lock,
729 .mutex_unlock = palacios_mutex_unlock,
730 .mutex_lock_irqsave = palacios_mutex_lock_irqsave,
731 .mutex_unlock_irqrestore = palacios_mutex_unlock_irqrestore,
732 .get_cpu = palacios_get_cpu,
733 .interrupt_cpu = palacios_interrupt_cpu,
734 .call_on_cpu = palacios_xcall,
735 .create_thread_on_cpu = palacios_create_thread_on_cpu,
736 .start_thread = palacios_start_thread,
737 .move_thread_to_cpu = palacios_move_thread_to_cpu, // unsupported
/*
 * Brings up Palacios on top of Nautilus:
 *   1. builds a CPU bitmask of (num_cpus / 8) + 1 bytes and sets bits in it
 *      while looping over the CPUs;
 *   2. clears irq_to_guest_map and the vms table;
 *   3. allocates the per-CPU print buffers;
 *   4. calls Init_V3() with the hook table, the mask, the CPU count and options;
 *   5. initializes the console when V3_CONFIG_CONSOLE is defined;
 *   6. frees the CPU mask.
 * NOTE(review): the result of Init_V3() is not used in the visible lines.
 * NOTE(review): whether cpu_mask is freed on the "Must initialize at least
 * one CPU" path is not visible here.
 */
741 int palacios_vmm_init(char * options)
743 int num_cpus = nautilus_info.sys.num_cpus;
744 char * cpu_mask = NULL;
751 cpu_mask = palacios_alloc((num_cpus / 8) + 1);
754 ERROR("Cannot allocate cpu mask\n");
758 memset(cpu_mask, 0, (num_cpus / 8) + 1);
760 for (i = 0; i < num_cpus; i++) {
// major/minor are presumably the byte index and bit index for CPU i; their computation is not visible here.
765 *(cpu_mask + major) |= (0x1 << minor);
768 ERROR("Must initialize at least one CPU\n");
773 memset(irq_to_guest_map, 0, sizeof(struct v3_vm_info *) * 256);
775 memset(vms,0,sizeof(vms));
777 if (init_print_buffers()) {
778 INFO("Cannot initialize print buffers\n");
779 palacios_free(cpu_mask);
783 INFO("printbuffer init done\n");
785 INFO("NR_CPU: %d\n", NR_CPUS);
787 INFO("palacios_init starting - calling init_v3\n");
789 INFO("calling init_v3 = %p\n", Init_V3);
// Only the first byte of the mask is printed here (*cpu_mask is a single char).
791 INFO("num_cpus: %d\ncpu_mask: %x\noptions: %s\n", num_cpus, *cpu_mask, options);
793 Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options);
795 INFO("init_v3 done\n");
797 #ifdef V3_CONFIG_CONSOLE
798 INFO("Initializing console\n");
799 nautilus_console_init();
802 palacios_free(cpu_mask);
/*
 * Shuts down the host interface: deinitializes the console when
 * V3_CONFIG_CONSOLE is defined, logs completion, and releases the per-CPU
 * print buffers. Other shutdown steps, if any, are not visible here.
 */
810 int palacios_vmm_exit( void )
813 #ifdef V3_CONFIG_CONSOLE
814 nautilus_console_deinit();
819 INFO("palacios shutdown complete\n");
821 deinit_print_buffers();
/*
 * Reserves a slot in vms[] for a VM about to be created: scans for a slot
 * with an empty name, copies name into it (truncated to MAX_VM_NAME-1
 * characters and explicitly terminated), and makes it the selected VM.
 * What happens when no slot is free is not visible here.
 */
827 void palacios_inform_new_vm_pre(char *name)
830 for (i=0;i<NR_VMS;i++) {
// An empty name marks an unused slot.
831 if (!vms[i].name[0]) {
832 strncpy(vms[i].name,name,MAX_VM_NAME);
833 vms[i].name[MAX_VM_NAME-1]=0;
834 selected_vm = &vms[i];
/*
 * Second half of VM registration: looks up the slot reserved under name and
 * logs the registration, or logs an error if no slot has that name.
 * The statement that stores vm into the slot is not visible here.
 */
840 void palacios_inform_new_vm_post(char *name, struct v3_vm_info *vm)
842 struct nk_vm_state *n = palacios_find_vm_by_name(name);
846 INFO("Registered VM %p with name %s, node=%p, selected VM=%p\n",
847 vm, n->name, n, selected_vm);
849 ERROR("Cannot find VM with name \"%s\"\n",name);
/* Looks up the VM record by name and special-cases the currently selected VM; the actions taken are not visible here. */
853 void palacios_inform_free_vm(char *name)
855 struct nk_vm_state *n = palacios_find_vm_by_name(name);
857 if (n==selected_vm) {
/* Variant of palacios_inform_free_vm() that operates on the currently selected VM; the remainder of the body is not visible here. */
869 void palacios_inform_free_selected_vm()
871 struct nk_vm_state *n = selected_vm;
/*
 * Linear search of vms[] for a slot whose name equals 'name' over at most
 * MAX_VM_NAME characters.
 * NOTE(review): an empty 'name' compares equal to any unused slot (empty
 * name), so it would match a free slot - confirm callers never pass "".
 * NOTE(review): stored names are truncated to MAX_VM_NAME-1 characters by
 * palacios_inform_new_vm_pre(), so a lookup with the original, longer name
 * will not match the stored one.
 */
883 struct nk_vm_state *palacios_find_vm_by_name(char *name)
886 for (i=0;i<NR_VMS;i++) {
887 if (!strncmp(vms[i].name,name,MAX_VM_NAME)) {
/* Linear search of vms[] for the slot whose vm pointer equals the given Palacios VM handle. */
894 struct nk_vm_state *palacios_find_vm(struct v3_vm_info *vm)
897 for (i=0;i<NR_VMS;i++) {
898 if (vms[i].vm == vm) {
/* Looks up the record for the given VM handle; presumably makes it the selected VM, but that assignment is not visible here. */
905 void palacios_select_vm(struct v3_vm_info *vm)
907 struct nk_vm_state *n = palacios_find_vm(vm);
/* Looks up the record for the given VM name; presumably makes it the selected VM, but that assignment is not visible here. */
913 void palacios_select_vm_by_name(char *name)
915 struct nk_vm_state *n = palacios_find_vm_by_name(name);
/* Accessor returning a struct nk_vm_state pointer (presumably selected_vm); the body is not visible here. */
921 struct nk_vm_state *palacios_get_selected_vm()