From: James Whang Date: Mon, 28 Dec 2015 23:08:43 +0000 (-0600) Subject: Nautilus Host Support (proof of concept) X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=66c338acdc299d7496da982999ff3cd3828e442a Nautilus Host Support (proof of concept) This is a simple proof-of-concept implementation of the Palacios host interface on top of the Nautilus kernel. It is sufficient to allow us to boot a guest OS running Linux. There are several caveats which can be seen in the comments. --- diff --git a/Kconfig b/Kconfig index 6b2cf2a..ba89fa9 100644 --- a/Kconfig +++ b/Kconfig @@ -50,7 +50,24 @@ config LINUX_KERN config MINIX bool "MINIX 3" help - This enables the necesary options to compile Palacios with Kitten + This enables the necessary options to compile Palacios with Minix + +config NAUTILUS + bool "Nautilus" + select V3_DECODER + select FRAME_POINTER + help + This enables the necessary options to compile Palacios + for inclusion in the Nautilus kernel + +config NAUTILUS_KERN + depends on NAUTILUS + string "Nautilus Kernel Source directory" + default "../nautilus" + help + This is the directory containing the + Nautilus sources + config OTHER_OS bool "Other OS" diff --git a/Makefile b/Makefile index bb0860b..8e51d8f 100644 --- a/Makefile +++ b/Makefile @@ -483,6 +483,13 @@ else DEFAULT_EXTRA_TARGETS= endif +ifdef V3_CONFIG_NAUTILUS +DEFAULT_EXTRA_TARGETS=nautilus +else +DEFAULT_EXTRA_TARGETS= +endif + + # The all: target is the default when no target is given on the # command line. # This allow a user to issue only 'make' to build a kernel including modules @@ -493,8 +500,19 @@ all: palacios $(DEFAULT_EXTRA_TARGETS) ifdef V3_CONFIG_LINUX CFLAGS += -mcmodel=kernel else +ifdef V3_CONFIG_NAUTILUS +CFLAGS += -O2 \ + -fno-omit-frame-pointer \ + -ffreestanding \ + -fno-stack-protector \ + -fno-strict-aliasing \ + -mno-red-zone \ + -mcmodel=large +LDFLAGS += -z max-page-size=0x1000 +else CFLAGS += -fPIC endif +endif ifdef V3_CONFIG_FRAME_POINTER CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) @@ -639,6 +657,11 @@ linux_module/v3vee.ko: linux_module/*.c linux_module/*.h libv3vee.a linux_module: linux_module/v3vee.ko +nautilus/libnautilus.a: nautilus/*.c nautilus/*.h libv3vee.a + cd nautilus/ && make + cp nautilus/libnautilus.a . 
+ +nautilus: nautilus/libnautilus.a palacios.asm: palacios diff --git a/nautilus/console.c b/nautilus/console.c new file mode 100644 index 0000000..f571a83 --- /dev/null +++ b/nautilus/console.c @@ -0,0 +1,140 @@ +/* + Interface to Nautilus screen and keyboard +*/ + +#include +#include +#include +#include + + +#include + +#include +#include + +#include "palacios.h" + +/* + This is a gruesome hack to allow the VM designated by the + host as "the_vm" to do I/O to the standard VGA text mode console +*/ + +extern void *the_vm; + +static void * palacios_tty_open(void * private_data, unsigned int width, unsigned int height) +{ + if (width!=80 || height!=25) { + ERROR("Console is wrong size\n"); + return 0; + } + INFO("Console connected\n"); + return (void*)1; +} + + +static int palacios_tty_cursor_set(void * console, int x, int y) +{ + if (console) { + term_setpos(x,y); + return 0; + } else { + return -1; + } +} + +static int palacios_tty_character_set(void * console, int x, int y, char c, unsigned char style) +{ + if (console) { + term_putc(c,style,x,y); + return 0; + } else { + return -1; + } +} + +static int palacios_tty_scroll(void * console, int lines) +{ + if (console) { + int i; + for (i=0;i // for panic + +#include "palacios-nautilus-mm.h" +#include "palacios-nautilus-mm-test.h" +#include "palacios.h" + +void test_palacios_mm(unsigned num_pages_limit) +{ + uintptr_t some_ptr; + unsigned int i = 0; + unsigned alignment = 4096; // gonna keep this constant for now since palacios only uses 4k pages + num_pages_limit -= 10; + + /* Allocate a gigantic piece of memory at once */ + some_ptr = alloc_palacios_pgs(num_pages_limit, alignment, 0, 0, 0); + if(!some_ptr) { + printk("ERROR IN PALACIOS-MM TEST: returned bogus address when not supposed to\n"); + panic(); + } + free_palacios_pgs(some_ptr, num_pages_limit); + + /* check if free_palacios_pg worked */ + some_ptr = alloc_palacios_pgs(100, alignment, 0, 0, 0); + if(!some_ptr) { + printk("FREE_PALACIOS_PGS DIDN'T WORK\n"); + panic(); + } + + /* Allocate many small pieces of memory consecutively */ + for(i = 0; i < num_pages_limit/100; i++) { + free_palacios_pgs(some_ptr, 100); + some_ptr = alloc_palacios_pgs(100, alignment, 0, 0, 0); + if (!some_ptr) { + printk("ERROR IN PALACIOS-MM TEST: returned bogus address when not supposed to\n"); + panic(); + } + } + + free_palacios_pgs(some_ptr, 100); + + uintptr_t ptrs[num_pages_limit]; + + for(i = 0; i < num_pages_limit/100; i++) { + ptrs[i] = alloc_palacios_pgs(100, alignment, 0, 0, 0); + } + + // first free random pages and then try to allocate them again + free_palacios_pgs(ptrs[0], 100); + free_palacios_pgs(ptrs[3], 100); + free_palacios_pgs(ptrs[4], 100); + + ptrs[0] = alloc_palacios_pgs(100, alignment, 0, 0, 0); + ptrs[3] = alloc_palacios_pgs(100, alignment, 0, 0, 0); + ptrs[4] = alloc_palacios_pgs(100, alignment, 0, 0, 0); + + for(i = 0; i < num_pages_limit/100; i++) { + free_palacios_pgs(ptrs[i], 100); + } + + + // TODO: WRITE MORE TESTS + printk("ALL TESTS PASSED - FREED ALL MEMORY\n"); +} diff --git a/nautilus/palacios-nautilus-mm-test.h b/nautilus/palacios-nautilus-mm-test.h new file mode 100644 index 0000000..89ad4ad --- /dev/null +++ b/nautilus/palacios-nautilus-mm-test.h @@ -0,0 +1 @@ +void test_palacios_mm(unsigned num_pages_limit); diff --git a/nautilus/palacios-nautilus-mm.c b/nautilus/palacios-nautilus-mm.c new file mode 100644 index 0000000..4754922 --- /dev/null +++ b/nautilus/palacios-nautilus-mm.c @@ -0,0 +1,217 @@ +#include "palacios-nautilus-mm.h" +#include 
"palacios-nautilus-mm-test.h" + +/* + + Page-granularity memory management + + This impedence-matches between Nautilus's singular allocator (malloc/free) + and page-level memory allocation needed in Palacios. It does so via + a last-fit-optimized bitmap allocator that operates over a large pool + allocated from Nautilus at startup. + + Note that this allocation currently ignores NUMA and other constraints + as well as general filter expressions. + +*/ + +static uint64_t get_order(uint64_t n) +{ + uint64_t top_bit_pos; + + top_bit_pos = 63 - __builtin_clz(n); + + return top_bit_pos + !!(n & ~(1< 0); + pool.bitmap = palacios_alloc(bitmap_size); + + if (!pool.bitmap) { + ERROR("Palacios MM: Failed to allocate bitmap\n"); + return -1; + } + // note that this may not be aligned + pool.alloc_base_addr = (u64) palacios_alloc(PAGE_SIZE * num_nk_pages); + + if (!pool.alloc_base_addr) { + ERROR("Palacios MM: FAILED TO ALLOCATE MEMORY\n"); + return -1; + } else { + INFO("Palacios MM: success, alloc_base_addr=%p\n",pool.alloc_base_addr); + } + + // Align our memory to a page boundary + pool.base_addr = (u64) (((uint64_t)pool.alloc_base_addr & (~0xfffULL)) + PALACIOS_PAGE_SIZE); + + INFO("Palacios MM: success, cleaned up base_addr=%p\n",pool.base_addr); + + // We have one fewer pages than requested due to the need to align + // the result of the malloc + pool.num_pages = num_palacios_pages - 1 ; + pool.num_free_pages = num_palacios_pages - 1; + pool.pool_start = 0; + + // do unit test if desired + //test_palacios_mm(num_palacios_pages); + + return 0; + +} + + +int deinit_palacios_nautilus_mm(void) { + // free pages from nk + free((void*)pool.alloc_base_addr); pool.alloc_base_addr = 0; + free((void*)pool.bitmap); pool.bitmap = 0; + + return 0; +} + +static uintptr_t alloc_contig_pgs(u64 num_pages, u32 alignment) +{ + + int step = 1; + int i = 0; + int j = 0; + + if (num_pages > pool.num_free_pages) { + ERROR("ERROR(PALACIOS MM) : NOT ENOUGH MEMORY\n"); + return 0; + } + + //INFO("Allocating %llu pages (align=%lu)\n", num_pages, (unsigned long)alignment); + + if (!pool.bitmap || !pool.base_addr) { + ERROR("ERROR: Attempting to allocate from uninitialized memory pool \n"); + return 0; + } + + if (alignment > 0) { + if (alignment != 4096) { + ERROR("ERROR: cannot handle alignment that is not 4KB\n"); + return 0; + } + step = alignment / 4096; + } + + // scan pages from last search forward + for (i = pool.pool_start; i < (pool.num_pages - num_pages + 1) ; ) { + + for (j = i; j < (i+num_pages); j++) { + if (get_page_bit(j)) { + break; + } + } + + if (j==(i+num_pages)) { + for (j = i; j<(i+num_pages); j++) { + set_page_bit(j); + } + + pool.pool_start = j % pool.num_pages; + + return (void*) (pool.base_addr + (i * 4096)); + + } else { + i = j+1; + } + } + + + // scan from front if we didn't find it + for (i = 0; i < (pool.num_pages - num_pages + 1) ; ) { + + for (j = i; j < (i+num_pages); j++) { + if (get_page_bit(j)) { + break; + } + } + + if (j==(i+num_pages)) { + for (j = i; j<(i+num_pages); j++) { + set_page_bit(j); + } + + pool.pool_start = j % pool.num_pages; + + return (void*)( pool.base_addr + (i * 4096)); + + } else { + i = j+1; + } + } + + + ERROR("Palacios MM: ERROR! 
Cannot allocate memory...\n"); + ERROR("Palacios MM: Pool has %d pages, trying to allocate %d pages\n", pool.num_pages, num_pages); + + return 0; +} + +uintptr_t alloc_palacios_pgs(u64 num_pages, u32 alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) +{ + uintptr_t addr = 0; + addr = alloc_contig_pgs(num_pages, alignment); + return addr; +} + + +void free_palacios_pgs(uintptr_t pg_addr, u64 num_pages) +{ + int pg_idx = ((u64)pg_addr - pool.base_addr) / PALACIOS_PAGE_SIZE; + int i = 0; + for (i = pg_idx; i < pg_idx+num_pages; i++) { + clear_page_bit(i); + } +} + + +void free_palacios_pg(uintptr_t pg_addr) +{ + free_palacios_pgs(pg_addr, 1); +} diff --git a/nautilus/palacios-nautilus-mm.h b/nautilus/palacios-nautilus-mm.h new file mode 100644 index 0000000..8a35683 --- /dev/null +++ b/nautilus/palacios-nautilus-mm.h @@ -0,0 +1,26 @@ +#ifndef _PALACIOS_MM_H +#define _PALACIOS_MM_H + +#include +#include +#include +#include + + + +#include "palacios.h" + +#define PALACIOS_PAGE_SIZE 4096 + +static inline int get_page_bit(int index); +static inline void set_page_bit(int index); +static uintptr_t alloc_contig_pgs(u64 num_pages, u32 alignment); + + +int init_palacios_nautilus_mm(uint64_t memsize); +int deinit_palacios_nautilus_mm(void); +uintptr_t alloc_palacios_pgs(u64 num_pages, u32 alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state); +void free_palacios_pgs(uintptr_t base_addr, u64 num_pages); +void free_palacios_pg(uintptr_t base_addr); + +#endif diff --git a/nautilus/palacios-stubs.c b/nautilus/palacios-stubs.c new file mode 100644 index 0000000..61bf38f --- /dev/null +++ b/nautilus/palacios-stubs.c @@ -0,0 +1,801 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "palacios.h" +#include "palacios-nautilus-mm.h" +#include "console.h" + + +/* + This is a simple proof-of-concept implementation of the Palacios + host interface on top of Nautilus. It is sufficient to allow + us to boot a guest OS running Linux. A few things to note: + + - Nautilus currently has a grand-unified allocator designed to help + support parallel run-time integration. All of alloc/valloc/page + allocation are built on top of that. See palacios-nautilus-mm.c + for how this works for page allocation. + - For page allocation, constraints, NUMA, and filter expressions are + ignored. + - To make this work, you also need updates on the Nautilus side. + (these will eventually get into the Nautilus repo) + - thread migration is not supported currently + - hooking of host interrupts is not supported currently. + - Palacios can sleep, yield, wakeup, etc, but be aware + that Nautilus threads operate differently than those of + a traditional kernel. + + Usage: + - Do Nautilus regular startup to bring all cores to idle + - From a kernel thread, ideally the init thread on core 0, + do palacios_vmm_init(memory_size_bytes,options) + - Create, launch, etc, VMs using the Palacios v3_* functions + (note that these are NOT wrapped here) + - Console assumes void *the_vm is defined in the host, and it + is whatever a v3_create_vm() returned. This is the VM that + has console access (keyboard and screen). 
+ - After you are done, do a palacios_vmm_deinit(); + +*/ + +// The following can be used to track memory bugs +// zero memory after allocation (now applies to valloc and page alloc as well) +#define ALLOC_ZERO_MEM 1 +// pad allocations by this many bytes on both ends of block (heap only) +#define ALLOC_PAD 0 +#define MAX_THREAD_NAME 32 + + +int run_nk_thread = 0; + +static struct v3_vm_info * irq_to_guest_map[256]; + +static unsigned int cpu_khz=-1; + +static char *print_buffer[NR_CPUS]; + +static void deinit_print_buffers(void) +{ + int i; + + for (i=0;i=0) { + printk(KERN_INFO "palacios (pcore %u vm %s vcore %u): %s", + cpu, + "some_guest", + vcore, + buf); + } else { + printk(KERN_INFO "palacios (pcore %u vm %s): %s", + cpu, + "some_guest", + buf); + } + } else { + printk(KERN_INFO "palacios (pcore %u): %s", + cpu, + buf); + } + + return; +} + + + +/* + * Allocates a contiguous region of pages of the requested size. + * Returns the physical address of the first page in the region. + */ +void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state) +{ + void * pg_addr = NULL; + + if (num_pages<=0) { + ERROR("ALERT ALERT Attempt to allocate zero or fewer pages (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state); + return NULL; + } + + pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment, node_id, filter_func, filter_state); + + if (!pg_addr) { + ERROR("ALERT ALERT Page allocation has FAILED Warning (%d pages, alignment %d, node %d, filter_func %p, filter_state %p)\n",num_pages, alignment, node_id, filter_func, filter_state); + return NULL; + } + +#if ALLOC_ZERO_MEM + memset(pg_addr,0,num_pages*4096); +#endif + + // INFO("allocpages: %p (%llu pages) alignment=%u\n", pg_addr, num_pages, alignment); + + return pg_addr; +} + + +/** + * Frees a page previously allocated via palacios_allocate_page(). + * Note that palacios_allocate_page() can allocate multiple pages with + * a single call while palacios_free_page() only frees a single page. 
+ */ + +void palacios_free_pages(void * page_paddr, int num_pages) { + if (!page_paddr) { + ERROR("Ignoring free pages: 0x%p (0x%lx)for %d pages\n", page_paddr, (uintptr_t)page_paddr, num_pages); + return; + } + free_palacios_pgs((uintptr_t)page_paddr, num_pages); + + // INFO("freepages: %p (%llu pages) alignment=%u\n", page_paddr, num_pages); +} + + +void * +palacios_alloc_extended(unsigned int size, unsigned int flags, int node) { + void * addr = NULL; + + if (size==0) { + ERROR("ALERT ALERT attempt to kmalloc zero bytes rejected\n"); + return NULL; + } + + if (node==-1) { + addr = malloc(size+2*ALLOC_PAD); + } else { + // currently no numa-zone specific kmalloc + addr = malloc(size+2*ALLOC_PAD); + } + + if (!addr) { + ERROR("ALERT ALERT kmalloc has FAILED FAILED FAILED\n"); + return NULL; + } + +#if ALLOC_ZERO_MEM + memset(addr,0,size+2*ALLOC_PAD); +#endif + + //INFO("malloc: 0x%p (%llu bytes)\n",addr+ALLOC_PAD,size); + + return addr+ALLOC_PAD; +} + +void * +palacios_valloc(unsigned int size) +{ + void * addr = NULL; + + if (size==0) { + ERROR("ALERT ALERT attempt to vmalloc zero bytes rejected\n"); + return NULL; + } + + // currently no vmalloc + addr = malloc(size); + + if (!addr) { + ERROR("ALERT ALERT vmalloc has FAILED FAILED FAILED\n"); + return NULL; + } + +#if ALLOC_ZERO_MEM + memset(addr,0,size); +#endif + + //INFO("valloc: 0x%p (%llu bytes)\n",addr,size); + + return addr; +} + +void palacios_vfree(void *p) +{ + if (!p) { + ERROR("Ignoring vfree: 0x%p\n",p); + return; + } + // no vfree currently + free(p); + + //INFO("vfree: 0x%p\n",p); +} + +/** + * Allocates 'size' bytes of kernel memory. + * Returns the kernel virtual address of the memory allocated. + */ +void * +palacios_alloc(unsigned int size) +{ + return palacios_alloc_extended(size,0,-1); +} + +/** + * Frees memory that was previously allocated by palacios_alloc(). + */ +void +palacios_free(void *addr) +{ + return; + if (!addr) { + ERROR("Ignoring free : 0x%p\n", addr); + return; + } + // no kfree + free(addr-ALLOC_PAD); + //INFO("free: %p\n",addr-ALLOC_PAD); +} + +/** + * Converts a kernel virtual address to the corresponding physical address. + */ +void * +palacios_vaddr_to_paddr( + void * vaddr +) +{ + return vaddr; // our memory mapping is identity + +} + +/** + * Converts a physical address to the corresponding kernel virtual address. + */ +void * +palacios_paddr_to_vaddr( + void * paddr +) +{ + return paddr; // our memory mapping is identity +} + +/** + * Runs a function on the specified CPU. + */ +void +palacios_xcall( + int cpu_id, + void (*fn)(void *arg), + void * arg +) +{ + + smp_xcall(cpu_id,fn,arg,1); + + return; +} + + + +struct nautilus_thread_arg { + int (*fn)(void * arg); + void *arg; + char name[MAX_THREAD_NAME]; +}; + +static void nautilus_thread_target(void * in, void ** out) +{ + struct nautilus_thread_arg * thread_info = (struct nautilus_thread_arg *)in; + int ret; + + ret = thread_info->fn(thread_info->arg); + + INFO("Palacios Thread (%s) EXITING with return code %d\n", thread_info->name, ret); + + palacios_free(thread_info); +} + +/** + * Creates a kernel thread. 
+ */ +void * +palacios_create_and_start_kernel_thread( + int (*fn) (void * arg), + void * arg, + char * thread_name, + v3_resource_control_t *rctl) +{ + + struct nautilus_thread_arg * thread_info = palacios_alloc(sizeof(struct nautilus_thread_arg)); + nk_thread_id_t tid = 0; + + if (!thread_info) { + ERROR("ALERT ALERT Unable to allocate thread\n"); + return NULL; + } + + thread_info->fn = fn; + thread_info->arg = arg; + strncpy(thread_info->name,thread_name,MAX_THREAD_NAME); + thread_info->name[MAX_THREAD_NAME-1] =0; + + nk_thread_start(nautilus_thread_target, thread_info, 0, 0, 0, &tid, CPU_ANY); + + return tid; +} + + +/** + * Starts a kernel thread on the specified CPU. + */ +void * +palacios_create_thread_on_cpu(int cpu_id, + int (*fn)(void * arg), + void * arg, + char * thread_name, + v3_resource_control_t *rctl) +{ + nk_thread_id_t newtid; + nk_thread_t * newthread = NULL; + struct nautilus_thread_arg * thread_info = palacios_alloc(sizeof(struct nautilus_thread_arg)); + + thread_info->fn = fn; + thread_info->arg = arg; + strncpy(thread_info->name, thread_name, MAX_THREAD_NAME); + thread_info->name[MAX_THREAD_NAME-1] = 0; + + //INFO("CREATING A THREAD ON CPU ID: %d\n", cpu_id); + + if (nk_thread_create(nautilus_thread_target, thread_info, 0, 0, 0, &newtid, cpu_id) < 0) { + ERROR("COULD NOT CREATE THREAD\n"); + return NULL; + } + //INFO("newtid: %lu\n", newtid); + + return newtid; +} + +void +palacios_start_thread(void * th) +{ + nk_thread_run(th); +} + +/* + Convenience wrapper +*/ +void * +palacios_create_and_start_thread_on_cpu(int cpu_id, + int (*fn)(void * arg), + void * arg, + char * thread_name, + v3_resource_control_t *rctl ) +{ + + nk_thread_id_t tid; + + struct nautilus_thread_arg * thread_info = palacios_alloc(sizeof(struct nautilus_thread_arg)); + + if (!thread_info) { + ERROR("ALERT ALERT Unable to allocate thread to start on cpu\n"); + return NULL; + } + + thread_info->fn = fn; + thread_info->arg = arg; + strncpy(thread_info->name,thread_name,MAX_THREAD_NAME); + thread_info->name[MAX_THREAD_NAME-1] =0; + + nk_thread_start(nautilus_thread_target, thread_info, 0, 0, 0,&tid,cpu_id); // + + return tid; +} + + + +/** + * Rebind a kernel thread to the specified CPU + * The thread will be running on target CPU on return + * non-zero return means failure + */ +int +palacios_move_thread_to_cpu(int new_cpu_id, + void * thread_ptr) +{ + + INFO("Moving thread (%p) to cpu %d\n", thread_ptr, new_cpu_id); + ERROR("NOT CURRENTLY SUPPORTED\n"); + return -1; +} + + +/** + * Returns the CPU ID that the caller is running on. + */ +unsigned int +palacios_get_cpu(void) +{ + return my_cpu_id(); +} + +static void +palacios_interrupt_cpu( struct v3_vm_info * vm, + int cpu_id, + int vector) +{ + apic_ipi(per_cpu_get(apic),cpu_id,vector); // find out apic_dev * and cpu to apic id mapping +} + +struct pt_regs; + + +/** + * Dispatches an interrupt to Palacios for handling. + */ +static void +palacios_dispatch_interrupt( int vector, void * dev, struct pt_regs * regs ) { + struct v3_interrupt intr = { + .irq = vector, + .error = 0, //regs->orig_ax, /* TODO fix this */ + .should_ack = 1, + }; + + if (irq_to_guest_map[vector]) { + v3_deliver_irq(irq_to_guest_map[vector], &intr); + } + +} + +/** + * Instructs the kernel to forward the specified IRQ to Palacios. + */ +static int +palacios_hook_interrupt(struct v3_vm_info * vm, + unsigned int vector ) +{ + ERROR("UNSUPPORTED: PALACIOS_HOOK_INTERRUPT\n"); + return -1; +} + + +/** + * Acknowledges an interrupt. 
+ */ +static int +palacios_ack_interrupt( + int vector +) +{ + ERROR("UNSUPPORTED: PALACIOS_ACK_INTERRUPT\n"); + return -1; +} + +/** + * Returns the CPU frequency in kilohertz. + */ +unsigned int +palacios_get_cpu_khz(void) +{ + if (cpu_khz==-1) { + uint32_t cpu = (uint32_t)my_cpu_id(); + + cpu_khz = nk_detect_cpu_freq(cpu); + if (cpu_khz==-1) { + INFO("CANNOT GET THE CPU FREQUENCY. FAKING TO 1000000\n"); + cpu_khz=1000000; + } + } + INFO("Nautilus frequency at %u KHz\n",cpu_khz); + return cpu_khz; +} + +/** + * Yield the CPU so other host OS tasks can run. + * This will return immediately if there is no other thread that is runnable + * And there is no real bound on how long it will yield + */ +void +palacios_yield_cpu(void) +{ + nk_yield(); + return; +} + +/** + * Yield the CPU so other host OS tasks can run. + * Given now immediately if there is no other thread that is runnable + * And there is no real bound on how long it will yield + */ +void palacios_sleep_cpu(unsigned int us) +{ + // sleep not supported on Nautilus + // just yield + nk_yield(); + udelay(us); +} + +void palacios_wakeup_cpu(void *thread) +{ + // threads never go to sleep, so shouldn't happen + ERROR("ERROR ERROR: WAKEUP_CPU CALLED. THREADS ARE NEVER ASLEEP"); + return; +} + +/** + * Allocates a mutex. + * Returns NULL on failure. + */ +void * +palacios_mutex_alloc(void) +{ + spinlock_t *lock = palacios_alloc(sizeof(spinlock_t)); + + if (lock) { + spinlock_init(lock); + } else { + ERROR("ALERT ALERT Unable to allocate lock\n"); + return NULL; + } + + return lock; +} + +void palacios_mutex_init(void *mutex) +{ + spinlock_t *lock = (spinlock_t*)mutex; + + if (lock) { + spinlock_init(lock); + LOCKCHECK_ALLOC(lock); + } + +} + +void palacios_mutex_deinit(void *mutex) +{ + spinlock_t *lock = (spinlock_t*)mutex; + + if (lock) { + spinlock_deinit(lock); + LOCKCHECK_FREE(lock); + } +} + + +/** + * Frees a mutex. + */ +void +palacios_mutex_free(void * mutex) { + palacios_free(mutex); + LOCKCHECK_FREE(mutex); +} + +/** + * Locks a mutex. + */ +void +palacios_mutex_lock(void * mutex, int must_spin) { + LOCKCHECK_LOCK_PRE(mutex); + spin_lock((spinlock_t *)mutex); + LOCKCHECK_LOCK_POST(mutex); +} + + +/** + * Locks a mutex, disabling interrupts on this core + */ +void * +palacios_mutex_lock_irqsave(void * mutex, int must_spin) { + + unsigned long flags; + + LOCKCHECK_LOCK_IRQSAVE_PRE(mutex,flags); + flags = spin_lock_irq_save((spinlock_t *)mutex); + LOCKCHECK_LOCK_IRQSAVE_POST(mutex,flags); + + //INFO("lock irqsave flags=%lu\n",flags); + return (void *)flags; +} + + +/** + * Unlocks a mutex. + */ +void +palacios_mutex_unlock( + void * mutex +) +{ + LOCKCHECK_UNLOCK_PRE(mutex); + spin_unlock((spinlock_t *)mutex); + LOCKCHECK_UNLOCK_POST(mutex); +} + + +/** + * Unlocks a mutex and restores previous interrupt state on this core + */ +void +palacios_mutex_unlock_irqrestore(void *mutex, void *flags) +{ + //INFO("unlock irqrestore flags=%lu\n",(unsigned long)flags); + LOCKCHECK_UNLOCK_IRQRESTORE_PRE(mutex,(unsigned long)flags); + // This is correct, flags is opaque + spin_unlock_irq_restore((spinlock_t *)mutex,(uint8_t) (unsigned long)flags); + LOCKCHECK_UNLOCK_IRQRESTORE_POST(mutex,(unsigned long)flags); +} + + +/** + * Structure used by the Palacios hypervisor to interface with the host kernel. 
+ */ +static struct v3_os_hooks palacios_os_hooks = { + .print = palacios_print_scoped, + .allocate_pages = palacios_allocate_pages, + .free_pages = palacios_free_pages, + .vmalloc = palacios_valloc, + .vfree = palacios_vfree, + .malloc = palacios_alloc, + .free = palacios_free, + .vaddr_to_paddr = palacios_vaddr_to_paddr, + .paddr_to_vaddr = palacios_paddr_to_vaddr, + .hook_interrupt = palacios_hook_interrupt, + .ack_irq = palacios_ack_interrupt, + .get_cpu_khz = palacios_get_cpu_khz, + .start_kernel_thread = palacios_create_and_start_kernel_thread, + .yield_cpu = palacios_yield_cpu, + .sleep_cpu = palacios_sleep_cpu, + .wakeup_cpu = palacios_wakeup_cpu, + .mutex_alloc = palacios_mutex_alloc, + .mutex_free = palacios_mutex_free, + .mutex_lock = palacios_mutex_lock, + .mutex_unlock = palacios_mutex_unlock, + .mutex_lock_irqsave = palacios_mutex_lock_irqsave, + .mutex_unlock_irqrestore = palacios_mutex_unlock_irqrestore, + .get_cpu = palacios_get_cpu, + .interrupt_cpu = palacios_interrupt_cpu, + .call_on_cpu = palacios_xcall, + .create_thread_on_cpu = palacios_create_thread_on_cpu, + .start_thread = palacios_start_thread, + .move_thread_to_cpu = palacios_move_thread_to_cpu, // unsupported +}; + + + + +int palacios_vmm_init(uint64_t memsize, char * options) +{ + int num_cpus = nautilus_info.sys.num_cpus; + char * cpu_mask = NULL; + + if (num_cpus > 0) { + int major = 0; + int minor = 0; + int i = 0; + + cpu_mask = palacios_alloc((num_cpus / 8) + 1); + + if (!cpu_mask) { + ERROR("Cannot allocate cpu mask\n"); + return -1; + } + + memset(cpu_mask, 0, (num_cpus / 8) + 1); + + for (i = 0; i < num_cpus; i++) { + + major = i / 8; + minor = i % 8; + + *(cpu_mask + major) |= (0x1 << minor); + } + } + + INFO("calling palacios-mm init\n"); + if (init_palacios_nautilus_mm(memsize)) { + ERROR("Failted to initialize memory management\n"); + return -1; + } + INFO("palacios-mm init done\n"); + + memset(irq_to_guest_map, 0, sizeof(struct v3_vm_info *) * 256); + + if (init_print_buffers()) { + INFO("Cannot initialize print buffers\n"); + palacios_free(cpu_mask); + return -1; + } + + INFO("printbuffer init done\n"); + + //palacios_print_scoped(0, 0, "Hi%llu\n", 134217728); + + INFO("NR_CPU: %d\n", NR_CPUS); + + INFO("palacios_init starting - calling init_v3\n"); + + INFO("calling init_v3 = %p\n", Init_V3); + + INFO("num_cpus: %d\ncpu_mask: %x\noptions: %s\n", num_cpus, *cpu_mask, options); + + Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options); + + INFO("init_v3 done\n"); + +#ifdef V3_CONFIG_CONSOLE + INFO("Initializing console\n"); + nautilus_console_init(); +#endif + + + return 0; + +} + + +int palacios_vmm_exit( void ) +{ + +#ifdef V3_CONFIG_CONSOLE + nautilus_console_deinit(); +#endif + + Shutdown_V3(); + + INFO("palacios shutdown complete\n"); + + deinit_print_buffers(); + + deinit_palacios_nautilus_mm(); // free memory from the allocator + + return 0; +} diff --git a/nautilus/palacios.h b/nautilus/palacios.h new file mode 100644 index 0000000..d7c9687 --- /dev/null +++ b/nautilus/palacios.h @@ -0,0 +1,105 @@ +#ifndef _PALACIOS_H +#define _PALACIOS_H + +typedef uint8_t u8; +typedef uint32_t u32; +typedef uint64_t u64; +typedef unsigned int *uintptr_t; +typedef int *intptr_t; + +#define LOCKCHECK_ALLOC(x) +#define LOCKCHECK_LOCK_PRE(x) +#define LOCKCHECK_LOCK_POST(x) +#define LOCKCHECK_LOCK_IRQSAVE_PRE(x,y) +#define LOCKCHECK_LOCK_IRQSAVE_POST(x,y) +#define LOCKCHECK_UNLOCK_PRE(x) +#define LOCKCHECK_UNLOCK_POST(x) +#define LOCKCHECK_UNLOCK_IRQRESTORE_PRE(x,y) +#define 
LOCKCHECK_UNLOCK_IRQRESTORE_POST(x,y) +#define LOCKCHECK_FREE(x) + + +#define NR_CPUS 64 + + + + +int palacios_vmm_init( uint64_t memsize, char * options ); +int palacios_vmm_exit( void ); + + +struct v3_resource_control; + +// Selected exported stubs, for use in other palacios components, like vnet +// The idea is that everything uses the same stubs +void palacios_print_scoped(void *vm, int vcore, const char *fmt, ...); +#define palacios_print(...) palacios_print_scoped(0,-1, __VA_ARGS__) +// node_id=-1 => no node constraint +void *palacios_allocate_pages(int num_pages, unsigned int alignment, int node_id, int (*filter_func)(void *paddr, void *filter_state), void *filter_state); +void palacios_free_pages(void *page_addr, int num_pages); +void *palacios_alloc(unsigned int size); +// node_id=-1 => no node constraint +void *palacios_alloc_extended(unsigned int size, unsigned int flags, int node_id); +void palacios_free(void *); +void *palacios_valloc(unsigned int size); // use instead of vmalloc +void palacios_vfree(void *); // use instead of vfree +void *palacios_vaddr_to_paddr(void *vaddr); +void *palacios_paddr_to_vaddr(void *paddr); +void palacios_xcall(int cpu_id, void (*fn)(void *arg), void *arg); +void *palacios_create_and_start_kernel_thread(int (*fn)(void * arg), void *arg, char *thread_name, struct v3_resource_control *rctl); +void *palacios_create_thread_on_cpu(int cpu_id, int (*fn)(void * arg), void *arg, char *thread_name, struct v3_resource_control *rctl); +void palacios_start_thread(void *thread_ptr); +void *palacios_creeate_and_start_thread_on_cpu(int cpu_id, int (*fn)(void * arg), void *arg, char *thread_name, struct v3_resource_control *rctl); +int palacios_move_thread_to_cpu(int new_cpu_id, void *thread_ptr); +void palacios_yield_cpu(void); +void palacios_sleep_cpu(unsigned int us); +unsigned int palacios_get_cpu(void); +unsigned int palacios_get_cpu_khz(void); +void palacios_used_fpu(void); +void palacios_need_fpu(void); +void *palacios_mutex_alloc(void); // allocates and inits a lock +void palacios_mutex_init(void *mutex); // only inits a lock +void palacios_mutex_deinit(void *mutex); // only deinits a lock +void palacios_mutex_free(void *mutex); // deinits and frees a lock +void palacios_mutex_lock(void *mutex, int must_spin); +void palacios_mutex_unlock(void *mutex); +void *palacios_mutex_lock_irqsave(void *mutex, int must_spin); +void palacios_mutex_unlock_irqrestore(void *mutex, void *flags); +// Macros for spin-locks in the module code +// By using these macros, the lock checker will be able +// to see the module code as well as the core VMM +#define palacios_spinlock_init(l) palacios_mutex_init(l) +#define palacios_spinlock_deinit(l) palacios_mutex_deinit(l) +#define palacios_spinlock_lock(l) palacios_mutex_lock(l,0) +#define palacios_spinlock_unlock(l) palacios_mutex_unlock(l) +#define palacios_spinlock_lock_irqsave(l,f) do { f=(unsigned long)palacios_mutex_lock_irqsave(l,0); } while (0) +#define palacios_spinlock_unlock_irqrestore(l,f) palacios_mutex_unlock_irqrestore(l,(void*)f) + + +// Palacios Printing Support + +// These macros affect how palacios_print will generate output +// Turn this on for unprefaced output from palacios_print +#define V3_PRINTK_OLD_STYLE_OUTPUT 0 +// Maximum length output from palacios_print +#define V3_PRINTK_BUF_SIZE 1024 +// Turn this on to check if new-style output for palacios_print contains only 7-bit chars +#define V3_PRINTK_CHECK_7BIT 1 + +// +// The following macros are for printing in the linux module itself, even before +// 
Palacios is initialized and after it is deinitialized +// All printk's in linux_module use these macros, for easier control +#define KERN_ERR "" +#define KERN_WARNING "" +#define KERN_NOTICE "" +#define KERN_INFO "" +#define KERN_DEBUG "" +#define ERROR(fmt, args...) printk((KERN_ERR "palacios (pcore %u) %s(%d): " fmt), palacios_get_cpu(), __FILE__, __LINE__, ##args) +#define WARNING(fmt, args...) printk((KERN_WARNING "palacios (pcore %u): " fmt), palacios_get_cpu(), ##args) +#define NOTICE(fmt, args...) printk((KERN_NOTICE "palacios (pcore %u): " fmt), palacios_get_cpu(), ##args) +#define INFO(fmt, args...) printk((KERN_INFO "palacios (pcore %u): " fmt), palacios_get_cpu(), ##args) +#define DEBUG(fmt, args...) printk((KERN_DEBUG "palacios (pcore %u): " fmt), palacios_get_cpu(), ##args) + + +#endif
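
For reference, the startup flow described in the usage comment inside palacios-stubs.c above might look roughly like the sketch below when driven from the Nautilus init thread on core 0. This is an illustrative sketch only and is not part of the patch: the v3_create_vm()/v3_start_vm() prototypes, the guest_cfg configuration blob, the 256 MB memory size, and the start_palacios_guest() wrapper are all assumptions; only palacios_vmm_init(), palacios_vmm_exit(), and the the_vm global consumed by console.c are actually provided by this commit.

/* Illustrative usage sketch -- NOT part of the patch above. */

#include "palacios.h"

void *the_vm = 0;            /* global that nautilus/console.c attaches to */

/* Hypothetical: a guest configuration/image blob linked into the kernel */
extern void *guest_cfg;

/* Assumed prototypes for the unwrapped Palacios core entry points;
 * check palacios/include/palacios/vmm.h for the real signatures. */
extern void *v3_create_vm(void *cfg, void *priv_data, char *name);
extern int   v3_start_vm(void *vm, unsigned int cpu_mask);

static int start_palacios_guest(void)
{
    /* Hand 256 MB of the Nautilus heap to the Palacios page allocator */
    if (palacios_vmm_init(256ULL * 1024 * 1024, "") < 0) {
        ERROR("palacios_vmm_init failed\n");
        return -1;
    }

    /* Create the guest; the console hooks use whatever the_vm points at */
    the_vm = v3_create_vm(guest_cfg, 0, "guest-0");

    if (!the_vm) {
        ERROR("v3_create_vm failed\n");
        palacios_vmm_exit();
        return -1;
    }

    /* Launch the guest on core 0 only */
    if (v3_start_vm(the_vm, 0x1) < 0) {
        ERROR("v3_start_vm failed\n");
        palacios_vmm_exit();
        return -1;
    }

    /* ... guest teardown (v3_stop_vm()/v3_free_vm()) would go here ... */

    palacios_vmm_exit();

    return 0;
}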