From: Lei Xia Date: Thu, 18 Aug 2011 03:40:01 +0000 (-0500) Subject: Merge branch 'devel' of palacios@newskysaw.cs.northwestern.edu:/home/palacios/palacio... X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=cf780b903a02efc700d51dbadcc90115f016256c;hp=020108512c01b113a5e8cca88a8facdc5ba5488a;p=palacios.git Merge branch 'devel' of palacios@newskysaw.cs.northwestern.edu:/home/palacios/palacios into devel --- diff --git a/linux_module/iface-console.c b/linux_module/iface-console.c index 02d9761..9e13546 100644 --- a/linux_module/iface-console.c +++ b/linux_module/iface-console.c @@ -140,7 +140,7 @@ console_write(struct file * filp, const char __user * buf, size_t size, loff_t * for (i = 0; i < size; i++) { - if (copy_from_user(&(event.scan_code), buf, 1)) { + if (copy_from_user(&(event.scan_code), buf + i, 1)) { printk("Console Write fault\n"); return -EFAULT; } @@ -217,7 +217,7 @@ static int console_connect(struct v3_guest * guest, unsigned int cmd, spin_lock_irqsave(&(cons->lock), flags); - cons_fd = anon_inode_getfd("v3-cons", &cons_fops, cons, 0); + cons_fd = anon_inode_getfd("v3-cons", &cons_fops, cons, O_RDWR); if (cons_fd < 0) { printk("Error creating console inode\n"); diff --git a/linux_module/iface-keyed-stream-user.h b/linux_module/iface-keyed-stream-user.h new file mode 100644 index 0000000..0ab1738 --- /dev/null +++ b/linux_module/iface-keyed-stream-user.h @@ -0,0 +1,65 @@ +#ifndef _PALACIOS_KEYED_STREAM_USER_H_ +#define _PALACIOS_KEYED_STREAM_USER_H_ + +/* + * Palacios Keyed Stream User Interface + * (c) Clint Sbisa, 2011 + */ + + +// Attach to the VM +#define V3_VM_KSTREAM_USER_CONNECT (11244+1) + +// get size of pending request +// Note that this is not the wrong ioctl - the connect ioctl applies to the VM device +// the following ioctls apply to the FD returned by the connect +#define V3_KSTREAM_REQUEST_SIZE_IOCTL (11244+1) +// get the pending request +#define V3_KSTREAM_REQUEST_PULL_IOCTL (11244+2) +// push a response to the 
previously pulled request +#define V3_KSTREAM_RESPONSE_PUSH_IOCTL (11244+3) + +#ifdef __KERNEL__ +#define USER __user +#else +#define USER +#endif + + +struct palacios_user_keyed_stream_url { + uint64_t len; + char url[0]; // len describes it +}; + + +// +// This structure is used for both requests (kernel->user) +// and responses (user->kernel) +// +struct palacios_user_keyed_stream_op { + + uint64_t len; // total structure length (all) + + int type; // request or response type +#define PALACIOS_KSTREAM_OPEN 1 // not used +#define PALACIOS_KSTREAM_CLOSE 2 // not used +#define PALACIOS_KSTREAM_OPEN_KEY 3 +#define PALACIOS_KSTREAM_CLOSE_KEY 4 +#define PALACIOS_KSTREAM_WRITE_KEY 5 +#define PALACIOS_KSTREAM_READ_KEY 6 + + sint64_t xfer; // total bytes read or written (request/response) + + void *user_key; // user tag for an open key (response) + + uint64_t buf_len; // buffer len + char buf[0]; // expanded as needed (key or valye) + + // The buffer contains the key or the value +}; + + + + + +#endif diff --git a/linux_module/iface-keyed-stream.c b/linux_module/iface-keyed-stream.c index 792066a..fc0e1b9 100644 --- a/linux_module/iface-keyed-stream.c +++ b/linux_module/iface-keyed-stream.c @@ -1,29 +1,94 @@ +/* + * Palacios keyed stream interface + * + * Plus implementations for mem, file, and user space implementations + * + * (c) Peter Dinda, 2011 (interface, mem + file implementations + recooked user impl) + * (c) Clint Sbisa, 2011 (initial user space implementation on which this is based) + */ + +#include +#include +#include +#include +#include +#include +#include + #include "palacios.h" #include "util-hashtable.h" #include "linux-exts.h" +#include "vm.h" #define sint64_t int64_t #include +#include "iface-keyed-stream-user.h" + /* - Streams are stored in a hash table - The values for this hash table are hash tables associted with - each stream. 
A keyed stream for a "mem:" stream is - an instance of the structure given here + This is an implementation of the Palacios keyed stream interface + that supports three flavors of streams: + + "mem:" Streams are stored in a hash table + The values for this hash table are hash tables associated with + each stream. + + "file:" Streams are stored in files. Each high-level + open corresponds to a directory, while key corresponds to + a distinct file in that directory. + + "user:" Stream requests are bounced to user space to be + handled there. A rendezvous approach similar to the host + device userland support is used + */ +#define STREAM_GENERIC 0 +#define STREAM_MEM 1 +#define STREAM_FILE 2 +#define STREAM_USER 3 + +/* + All keyed streams and streams indicate their implementation type within the first field + */ +struct generic_keyed_stream { + int stype; +}; + +struct generic_stream { + int stype; +}; + + + + +/**************************************************************************************** + Memory-based implementation ("mem:") +****************************************************************************************/ + #define DEF_NUM_STREAMS 16 #define DEF_NUM_KEYS 128 #define DEF_SIZE 128 +/* + A memory keyed stream is a pointer to the underlying hash table + while a memory stream contains an extensible buffer for the stream + */ +struct mem_keyed_stream { + int stype; + v3_keyed_stream_open_t ot; + struct hashtable *ht; +}; + struct mem_stream { + int stype; char *data; uint32_t size; uint32_t data_max; uint32_t ptr; }; -static struct mem_stream *create_mem_stream(void) +static struct mem_stream *create_mem_stream_internal(uint64_t size) { struct mem_stream *m = kmalloc(sizeof(struct mem_stream),GFP_KERNEL); @@ -31,20 +96,28 @@ static struct mem_stream *create_mem_stream(void) return 0; } - m->data = kmalloc(DEF_SIZE,GFP_KERNEL); + + m->data = vmalloc(size); if (!m->data) { kfree(m); return 0; } - m->size=DEF_SIZE; + m->stype = STREAM_MEM; + 
m->size=size; m->ptr=0; m->data_max=0; return m; } + +static struct mem_stream *create_mem_stream(void) +{ + return create_mem_stream_internal(DEF_SIZE); +} + static void destroy_mem_stream(struct mem_stream *m) { if (m) { @@ -58,7 +131,7 @@ static void destroy_mem_stream(struct mem_stream *m) static int expand_mem_stream(struct mem_stream *m, uint32_t new_size) { - void *data = kmalloc(new_size,GFP_KERNEL); + void *data = vmalloc(new_size); uint32_t nc; if (!data) { @@ -129,76 +202,128 @@ static inline int hash_comp(addr_t k1, addr_t k2) } -// This stores all the streams -static struct hashtable *streams=0; +// This stores all the memory keyed streams streams +static struct hashtable *mem_streams=0; -static v3_keyed_stream_t open_stream(char *url, - v3_keyed_stream_open_t ot) +static v3_keyed_stream_t open_stream_mem(char *url, + v3_keyed_stream_open_t ot) { + if (strncasecmp(url,"mem:",4)) { - printk("Only in-memory streams are currently supported\n"); + printk("palacios: illegitimate attempt to open memory stream \"%s\"\n",url); return 0; } switch (ot) { case V3_KS_RD_ONLY: - case V3_KS_WR_ONLY: - return (v3_keyed_stream_t) palacios_htable_search(streams,(addr_t)(url+4)); + case V3_KS_WR_ONLY: { + struct mem_keyed_stream *mks = (struct mem_keyed_stream *) palacios_htable_search(mem_streams,(addr_t)(url+4)); + if (mks) { + mks->ot=ot; + } + return (v3_keyed_stream_t) mks; + } break; + case V3_KS_WR_ONLY_CREATE: { - struct hashtable *s = (struct hashtable *) palacios_htable_search(streams,(addr_t)(url+4)); - - if (!s) { - s = palacios_create_htable(DEF_NUM_KEYS,hash_func,hash_comp); - if (!s) { - printk("Cannot allocate in-memory keyed stream %s\n",url); - return 0; - } - if (!palacios_htable_insert(streams,(addr_t)(url+4),(addr_t)s)) { - printk("Cannot insert in-memory keyed stream %s\n",url); - return 0; - } + struct mem_keyed_stream *mks = (struct mem_keyed_stream *) palacios_htable_search(mem_streams,(addr_t)(url+4)); + if (!mks) { + char *mykey; + + mykey = 
kmalloc(strlen(url+4)+1,GFP_KERNEL); + + if (!mykey) { + printk("palacios: cannot allocate space for new in-memory keyed stream %s\n",url); + return 0; + } + + strcpy(mykey,url+4); + + mks = (struct mem_keyed_stream *) kmalloc(sizeof(struct mem_keyed_stream),GFP_KERNEL); + + if (!mks) { + kfree(mykey); + printk("palacios: cannot allocate in-memory keyed stream %s\n",url); + return 0; + } + + mks->ht = (void*) palacios_create_htable(DEF_NUM_KEYS,hash_func,hash_comp); + if (!mks->ht) { + kfree(mks); + kfree(mykey); + printk("palacios: cannot allocate in-memory keyed stream %s\n",url); + return 0; + } + + + if (!palacios_htable_insert(mem_streams,(addr_t)(mykey),(addr_t)mks)) { + palacios_free_htable(mks->ht,1,1); + kfree(mks); + kfree(mykey); + printk("palacios: cannot insert in-memory keyed stream %s\n",url); + return 0; + } + mks->stype=STREAM_MEM; } - return s; + mks->ot=V3_KS_WR_ONLY; + + return mks; } + break; + default: + printk("palacios: unsupported open type in open_stream_mem\n"); break; } return 0; - + } -static void close_stream(v3_keyed_stream_t stream) + +static void close_stream_mem(v3_keyed_stream_t stream) { // nothing to do return; } -static v3_keyed_stream_key_t open_key(v3_keyed_stream_t stream, - char *key) + +static v3_keyed_stream_key_t open_key_mem(v3_keyed_stream_t stream, + char *key) { - struct hashtable *s = (struct hashtable *) stream; + struct mem_keyed_stream *mks = (struct mem_keyed_stream *) stream; + struct hashtable *s = mks->ht; struct mem_stream *m; m = (struct mem_stream *) palacios_htable_search(s,(addr_t)key); if (!m) { + char *mykey = kmalloc(strlen(key)+1,GFP_KERNEL); + + if (!mykey) { + printk("palacios: cannot allocate copy of key for key %s\n",key); + return 0; + } + + strcpy(mykey,key); + m = create_mem_stream(); if (!m) { - printk("Cannot allocate keyed stream for key %s\n",key); + kfree(mykey); + printk("palacios: cannot allocate mem keyed stream for key %s\n",key); return 0; } - if 
(!palacios_htable_insert(s,(addr_t)key,(addr_t)m)) { - printk("Cannot insert keyed stream for key %s\n",key); + if (!palacios_htable_insert(s,(addr_t)mykey,(addr_t)m)) { destroy_mem_stream(m); + kfree(mykey); + printk("palacios: cannot insert mem keyed stream for key %s\n",key); return 0; } } @@ -208,24 +333,82 @@ static v3_keyed_stream_key_t open_key(v3_keyed_stream_t stream, } -static void close_key(v3_keyed_stream_t stream, - v3_keyed_stream_key_t key) + +// Hint that key will need about size bytes: create its buffer at that size or grow the existing one; only acts on write streams, failures are just logged +static void preallocate_hint_key_mem(v3_keyed_stream_t stream, + char *key, + uint64_t size) +{ + struct mem_keyed_stream *mks = (struct mem_keyed_stream *) stream; + struct hashtable *s = mks->ht; + struct mem_stream *m; + + if (mks->ot != V3_KS_WR_ONLY) { + return; + } + + m = (struct mem_stream *) palacios_htable_search(s,(addr_t)key); + + if (!m) { + char *mykey = kmalloc(strlen(key)+1,GFP_KERNEL); + + if (!mykey) { + printk("palacios: cannot allocate key spce for preallocte for key %s\n",key); + return; + } + + strcpy(mykey,key); + + m = create_mem_stream_internal(size); + + if (!m) { + kfree(mykey); // the hash table never took ownership of the key copy + printk("palacios: cannot preallocate mem keyed stream for key %s\n",key); + return; + } + + if (!palacios_htable_insert(s,(addr_t)mykey,(addr_t)m)) { + printk("palacios: cannot insert preallocated mem keyed stream for key %s\n",key); + destroy_mem_stream(m); + kfree(mykey); // same cleanup as open_key_mem on a failed insert + return; + } + } else { + if (m->data_max < size) { + if (expand_mem_stream(m,size)) { + printk("palacios: cannot expand key for preallocation for key %s\n",key); + return; + } + } + } + + return; + +} + +static void close_key_mem(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key) { // nothing to do return; } -static sint64_t write_key(v3_keyed_stream_t stream, - v3_keyed_stream_key_t key, - void *buf, - sint64_t len) +static sint64_t write_key_mem(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key, + void *buf, + sint64_t len) { + struct mem_keyed_stream *mks = (struct mem_keyed_stream *) stream; struct mem_stream *m = (struct mem_stream *) key; 
uint32_t mylen; uint32_t writelen; + if (mks->ot!=V3_KS_WR_ONLY) { + return -1; + } + if (len<0) { - return len; + return -1; } mylen = (uint32_t) len; @@ -233,41 +416,1090 @@ static sint64_t write_key(v3_keyed_stream_t stream, writelen=write_mem_stream(m,buf,mylen); if (writelen!=mylen) { - printk("Failed to write all data for key\n"); + printk("palacios: failed to write all data for key\n"); return -1; } else { return (sint64_t)writelen; } } -static sint64_t read_key(v3_keyed_stream_t stream, - v3_keyed_stream_key_t key, - void *buf, - sint64_t len) +static sint64_t read_key_mem(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key, + void *buf, + sint64_t len) { + struct mem_keyed_stream *mks = (struct mem_keyed_stream *) stream; struct mem_stream *m = (struct mem_stream *) key; uint32_t mylen; uint32_t readlen; + if (mks->ot!=V3_KS_RD_ONLY) { + return -1; + } + if (len<0) { - return len; + return -1; } mylen = (uint32_t) len; - + readlen=read_mem_stream(m,buf,mylen); - + if (readlen!=mylen) { - printk("Failed to read all data for key\n"); + printk("palacios: failed to read all data for key\n"); return -1; } else { return (sint64_t)readlen; } } + + +/*************************************************************************************************** + File-based implementation ("file:") +*************************************************************************************************/ + +/* + A file keyed stream contains the fd of the directory + and a path +*/ + +struct file_keyed_stream { + int stype; + v3_keyed_stream_open_t ot; + char *path; +}; + +struct file_stream { + int stype; + struct file *f; // the opened file +}; + + +static v3_keyed_stream_t open_stream_file(char *url, + v3_keyed_stream_open_t ot) +{ + struct file_keyed_stream *fks; + struct nameidata nd; + + if (strncasecmp(url,"file:",5)) { + printk("palacios: illegitimate attempt to open file stream \"%s\"\n",url); + return 0; + } + + fks = kmalloc(sizeof(struct file_keyed_stream),GFP_KERNEL); + 
+ if (!fks) { + printk("palacios: cannot allocate space for file stream\n"); + return 0; + } + + fks->path = (char*)kmalloc(strlen(url+5)+1,GFP_KERNEL); + + if (!(fks->path)) { + printk("palacios: cannot allocate space for file stream\n"); + kfree(fks); + return 0; + } + + strcpy(fks->path,url+5); + + fks->stype=STREAM_FILE; + + fks->ot= ot==V3_KS_WR_ONLY_CREATE ? V3_KS_WR_ONLY : ot; + + // Does the directory exist, and can we read/write it? + + if (path_lookup(fks->path,LOOKUP_DIRECTORY|LOOKUP_FOLLOW,&nd)) { + + // directory does does not exist. + + if (ot==V3_KS_RD_ONLY || ot==V3_KS_WR_ONLY) { + + // we are not being asked to create it + printk("palacios: attempt to open %s, which does not exist\n",fks->path); + goto fail_out; + + } else { + + // We are being asked to create it + + struct dentry *de; + int err; + + // Find its parent + if (path_lookup(fks->path,LOOKUP_PARENT|LOOKUP_FOLLOW,&nd)) { + printk("palacios: attempt to create %s failed because its parent cannot be looked up\n",fks->path); + goto fail_out; + } + + // Can we write to the parent? + + if (inode_permission(nd.path.dentry->d_inode, MAY_WRITE | MAY_EXEC)) { + printk("palacios: attempt to open %s, which has the wrong permissions for directory creation\n",fks->path); + goto fail_out; + } + + // OK, we can, so let's create it + + de = lookup_create(&nd,1); + + if (IS_ERR(de)) { + printk("palacios: cannot allocate dentry\n"); + goto fail_out; + } + + err = vfs_mkdir(nd.path.dentry->d_inode, de, 0700); + + // lookup_create locks this for us! 
+ mutex_unlock(&(nd.path.dentry->d_inode->i_mutex)); + + if (err) { + printk("palacios: attempt to create %s failed because mkdir failed\n",fks->path); + goto fail_out; + } + + // now the directory should exist and have reasonable permissions + return (v3_keyed_stream_t) fks; + } + } + + + // we must be in V3_KS_RD_ONLY or V3_KS_WR_ONLY, + // and the directory exists, so we must check the permissions + + if (inode_permission(nd.path.dentry->d_inode, MAY_EXEC | (ot==V3_KS_RD_ONLY ? MAY_READ : MAY_WRITE))) { + printk("palacios: attempt to open %s, which has the wrong permissions\n",fks->path); + goto fail_out; + } else { + return (v3_keyed_stream_t) fks; + } + + fail_out: + kfree(fks->path); + kfree(fks); + return 0; + +} + +static void close_stream_file(v3_keyed_stream_t stream) +{ + struct file_keyed_stream *fks = (struct file_keyed_stream *) stream; + + kfree(fks->path); + kfree(fks); + +} + +static void preallocate_hint_key_file(v3_keyed_stream_t stream, + char *key, + uint64_t size) +{ + return; +} + +// Open (creating it if necessary) the file <stream path>/<key>; returns a new file_stream, or 0 on failure +static v3_keyed_stream_key_t open_key_file(v3_keyed_stream_t stream, + char *key) +{ + struct file_keyed_stream *fks = (struct file_keyed_stream *) stream; + struct file_stream *fs; + char *path; + + // the path is the stream's path plus the key name + // file:/home/foo + "regext" => "/home/foo/regext" + path = (char *) kmalloc(strlen(fks->path)+strlen(key)+2,GFP_KERNEL); + if (!path) { + printk("palacios: cannot allocate file keyed stream for key %s\n",key); + return 0; + } + strcpy(path,fks->path); + strcat(path,"/"); + strcat(path,key); + + fs = (struct file_stream *) kmalloc(sizeof(*fs),GFP_KERNEL); // size of the struct itself, not of a pointer to it + + if (!fs) { + printk("palacios: cannot allocate file keyed stream for key %s\n",key); + kfree(path); + return 0; + } + + fs->stype=STREAM_FILE; + + fs->f = filp_open(path,O_RDWR|O_CREAT,0600); + + if (IS_ERR(fs->f)) { + printk("palacios: cannot open relevent file \"%s\" for stream \"file:%s\" and key \"%s\"\n",path,fks->path,key); + 
kfree(fs); + kfree(path); + return 0; + } + + kfree(path); + + return fs; +} + +static void close_key_file(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key) +{ + struct file_stream *fs = (struct file_stream *) key; + + filp_close(fs->f,NULL); + + kfree(fs); +} + +// Write all len bytes of buf to the key's file; returns len, or -1 on error or if the stream is not open for writing +static sint64_t write_key_file(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key, + void *buf, + sint64_t len) +{ + struct file_keyed_stream *fks = (struct file_keyed_stream *) stream; + struct file_stream *fs = (struct file_stream *) key; + mm_segment_t old_fs; + ssize_t done, left, total; + + if (fks->ot!=V3_KS_WR_ONLY) { + return -1; + } + + if (len<0) { + return -1; + } + + total=len; + left=len; + + old_fs = get_fs(); + set_fs(get_ds()); + + while (left>0) { + done = fs->f->f_op->write(fs->f, buf+(total-left), left, &(fs->f->f_pos)); + if (done<=0) { + break; // leave via the common exit so the address limit is restored + } else { + left -= done; + } + } + set_fs(old_fs); + + return (left>0) ? -1 : len; +} + + +// Read exactly len bytes from the key's file into buf; returns len, or -1 on error, EOF, or if the stream is not open for reading +static sint64_t read_key_file(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key, + void *buf, + sint64_t len) +{ + struct file_keyed_stream *fks = (struct file_keyed_stream *) stream; + struct file_stream *fs = (struct file_stream *) key; + mm_segment_t old_fs; + ssize_t done, left, total; + + if (fks->ot!=V3_KS_RD_ONLY) { + return -1; + } + + if (len<0) { + return -1; + } + + total=len; + left=len; + + old_fs = get_fs(); + set_fs(get_ds()); + + while (left>0) { + done = fs->f->f_op->read(fs->f, buf+(total-left), left, &(fs->f->f_pos)); + if (done<=0) { + break; // leave via the common exit so the address limit is restored + } else { + left -= done; + } + } + set_fs(old_fs); + + return (left>0) ? -1 : len; + +} + + + + +/*************************************************************************************************** + User implementation ("user:") +*************************************************************************************************/ + + +// List of all user keyed stream connections for the guest +struct user_keyed_streams { + spinlock_t lock; + struct list_head streams; +}; + + +// A single keyed stream 
connection to user space +struct user_keyed_stream { + int stype; + v3_keyed_stream_open_t otype; + + char *url; + spinlock_t lock; + int waiting; + + wait_queue_head_t user_wait_queue; + wait_queue_head_t host_wait_queue; + + struct palacios_user_keyed_stream_op *op; + + struct list_head node; +}; + + +// +// List of all of the user streams +// +static struct user_keyed_streams *user_streams; + + + +static int resize_op(struct palacios_user_keyed_stream_op **op, uint64_t buf_len) +{ + struct palacios_user_keyed_stream_op *old = *op; + struct palacios_user_keyed_stream_op *new; + + if (!old) { + new = kmalloc(sizeof(struct palacios_user_keyed_stream_op)+buf_len,GFP_ATOMIC); + if (!new) { + return -1; + } else { + new->len=sizeof(struct palacios_user_keyed_stream_op)+buf_len; + new->buf_len=buf_len; + *op=new; + return 0; + } + } else { + if ((old->len-sizeof(struct palacios_user_keyed_stream_op)) >= buf_len) { + old->buf_len=buf_len; + return 0; + } else { + kfree(old); + *op = 0 ; + return resize_op(op,buf_len); + } + } +} + +// +// The assumption is that we enter this with the stream locked +// and we will return with it locked; additionally, the op structure +// will be overwritten with the response +// +static int do_request_to_response(struct user_keyed_stream *s, unsigned long *flags) +{ + + if (s->waiting) { + printk("palacios: user keyed stream request attempted while one is already in progress on %s\n",s->url); + return -1; + } + + // we are now waiting for a response + s->waiting = 1; + + // release the stream + spin_unlock_irqrestore(&(s->lock), *flags); + + // wake up anyone waiting on it + wake_up_interruptible(&(s->user_wait_queue)); + + // wait for someone to give us a response + while (wait_event_interruptible(s->host_wait_queue, (s->waiting == 0)) != 0) {} + + // reacquire the lock for our called + spin_lock_irqsave(&(s->lock), *flags); + + return 0; +} + +// +// The assumption is that we enter this with the stream locked +// and we will return 
with it UNlocked +// +static int do_response_to_request(struct user_keyed_stream *s, unsigned long *flags) +{ + + if (!(s->waiting)) { + printk("palacios: user keyed stream response while no request is in progress on %s\n",s->url); + return -1; + } + + // we are now waiting for a request + s->waiting = 0; + + // release the stream + spin_unlock_irqrestore(&(s->lock), *flags); + + // wake up anyone waiting on it + wake_up_interruptible(&(s->host_wait_queue)); + + return 0; +} + + + +static unsigned int keyed_stream_poll_user(struct file *filp, poll_table *wait) +{ + struct user_keyed_stream *s = (struct user_keyed_stream *) (filp->private_data); + unsigned long flags; + + if (!s) { + return POLLERR; + } + + spin_lock_irqsave(&(s->lock), flags); + + if (s->waiting) { + spin_unlock_irqrestore(&(s->lock), flags); + return POLLIN | POLLRDNORM; + } + + poll_wait(filp, &(s->user_wait_queue), wait); + + spin_unlock_irqrestore(&(s->lock), flags); + + return 0; +} + + +static int keyed_stream_ioctl_user(struct inode *inode, struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + unsigned long flags; + uint64_t size; + + struct user_keyed_stream *s = (struct user_keyed_stream *) (filp->private_data); + + switch (ioctl) { + + case V3_KSTREAM_REQUEST_SIZE_IOCTL: + + // inform request size + + spin_lock_irqsave(&(s->lock), flags); + + if (!(s->waiting)) { + spin_unlock_irqrestore(&(s->lock), flags); + return 0; + } + + size = sizeof(struct palacios_user_keyed_stream_op) + s->op->buf_len; + + if (copy_to_user((void * __user) argp, &size, sizeof(uint64_t))) { + spin_unlock_irqrestore(&(s->lock), flags); + printk("palacios: palacios user key size request failed to copy data\n"); + return -EFAULT; + } + + spin_unlock_irqrestore(&(s->lock), flags); + + return 1; + + break; + + case V3_KSTREAM_REQUEST_PULL_IOCTL: + + // pull the request + + spin_lock_irqsave(&(s->lock), flags); + + if (!(s->waiting)) { + 
spin_unlock_irqrestore(&(s->lock), flags); + printk("palacios: palacios user key pull request when not waiting\n"); + return 0; + } + + size = sizeof(struct palacios_user_keyed_stream_op) + s->op->buf_len; + + + if (copy_to_user((void __user *) argp, s->op, size)) { + spin_unlock_irqrestore(&(s->lock), flags); + printk("palacios: palacios user key pull request failed to copy data\n"); + return -EFAULT; + } + + spin_unlock_irqrestore(&(s->lock), flags); + + return 1; + + + break; + + case V3_KSTREAM_RESPONSE_PUSH_IOCTL: + + // push the response + + spin_lock_irqsave(&(s->lock), flags); + + if (!(s->waiting)) { + spin_unlock_irqrestore(&(s->lock), flags); + printk("palacios: palacios user key push response when not waiting\n"); + return 0; + } + + if (copy_from_user(&size, (void __user *) argp, sizeof(uint64_t))) { + printk("palacios: palacios user key push response failed to copy size\n"); + spin_unlock_irqrestore(&(s->lock), flags); + return -EFAULT; + } + + if (resize_op(&(s->op),size-sizeof(struct palacios_user_keyed_stream_op))) { + printk("palacios: unable to resize op in user key push response\n"); + spin_unlock_irqrestore(&(s->lock), flags); + return -EFAULT; + } + + if (copy_from_user(s->op, (void __user *) argp, size)) { + spin_unlock_irqrestore(&(s->lock), flags); + return -EFAULT; + } + + do_response_to_request(s,&flags); + // this will have unlocked s for us + + return 1; + + break; + + default: + printk("palacios: unknown ioctl in user keyed stream\n"); + + return -EFAULT; + + break; + + } +} + +static int keyed_stream_release_user(struct inode *inode, struct file *filp) +{ + struct user_keyed_stream *s = filp->private_data; + unsigned long f1,f2; + + spin_lock_irqsave(&(user_streams->lock),f1); + spin_lock_irqsave(&(s->lock), f2); + + list_del(&(s->node)); + + spin_unlock_irqrestore(&(s->lock), f2); + spin_unlock_irqrestore(&(user_streams->lock), f1); + + kfree(s->url); + kfree(s); + + return 0; +} + +static struct file_operations 
user_keyed_stream_fops = { + .poll = keyed_stream_poll_user, + .ioctl = keyed_stream_ioctl_user, + .release = keyed_stream_release_user, +}; + + +/* + user_keyed_streams are allocated on user connect, and deallocated on user release + + palacios-side opens and closes only manipulate the open type +*/ +// arg: user pointer to a 64-bit length followed by that many url bytes; returns the new fd, or -1 on failure +int keyed_stream_connect_user(struct v3_guest *guest, unsigned int cmd, unsigned long arg, void *priv_data) +{ + int fd; + unsigned long flags; + char *url; + uint64_t len; + struct user_keyed_stream *s; + + if (!user_streams) { + printk("palacios: no user space keyed streams!\n"); + return -1; + } + + // get the url + if (copy_from_user(&len,(void __user *)arg,sizeof(len))) { + printk("palacios: cannot copy url len from user\n"); + return -1; + } + + url = len ? kmalloc(len,GFP_KERNEL) : NULL; // len==0 would make url[len-1] below write out of bounds + + if (!url) { + printk("palacios: cannot allocate url for user keyed stream\n"); + return -1; + } + + if (copy_from_user(url,((void __user *)arg)+sizeof(len),len)) { + printk("palacios: cannot copy url from user\n"); + kfree(url); + return -1; + } + url[len-1]=0; + + // Check for duplicate handler + spin_lock_irqsave(&(user_streams->lock), flags); + list_for_each_entry(s, &(user_streams->streams), node) { + if (!strncasecmp(url, s->url, len)) { + spin_unlock_irqrestore(&(user_streams->lock), flags); // do not return with the list lock held + printk("palacios: user keyed stream connection with url \"%s\" already exists\n", url); + kfree(url); + return -1; + } + } + spin_unlock_irqrestore(&(user_streams->lock), flags); + + // Create connection + s = kmalloc(sizeof(struct user_keyed_stream), GFP_KERNEL); + + if (!s) { + printk("palacios: cannot allocate new user keyed stream for %s\n",url); + kfree(url); + return -1; + } + + // Get file descriptor + fd = anon_inode_getfd("v3-kstream", &user_keyed_stream_fops, s, 0); + + if (fd < 0) { + printk("palacios: cannot allocate file descriptor for new user keyed stream for %s\n",url); + kfree(s); + kfree(url); + return -1; + } + + memset(s, 0, sizeof(struct user_keyed_stream)); + + s->stype=STREAM_USER; + s->url=url; + + 
init_waitqueue_head(&(s->user_wait_queue)); + init_waitqueue_head(&(s->host_wait_queue)); + + // Insert connection into list + spin_lock_irqsave(&(user_streams->lock), flags); + list_add(&(s->node), &(user_streams->streams)); + spin_unlock_irqrestore(&(user_streams->lock), flags); + + return fd; +} + +static struct user_keyed_stream *keyed_stream_user_find(char *url) +{ + unsigned long flags; + struct user_keyed_stream *s; + + if (!user_streams) { + printk("palacios: no user space keyed streams available\n"); + return NULL; + } + + spin_lock_irqsave(&(user_streams->lock), flags); + list_for_each_entry(s, &(user_streams->streams), node) { + if (!strcasecmp(url, s->url)) { + spin_unlock_irqrestore(&(user_streams->lock), flags); + return s; + } + } + + spin_unlock_irqrestore(&(user_streams->lock), flags); + + return NULL; +} + + +static v3_keyed_stream_t open_stream_user(char *url, v3_keyed_stream_open_t ot) +{ + unsigned long flags; + struct user_keyed_stream *s; + + s = keyed_stream_user_find(url); + + if (!s) { + printk("palacios: cannot open user stream %s as it does not exist yet\n",url); + return NULL; + } + + spin_lock_irqsave(&(s->lock), flags); + + if (s->waiting) { + spin_unlock_irqrestore(&(s->lock), flags); + printk("palacios: cannot open user stream %s as it is already in waiting state\n",url); + return NULL; + } + + s->otype = ot==V3_KS_WR_ONLY_CREATE ? V3_KS_WR_ONLY : ot; + + spin_unlock_irqrestore(&(s->lock), flags); + + return s; + +} + +// close stream does not do anything. 
Creation of the stream and its cleanup +// are driven by the user side, not the palacios side +// might eventually want to reference count this, though +static void close_stream_user(v3_keyed_stream_t stream) +{ + return; +} + +static void preallocate_hint_key_user(v3_keyed_stream_t stream, + char *key, + uint64_t size) +{ + return; +} + + + + +static v3_keyed_stream_key_t open_key_user(v3_keyed_stream_t stream, char *key) +{ + unsigned long flags; + struct user_keyed_stream *s = (struct user_keyed_stream *) stream; + uint64_t len = strlen(key)+1; + void *user_key; + + spin_lock_irqsave(&(s->lock), flags); + + + if (resize_op(&(s->op),len)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: cannot resize op in opening key %s on user keyed stream %s\n",key,s->url); + return NULL; + } + + s->op->type = PALACIOS_KSTREAM_OPEN_KEY; + s->op->buf_len = len; + strncpy(s->op->buf,key,len); + + // enter with it locked + if (do_request_to_response(s,&flags)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: request/response handling failed\n"); + return NULL; + } + // return with it locked + + user_key=s->op->user_key; + + spin_unlock_irqrestore(&(s->lock),flags); + + return user_key; +} + +static void close_key_user(v3_keyed_stream_t stream, v3_keyed_stream_key_t key) +{ + struct user_keyed_stream *s = (struct user_keyed_stream *) stream; + uint64_t len = 0; + unsigned long flags; + + spin_lock_irqsave(&(s->lock), flags); + + if (resize_op(&(s->op),len)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: cannot resize op in closing key 0x%p on user keyed stream %s\n",key,s->url); + return; + } + + s->op->type = PALACIOS_KSTREAM_CLOSE_KEY; + s->op->buf_len = len; + s->op->user_key = key; + + // enter with it locked + if (do_request_to_response(s,&flags)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: request/response handling failed\n"); + return; + } + // return with it locked + + 
spin_unlock_irqrestore(&(s->lock),flags); + + return; +} + + +// Ask user space to read up to rlen bytes for key into buf; returns the byte count it reports, or -1 on failure +static sint64_t read_key_user(v3_keyed_stream_t stream, v3_keyed_stream_key_t key, + void *buf, sint64_t rlen) +{ + + struct user_keyed_stream *s = (struct user_keyed_stream *) stream; + uint64_t len = 0 ; + sint64_t xfer; + unsigned long flags; + + spin_lock_irqsave(&(s->lock), flags); + + if (s->otype != V3_KS_RD_ONLY) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: attempt to read key from stream that is not in read state on %s\n",s->url); + return -1; // must not continue (and unlock a second time) once the lock is dropped + } + + if (resize_op(&(s->op),len)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: cannot resize op in reading key 0x%p on user keyed stream %s\n",key,s->url); + return -1; + } + + s->op->type = PALACIOS_KSTREAM_READ_KEY; + s->op->buf_len = len ; + s->op->xfer = rlen; + s->op->user_key = key; + + // enter with it locked + if (do_request_to_response(s,&flags)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: request/response handling failed\n"); + return -1; + } + // return with it locked + + xfer=s->op->xfer; + if (xfer>rlen) { + xfer=-1; // response claims more data than buf was sized for; do not overrun it + } else if (xfer>0) { + memcpy(buf,s->op->buf,xfer); + } + + spin_unlock_irqrestore(&(s->lock),flags); + + return xfer; +} + + +// Send wlen bytes of buf to user space to be written under key; returns the byte count it reports, or -1 on failure +static sint64_t write_key_user(v3_keyed_stream_t stream, v3_keyed_stream_key_t key, + void *buf, sint64_t wlen) +{ + + struct user_keyed_stream *s = (struct user_keyed_stream *) stream; + uint64_t len = wlen ; + sint64_t xfer; + unsigned long flags; + + spin_lock_irqsave(&(s->lock), flags); + + if (s->otype != V3_KS_WR_ONLY) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: attempt to write key on stream that is not in write state on %s\n",s->url); + return -1; // must not continue (and unlock a second time) once the lock is dropped + } + + if (resize_op(&(s->op),len)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: cannot resize op in writing key 0x%p on user keyed stream %s\n",key,s->url); + return -1; + } + + s->op->type = PALACIOS_KSTREAM_WRITE_KEY; + 
s->op->buf_len = len; + s->op->xfer = wlen; + s->op->user_key = key; + + memcpy(s->op->buf,buf,wlen); + + // enter with it locked + if (do_request_to_response(s,&flags)) { + spin_unlock_irqrestore(&(s->lock),flags); + printk("palacios: request/response handling failed\n"); + return -1; + } + // return with it locked + + xfer=s->op->xfer; + + spin_unlock_irqrestore(&(s->lock),flags); + + return xfer; +} + + + + +/*************************************************************************************************** + Generic interface +*************************************************************************************************/ + +static v3_keyed_stream_t open_stream(char *url, + v3_keyed_stream_open_t ot) +{ + if (!strncasecmp(url,"mem:",4)) { + return open_stream_mem(url,ot); + } else if (!strncasecmp(url,"file:",5)) { + return open_stream_file(url,ot); + } else if (!strncasecmp(url,"user:",5)) { + return open_stream_user(url,ot); + } else { + printk("palacios: unsupported type in attempt to open keyed stream \"%s\"\n",url); + return 0; + } +} + +static void close_stream(v3_keyed_stream_t stream) +{ + struct generic_keyed_stream *gks = (struct generic_keyed_stream *) stream; + switch (gks->stype){ + case STREAM_MEM: + return close_stream_mem(stream); + break; + case STREAM_FILE: + return close_stream_file(stream); + break; + case STREAM_USER: + return close_stream_user(stream); + break; + default: + printk("palacios: unknown stream type %d in close\n",gks->stype); + break; + } +} + +static void preallocate_hint_key(v3_keyed_stream_t stream, + char *key, + uint64_t size) +{ + struct generic_keyed_stream *gks = (struct generic_keyed_stream *) stream; + switch (gks->stype){ + case STREAM_MEM: + preallocate_hint_key_mem(stream,key,size); + break; + case STREAM_FILE: + preallocate_hint_key_file(stream,key,size); + break; + case STREAM_USER: + return preallocate_hint_key_user(stream,key,size); + break; + default: + printk("palacios: unknown stream type %d in 
preallocate_hint_key\n",gks->stype); + break; + } + return; +} + + +static v3_keyed_stream_key_t open_key(v3_keyed_stream_t stream, + char *key) +{ + struct generic_keyed_stream *gks = (struct generic_keyed_stream *) stream; + switch (gks->stype){ + case STREAM_MEM: + return open_key_mem(stream,key); + break; + case STREAM_FILE: + return open_key_file(stream,key); + break; + case STREAM_USER: + return open_key_user(stream,key); + break; + default: + printk("palacios: unknown stream type %d in open_key\n",gks->stype); + break; + } + return 0; +} + + +static void close_key(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key) +{ + struct generic_keyed_stream *gks = (struct generic_keyed_stream *) stream; + switch (gks->stype){ + case STREAM_MEM: + return close_key_mem(stream,key); + break; + case STREAM_FILE: + return close_key_file(stream,key); + break; + case STREAM_USER: + return close_key_user(stream,key); + break; + default: + printk("palacios: unknown stream type %d in close_key\n",gks->stype); + break; + } + // nothing to do + return; +} + +static sint64_t write_key(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key, + void *buf, + sint64_t len) +{ + struct generic_keyed_stream *gks = (struct generic_keyed_stream *) stream; + switch (gks->stype){ + case STREAM_MEM: + return write_key_mem(stream,key,buf,len); + break; + case STREAM_FILE: + return write_key_file(stream,key,buf,len); + break; + case STREAM_USER: + return write_key_user(stream,key,buf,len); + break; + default: + printk("palacios: unknown stream type %d in write_key\n",gks->stype); + return -1; + break; + } + return -1; +} + + +static sint64_t read_key(v3_keyed_stream_t stream, + v3_keyed_stream_key_t key, + void *buf, + sint64_t len) +{ + struct generic_keyed_stream *gks = (struct generic_keyed_stream *) stream; + switch (gks->stype){ + case STREAM_MEM: + return read_key_mem(stream,key,buf,len); + break; + case STREAM_FILE: + return read_key_file(stream,key,buf,len); + break; + case 
STREAM_USER: + return read_key_user(stream,key,buf,len); + break; + default: + printk("palacios: unknown stream type %d in read_key\n",gks->stype); + return -1; + break; + } + return -1; +} + + + + +/*************************************************************************************************** + Hooks to palacios and inititialization +*************************************************************************************************/ + static struct v3_keyed_stream_hooks hooks = { .open = open_stream, .close = close_stream, + .preallocate_hint_key = preallocate_hint_key, .open_key = open_key, .close_key = close_key, .read_key = read_key, @@ -277,32 +1509,66 @@ static struct v3_keyed_stream_hooks hooks = { static int init_keyed_streams( void ) { - streams = palacios_create_htable(DEF_NUM_STREAMS,hash_func,hash_comp); + mem_streams = palacios_create_htable(DEF_NUM_STREAMS,hash_func,hash_comp); - if (!streams) { - printk("Failed to allocated stream pool\n"); + if (!mem_streams) { + printk("palacios: failed to allocated stream pool for in-memory streams\n"); return -1; } - V3_Init_Keyed_Streams(&hooks); + user_streams = kmalloc(sizeof(struct user_keyed_streams),GFP_KERNEL); + + if (!user_streams) { + printk("palacios: failed to allocated list for user streams\n"); + return -1; + } + + INIT_LIST_HEAD(&(user_streams->streams)); + spin_lock_init(&(user_streams->lock)); + + V3_Init_Keyed_Streams(&hooks); + return 0; } static int deinit_keyed_streams( void ) { - printk("DEINIT OF PALACIOS KEYED STREAMS NOT IMPLEMENTED - WE HAVE JUST LEAKED MEMORY!\n"); - return -1; + palacios_free_htable(mem_streams,1,1); + + kfree(user_streams); + + printk("Deinit of Palacios Keyed Streams likely leaked memory\n"); + + return 0; +} + + +static int guest_init_keyed_streams(struct v3_guest * guest, void ** vm_data ) +{ + + add_guest_ctrl(guest, V3_VM_KSTREAM_USER_CONNECT, keyed_stream_connect_user, 0); + + return 0; } +static int guest_deinit_keyed_streams(struct v3_guest * guest, void * 
vm_data) +{ + + return 0; +} + + + + static struct linux_ext key_stream_ext = { .name = "KEYED_STREAM_INTERFACE", .init = init_keyed_streams, .deinit = deinit_keyed_streams, - .guest_init = NULL, - .guest_deinit = NULL + .guest_init = guest_init_keyed_streams, + .guest_deinit = guest_deinit_keyed_streams, }; diff --git a/linux_module/iface-stream.c b/linux_module/iface-stream.c index 36582d9..9cb89e5 100644 --- a/linux_module/iface-stream.c +++ b/linux_module/iface-stream.c @@ -21,17 +21,20 @@ #include "iface-stream.h" -// This is going to need to be a lot bigger... -#define STREAM_BUF_SIZE 1024 +// This is probably overkill +#define STREAM_RING_LEN 4096 static struct list_head global_streams; -struct stream_buffer { + + +struct stream_state { char name[STREAM_NAME_LEN]; - struct ringbuf * buf; + + struct ringbuf * out_ring; int connected; @@ -40,19 +43,24 @@ struct stream_buffer { struct v3_guest * guest; struct list_head stream_node; + + struct v3_stream * v3_stream; }; // Currently just the list of open streams -struct vm_stream_state { +struct vm_global_streams { struct list_head open_streams; }; -static struct stream_buffer * find_stream_by_name(struct v3_guest * guest, const char * name) { - struct stream_buffer * stream = NULL; + + + +static struct stream_state * find_stream_by_name(struct v3_guest * guest, const char * name) { + struct stream_state * stream = NULL; struct list_head * stream_list = NULL; - struct vm_stream_state * vm_state = NULL; + struct vm_global_streams * vm_state = NULL; if (guest == NULL) { stream_list = &global_streams; @@ -78,28 +86,119 @@ static struct stream_buffer * find_stream_by_name(struct v3_guest * guest, const +#define TMP_BUF_LEN 128 + static ssize_t stream_read(struct file * filp, char __user * buf, size_t size, loff_t * offset) { - struct stream_buffer * stream = filp->private_data; + struct stream_state * stream = filp->private_data; + ssize_t bytes_read = 0; + ssize_t bytes_left = size; + unsigned long flags; + char 
tmp_buf[TMP_BUF_LEN]; + ssize_t total_bytes_left = 0; + + // memset(tmp_buf, 0, TMP_BUF_LEN); + + while (bytes_left > 0) { + int tmp_len = (TMP_BUF_LEN > bytes_left) ? bytes_left : TMP_BUF_LEN; + int tmp_read = 0; + + spin_lock_irqsave(&(stream->lock), flags); + tmp_read = ringbuf_read(stream->out_ring, tmp_buf, tmp_len); + spin_unlock_irqrestore(&(stream->lock), flags); + + if (tmp_read == 0) { + // If userspace reads more than we have + break; + } + + if (copy_to_user(buf + bytes_read, tmp_buf, tmp_read)) { + printk("Read Fault\n"); + return -EFAULT; + } + + bytes_left -= tmp_read; + bytes_read += tmp_read; + } - wait_event_interruptible(stream->intr_queue, (ringbuf_data_len(stream->buf) != 0)); - return ringbuf_read(stream->buf, buf, size); + spin_lock_irqsave(&(stream->lock), flags); + total_bytes_left = ringbuf_data_len(stream->out_ring); + spin_unlock_irqrestore(&(stream->lock), flags); + + if (total_bytes_left > 0) { + wake_up_interruptible(&(stream->intr_queue)); + } + + return bytes_read; } +static unsigned int +stream_poll(struct file * filp, struct poll_table_struct * poll_tb) { + struct stream_state * stream = filp->private_data; + unsigned int mask = POLLIN | POLLRDNORM; + unsigned long flags; + int data_avail = 0; + + poll_wait(filp, &(stream->intr_queue), poll_tb); + + spin_lock_irqsave(&(stream->lock), flags); + data_avail = ringbuf_data_len(stream->out_ring); + spin_unlock_irqrestore(&(stream->lock), flags); + + if (data_avail > 0) { + return mask; + } + return 0; + +} + +static ssize_t stream_write(struct file * filp, const char __user * buf, size_t size, loff_t * offset) { + struct stream_state * stream = filp->private_data; + char * kern_buf = NULL; + ssize_t bytes_written = 0; + + kern_buf = kmalloc(size, GFP_KERNEL); + + if (copy_from_user(kern_buf, buf, size)) { + printk("Stream Write Failed\n"); + return -EFAULT; + }; + + bytes_written = stream->v3_stream->input(stream->v3_stream, kern_buf, size); + + kfree(kern_buf); + + return 
bytes_written; +} + + +static int stream_release(struct inode * i, struct file * filp) { + struct stream_state * stream = filp->private_data; + unsigned long flags; + + spin_lock_irqsave(&(stream->lock), flags); + stream->connected = 0; + spin_unlock_irqrestore(&(stream->lock), flags); + + + return 0; + +} static struct file_operations stream_fops = { .read = stream_read, - // .release = stream_close, - // .poll = stream_poll, + .write = stream_write, + .release = stream_release, + .poll = stream_poll, }; -static void * palacios_stream_open(const char * name, void * private_data) { +static void * palacios_stream_open(struct v3_stream * v3_stream, const char * name, void * private_data) { struct v3_guest * guest = (struct v3_guest *)private_data; - struct stream_buffer * stream = NULL; - struct vm_stream_state * vm_state = NULL; + struct stream_state * stream = NULL; + struct vm_global_streams * vm_state = NULL; if (guest != NULL) { vm_state = get_vm_ext_data(guest, "STREAM_INTERFACE"); @@ -115,10 +214,13 @@ static void * palacios_stream_open(const char * name, void * private_data) { return NULL; } - stream = kmalloc(sizeof(struct stream_buffer), GFP_KERNEL); - - stream->buf = create_ringbuf(STREAM_BUF_SIZE); + stream = kmalloc(sizeof(struct stream_state), GFP_KERNEL); + memset(stream, 0, sizeof(struct stream_state)); + + stream->out_ring = create_ringbuf(STREAM_RING_LEN); + stream->v3_stream = v3_stream; stream->guest = guest; + stream->connected = 0; strncpy(stream->name, name, STREAM_NAME_LEN - 1); @@ -135,24 +237,38 @@ static void * palacios_stream_open(const char * name, void * private_data) { } -static int palacios_stream_write(void * stream_ptr, char * buf, int len) { - struct stream_buffer * stream = (struct stream_buffer *)stream_ptr; - int ret = 0; +static uint64_t palacios_stream_output(struct v3_stream * v3_stream, char * buf, int len) { + struct stream_state * stream = (struct stream_state *)v3_stream->host_stream_data; + int bytes_written = 0; + 
unsigned long flags; + - ret = ringbuf_write(stream->buf, buf, len); + if (stream->connected == 0) { + return 0; + } + + while (bytes_written < len) { + spin_lock_irqsave(&(stream->lock), flags); + bytes_written += ringbuf_write(stream->out_ring, buf + bytes_written, len - bytes_written); + spin_unlock_irqrestore(&(stream->lock), flags); - if (ret > 0) { wake_up_interruptible(&(stream->intr_queue)); + + if (bytes_written < len) { + // not enough space in ringbuffer, activate user space to drain it + schedule(); + } } - return ret; + + return bytes_written; } -static void palacios_stream_close(void * stream_ptr) { - struct stream_buffer * stream = (struct stream_buffer *)stream_ptr; +static void palacios_stream_close(struct v3_stream * v3_stream) { + struct stream_state * stream = (struct stream_state *)v3_stream->host_stream_data; - free_ringbuf(stream->buf); + free_ringbuf(stream->out_ring); list_del(&(stream->stream_node)); kfree(stream); @@ -160,7 +276,7 @@ static void palacios_stream_close(void * stream_ptr) { static struct v3_stream_hooks palacios_stream_hooks = { .open = palacios_stream_open, - .write = palacios_stream_write, + .output = palacios_stream_output, .close = palacios_stream_close, }; @@ -188,7 +304,7 @@ static int stream_deinit( void ) { static int stream_connect(struct v3_guest * guest, unsigned int cmd, unsigned long arg, void * priv_data) { void __user * argp = (void __user *)arg; - struct stream_buffer * stream = NULL; + struct stream_state * stream = NULL; int stream_fd = 0; char name[STREAM_NAME_LEN]; unsigned long flags = 0; @@ -221,7 +337,7 @@ static int stream_connect(struct v3_guest * guest, unsigned int cmd, unsigned lo } - stream_fd = anon_inode_getfd("v3-stream", &stream_fops, stream, 0); + stream_fd = anon_inode_getfd("v3-stream", &stream_fops, stream, O_RDWR); if (stream_fd < 0) { printk("Error creating stream inode for (%s)\n", name); @@ -235,12 +351,11 @@ static int stream_connect(struct v3_guest * guest, unsigned int cmd, 
unsigned lo static int guest_stream_init(struct v3_guest * guest, void ** vm_data) { - struct vm_stream_state * state = kmalloc(sizeof(struct vm_stream_state), GFP_KERNEL); + struct vm_global_streams * state = kmalloc(sizeof(struct vm_global_streams), GFP_KERNEL); INIT_LIST_HEAD(&(state->open_streams)); *vm_data = state; - add_guest_ctrl(guest, V3_VM_STREAM_CONNECT, stream_connect, state); return 0; @@ -248,7 +363,7 @@ static int guest_stream_init(struct v3_guest * guest, void ** vm_data) { static int guest_stream_deinit(struct v3_guest * guest, void * vm_data) { - struct vm_stream_state * state = vm_data; + struct vm_global_streams * state = vm_data; if (!list_empty(&(state->open_streams))) { printk("Error shutting down VM with open streams\n"); } diff --git a/linux_module/palacios-stubs.c b/linux_module/palacios-stubs.c index be06001..0310a40 100644 --- a/linux_module/palacios-stubs.c +++ b/linux_module/palacios-stubs.c @@ -85,7 +85,11 @@ static void * palacios_alloc(unsigned int size) { void * addr = NULL; - addr = kmalloc(size, GFP_KERNEL); + if (irqs_disabled()) { + addr = kmalloc(size, GFP_ATOMIC); + } else { + addr = kmalloc(size, GFP_KERNEL); + } mallocs++; return addr; diff --git a/linux_module/palacios.h b/linux_module/palacios.h index 56d5abf..633ede1 100644 --- a/linux_module/palacios.h +++ b/linux_module/palacios.h @@ -15,6 +15,9 @@ #define V3_VM_CONSOLE_CONNECT 20 #define V3_VM_STOP 22 +#define V3_VM_PAUSE 23 +#define V3_VM_CONTINUE 24 + #define V3_VM_INSPECT 30 @@ -23,6 +26,8 @@ #define V3_VM_HOST_DEV_CONNECT (10244+1) +#define V3_VM_KSTREAM_USER_CONNECT (11244+1) + struct v3_guest_img { unsigned long long size; diff --git a/linux_module/vm.c b/linux_module/vm.c index 852733c..967ecf6 100644 --- a/linux_module/vm.c +++ b/linux_module/vm.c @@ -129,10 +129,20 @@ static long v3_vm_ioctl(struct file * filp, switch (ioctl) { case V3_VM_STOP: { - printk("Stopping VM\n"); + printk("Stopping VM (%s)\n", guest->name); stop_palacios_vm(guest); break; } + case 
V3_VM_PAUSE: { + printk("Pausing VM (%s)\n", guest->name); + v3_pause_vm(guest->v3_ctx); + break; + } + case V3_VM_CONTINUE: { + printk("Continuing VM (%s)\n", guest->name); + v3_continue_vm(guest->v3_ctx); + break; + } default: { struct vm_ctrl * ctrl = get_ctrl(guest, ioctl); diff --git a/linux_usr/Makefile b/linux_usr/Makefile index 439494c..3f4aedd 100644 --- a/linux_usr/Makefile +++ b/linux_usr/Makefile @@ -1,4 +1,4 @@ -all: v3_ctrl v3_stop v3_cons v3_mem v3_monitor v3_serial v3_net v3_user_host_dev_example v3_os_debug +all: v3_ctrl v3_stop v3_cons v3_mem v3_monitor v3_stream v3_user_host_dev_example v3_os_debug v3_user_keyed_stream_example v3_user_keyed_stream_file @@ -15,21 +15,27 @@ v3_mem : v3_mem.c v3_ctrl.h v3_cons : v3_cons.c v3_ctrl.h gcc -static v3_cons.c -o v3_cons -lcurses -v3_serial : v3_serial.c v3_ctrl.h - gcc -static v3_serial.c -pthread -o v3_serial +v3_stream : v3_stream.c v3_ctrl.h + gcc -static v3_stream.c -o v3_stream v3_monitor : v3_cons.c v3_ctrl.h gcc -static v3_monitor.c -o v3_monitor -v3_net : v3_net.c v3_ctrl.h - gcc -static v3_net.c -o v3_net - v3_user_host_dev_example: v3_user_host_dev_example.c v3_user_host_dev.h v3_user_host_dev.c gcc -static -I../linux_module v3_user_host_dev_example.c v3_user_host_dev.c -o v3_user_host_dev_example v3_os_debug: v3_os_debug.c v3_user_host_dev.h v3_user_host_dev.c gcc -static -I../linux_module v3_os_debug.c v3_user_host_dev.c -o v3_os_debug +v3_user_keyed_stream_example: v3_user_keyed_stream_example.c v3_user_keyed_stream.h v3_user_keyed_stream.c + gcc -static -I../linux_module v3_user_keyed_stream_example.c v3_user_keyed_stream.c -o v3_user_keyed_stream_example + + +v3_user_keyed_stream_file: v3_user_keyed_stream_file.c v3_user_keyed_stream.h v3_user_keyed_stream.c + gcc -static -I../linux_module v3_user_keyed_stream_file.c v3_user_keyed_stream.c -o v3_user_keyed_stream_file + + + clean: - rm -f v3_ctrl v3_cons v3_mem v3_monitor v3_serial v3_net v3_user_host_dev_example v3_os_debug + rm -f v3_ctrl 
v3_cons v3_mem v3_monitor v3_stream v3_user_host_dev_example v3_os_debug v3_user_keyed_stream_example v3_user_keyed_stream_file diff --git a/linux_usr/v3_net.c b/linux_usr/v3_net.c deleted file mode 100644 index 5449cd4..0000000 --- a/linux_usr/v3_net.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * V3 Control utility for Palacios network services - * (c) Lei Xia, 2010 - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "v3_ctrl.h" - -struct v3_network { - unsigned char socket; - unsigned char packet; - unsigned char vnet; -}; - -int main(int argc, char* argv[]) { - int v3_fd = 0; - struct v3_network net; - int i; - - if (argc <= 1) { - printf("Usage: ./v3_mem [socket] [packet] [vnet]\n"); - return -1; - } - - for (i = 1; i < argc; i++){ - if(!strcasecmp (argv[i], "packet")){ - net.packet = 1; - }else if(!strcasecmp (argv[i], "socket")){ - net.socket = 1; - }else if(!strcasecmp (argv[i], "vnet")){ - net.vnet = 1; - }else { - printf("unknown v3 network service: %s, ignored\n", argv[i]); - } - } - - printf("Network service: socket: %d, packet: %d, vnet: %d\n", net.socket, net.packet, net.vnet); - - v3_fd = open(v3_dev, O_RDONLY); - - if (v3_fd == -1) { - printf("Error opening V3Vee control device\n"); - return -1; - } - - ioctl(v3_fd, V3_START_NETWORK, &net); - - - /* Close the file descriptor. 
*/ - close(v3_fd); - - - return 0; -} - diff --git a/linux_usr/v3_serial.c b/linux_usr/v3_serial.c deleted file mode 100644 index 2f66c7f..0000000 --- a/linux_usr/v3_serial.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * V3 Console utility - * (c) Jack lange & Lei Xia, 2010 - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "v3_ctrl.h" - -static int cons_fd = -1; -static pthread_t input_handler; - -void *write_handler(void *val){ - char read; - printf("Write handler active\n"); - fflush(stdout); - while(1){ - read = getchar(); - if(write(cons_fd, &read, sizeof(char)) < 0){ - printf("WRITE ERROR"); - } - } -} - - -int main(int argc, char* argv[]) { - int vm_fd; - fd_set rset; - char *vm_dev = NULL; - char *stream; - - if (argc < 2) { - printf("Usage: ./v3_cons vm_device serial_number\n"); - return -1; - } - - vm_dev = argv[1]; - stream = argv[2]; - - vm_fd = open(vm_dev, O_RDONLY); - if (vm_fd == -1) { - printf("Error opening VM device: %s\n", vm_dev); - return -1; - } - - cons_fd = ioctl(vm_fd, V3_VM_SERIAL_CONNECT, stream); - - /* Close the file descriptor. 
*/ - close(vm_fd); - if (cons_fd < 0) { - printf("Error opening stream Console\n"); - return -1; - } - - - if(pthread_create(&input_handler,0,write_handler,0)){ - perror("pthread_create"); - exit(-1); - } - - - while (1) { - int ret; - char cons_buf[1024]; - memset(cons_buf, 0, sizeof(cons_buf)); - int bytes_read = 0; - - FD_ZERO(&rset); - FD_SET(cons_fd, &rset); - - ret = select(cons_fd + 1, &rset, NULL, NULL, NULL); - - if (ret == 1) { - bytes_read = read(cons_fd, cons_buf, 1024); - cons_buf[bytes_read]='\0'; - printf("%s", cons_buf); - } else { - printf("v3_cons ERROR: select returned %d\n", ret); - return -1; - } - } - - - return 0; -} - - diff --git a/linux_usr/v3_stream.c b/linux_usr/v3_stream.c new file mode 100644 index 0000000..0a1f46a --- /dev/null +++ b/linux_usr/v3_stream.c @@ -0,0 +1,113 @@ +/* + * V3 Console utility + * (c) Jack lange & Lei Xia, 2010 + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "v3_ctrl.h" + +#define BUF_LEN 1025 +#define STREAM_NAME_LEN 128 + +int main(int argc, char* argv[]) { + int vm_fd; + fd_set rset; + char * vm_dev = NULL; + char stream[STREAM_NAME_LEN]; + char cons_buf[BUF_LEN]; + int stream_fd = 0; + + if (argc < 2) { + printf("Usage: ./v3_cons vm_device serial_number\n"); + return -1; + } + + vm_dev = argv[1]; + + if (strlen(argv[2]) >= STREAM_NAME_LEN) { + printf("ERROR: Stream name longer than maximum size (%d)\n", STREAM_NAME_LEN); + return -1; + } + + memcpy(stream, argv[2], strlen(argv[2])); + + vm_fd = open(vm_dev, O_RDONLY); + if (vm_fd == -1) { + printf("Error opening VM device: %s\n", vm_dev); + return -1; + } + + stream_fd = ioctl(vm_fd, V3_VM_SERIAL_CONNECT, stream); + + /* Close the file descriptor. 
*/ + close(vm_fd); + + if (stream_fd < 0) { + printf("Error opening stream Console\n"); + return -1; + } + + while (1) { + int ret; + int bytes_read = 0; + char in_buf[512]; + + memset(cons_buf, 0, BUF_LEN); + + + FD_ZERO(&rset); + FD_SET(stream_fd, &rset); + FD_SET(STDIN_FILENO, &rset); + + ret = select(stream_fd + 1, &rset, NULL, NULL, NULL); + + if (ret == 0) { + continue; + } else if (ret == -1) { + perror("Select returned error\n"); + return -1; + } + + if (FD_ISSET(stream_fd, &rset)) { + + bytes_read = read(stream_fd, cons_buf, BUF_LEN - 1); + + cons_buf[bytes_read]='\0'; + printf("%s", cons_buf); + fflush(stdout); + + } else if (FD_ISSET(STDIN_FILENO, &rset)) { + fgets(in_buf, 512, stdin); + + if (write(stream_fd, in_buf, strlen(in_buf)) != strlen(in_buf)) { + fprintf(stderr, "Error sending input bufer\n"); + return -1; + } + } else { + printf("v3_cons ERROR: select returned %d\n", ret); + return -1; + } + + + } + + + return 0; +} + + diff --git a/linux_usr/v3_user_keyed_stream.c b/linux_usr/v3_user_keyed_stream.c new file mode 100644 index 0000000..27e76ee --- /dev/null +++ b/linux_usr/v3_user_keyed_stream.c @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include +#include + +#include "v3_user_keyed_stream.h" + + +int v3_user_keyed_stream_attach(char *vmdev, char *url) +{ + int vmfd; + int devfd; + + struct palacios_user_keyed_stream_url *u; + + u=malloc(sizeof(struct palacios_user_keyed_stream_url)+strlen(url)+1); + + if (!u) { + return -1; + } + + strcpy(u->url,url); + u->len = strlen(url)+1; + + + if ((vmfd=open(vmdev,O_RDWR))<0) { + free(u); + return -1; + } + + devfd = ioctl(vmfd,V3_VM_KSTREAM_USER_CONNECT,u); + + close(vmfd); + + free(u); + + return devfd; + +} +int v3_user_keyed_stream_detach(int devfd) +{ + return close(devfd); +} + + +int v3_user_keyed_stream_have_request(int devfd) +{ + uint64_t len; + + int rc=ioctl(devfd,V3_KSTREAM_REQUEST_SIZE_IOCTL,&len); + + return rc==1; +} + +int v3_user_keyed_stream_pull_request(int devfd, 
struct palacios_user_keyed_stream_op **req) +{ + uint64_t len; + int rc; + + rc=ioctl(devfd,V3_KSTREAM_REQUEST_SIZE_IOCTL,&len); + + if (rc<=0) { + return -1; + } else { + struct palacios_user_keyed_stream_op *r = malloc(len); + + if (!r) { + fprintf(stderr,"malloc failed\n"); + return -1; + } + + rc=ioctl(devfd, V3_KSTREAM_REQUEST_PULL_IOCTL,r); + + + if (rc<=0) { + free(r); + return -1; + } else { + *req=r; + return 0; + } + } +} + + +int v3_user_keyed_stream_push_response(int devfd, struct palacios_user_keyed_stream_op *resp) +{ + int rc; + + rc=ioctl(devfd,V3_KSTREAM_RESPONSE_PUSH_IOCTL,resp); + + if (rc<=0) { + return -1; + } else { + return 0; + } +} + + + + + diff --git a/linux_usr/v3_user_keyed_stream.h b/linux_usr/v3_user_keyed_stream.h new file mode 100644 index 0000000..23d5943 --- /dev/null +++ b/linux_usr/v3_user_keyed_stream.h @@ -0,0 +1,17 @@ +#ifndef __V3_USER_KSTREAM_H__ +#define __V3_USER_KSTREAM_H__ + +#include +#define sint64_t int64_t + +#include "iface-keyed-stream-user.h" + +int v3_user_keyed_stream_attach(char *dev, char *url); +int v3_user_keyed_stream_detach(int devfd); + +int v3_user_keyed_stream_have_request(int devfd); +int v3_user_keyed_stream_pull_request(int devfd, struct palacios_user_keyed_stream_op **req); +int v3_user_keyed_stream_push_response(int devfd, struct palacios_user_keyed_stream_op *resp); + + +#endif diff --git a/linux_usr/v3_user_keyed_stream_example.c b/linux_usr/v3_user_keyed_stream_example.c new file mode 100644 index 0000000..1e126da --- /dev/null +++ b/linux_usr/v3_user_keyed_stream_example.c @@ -0,0 +1,146 @@ +#include +#include +#include +#include +#include +#include +#include + +#define sint64_t int64_t + +#include "v3_user_keyed_stream.h" + +void usage() +{ + fprintf(stderr,"v3_user_keyed_stream_example /dev/v3-vm0 user:mystreamtype:mystream busywait|select\n"); +} + + +int do_work(struct palacios_user_keyed_stream_op *req, + struct palacios_user_keyed_stream_op **resp) +{ + uint64_t datasize; + + // + // + 
// Process request here + // + // req->len : total structure length + // req->type : request type (currently open/close key and read/write key + // req->xfer : unused + // req->user_key : the opaque key previously provided by you by an open key + // req->buf_len : length of data + // req->buf : buffer (contains key name (open key) or value (write key)) + // + + // now built a response + *resp = malloc(sizeof(struct palacios_user_keyed_stream_op) + datasize); + (*resp)->len = sizeof(struct palacios_user_keyed_stream_op) + datasize; + (*resp)->buf_len = datasize; + (*resp)->type = req->type; + (*resp)->user_key = req->user_key; + + // + // The response + // + // resp->len : total structure length + // resp->type : response type - must match the request + // resp->xfer : contains the size of data read or written (in read key or write key) + // resp->user_key : unused + // resp->buf_len : length of data following + // resp->buf : buffer (contains the data (read key)) + + + return 0; +} + +int main(int argc, char *argv[]) +{ + int devfd; + int mode=0; + char *vm, *url; + + if (argc!=4) { + usage(); + exit(-1); + } + + vm=argv[1]; + url=argv[2]; + mode = argv[3][0]=='s'; + + // The URL should begin with user: + // the remainder can be used to demultiplex internally + // for example user:file:foo might refer to a user-side file-based implementation + // + + if (strncmp(url,"user:",5)) { + fprintf(stderr, "URL %s is not a user: url\n"); + exit(-1); + } + + fprintf(stderr,"Attempting to attach to vm %s as url %s\n", vm, url); + + if ((devfd = v3_user_keyed_stream_attach(vm,url))<0) { + perror("failed to attach"); + exit(-1); + } + + fprintf(stderr,"Attachment succeeded, I will now operate in %s mode\n", mode==0 ? 
"busywait" : "select"); + + if (mode==0) { + //busywait + + struct palacios_user_keyed_stream_op *req; + struct palacios_user_keyed_stream_op *resp; + uint64_t datasize; + + while (1) { + while (!(v3_user_keyed_stream_have_request(devfd))) { + } + v3_user_keyed_stream_pull_request(devfd, &req); + + do_work(req, &resp); + + v3_user_keyed_stream_push_response(devfd, resp); + + free(resp); + free(req); + } + } else { + + struct palacios_user_keyed_stream_op *req; + struct palacios_user_keyed_stream_op *resp; + uint64_t datasize; + fd_set readset; + int rc; + + // select-based operation so that you can wait for multiple things + + while (1) { + FD_ZERO(&readset); + FD_SET(devfd,&readset); + + rc = select(devfd+1, &readset, 0, 0, 0); // pick whatever you want to select on, just include devfd + + if (rc>0) { + if (FD_ISSET(devfd,&readset)) { + // a request is read for us! + v3_user_keyed_stream_pull_request(devfd, &req); + + do_work(req, &resp); + + v3_user_keyed_stream_push_response(devfd, resp); + + free(resp); + free(req); + } + } + } + } + + v3_user_keyed_stream_detatch(devfd); + + return 0; + +} diff --git a/linux_usr/v3_user_keyed_stream_file.c b/linux_usr/v3_user_keyed_stream_file.c new file mode 100644 index 0000000..275983c --- /dev/null +++ b/linux_usr/v3_user_keyed_stream_file.c @@ -0,0 +1,352 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define sint64_t int64_t + +#include "v3_user_keyed_stream.h" + +void usage() +{ + fprintf(stderr,"v3_user_keyed_stream_file /dev/v3-vm0 user:file:stream\n"); +} + +char *dir; + +int dir_setup(char *dir) +{ + DIR *d; + int created=0; + int fd; + + char buf[strlen(dir)+strlen("/.palacios_keyed_stream_user_file")+1]; + + + strcpy(buf,dir); + strcat(buf,"/.palacios_keyed_stream_user_file"); + + d=opendir(dir); + + // does the directory exist or can we create it + if (d) { + closedir(d); + } else { + if (mkdir(dir,0700)<0) { + perror("cannot create 
directory"); + return -1; + } else { + created=1; + } + } + + // can we write to it? + + fd = open(buf,O_RDWR | O_CREAT,0600); + + if (fd<0) { + perror("cannot write directory"); + if (created) { + rmdir(dir); + } + return -1; + } + + // ok, we are done + + close(fd); + + return 0; +} + + + +int handle_open_key(struct palacios_user_keyed_stream_op *req, + struct palacios_user_keyed_stream_op **resp, + char *dir) +{ + int fd; + char fn[strlen(dir)+req->buf_len+1]; + + strcpy(fn,dir); + strcat(fn,"/"); + strcat(fn,req->buf); + + + fd = open(fn,O_RDWR | O_CREAT,0600); + + (*resp) = malloc(sizeof(struct palacios_user_keyed_stream_op)+0); + + if (!(*resp)) { + return -1; + } + + (*resp)->len=sizeof(struct palacios_user_keyed_stream_op); + (*resp)->type=req->type; + (*resp)->xfer=0; + (*resp)->user_key=(void*)fd; + (*resp)->buf_len=0; + + return 0; + +} + +int handle_close_key(struct palacios_user_keyed_stream_op *req, + struct palacios_user_keyed_stream_op **resp, + char *dir) +{ + int fd; + int rc; + + fd = (int) (req->user_key); + + rc = close(fd); + + (*resp) = malloc(sizeof(struct palacios_user_keyed_stream_op)+0); + + if (!(*resp)) { + return -1; + } + + (*resp)->len=sizeof(struct palacios_user_keyed_stream_op); + (*resp)->type=req->type; + (*resp)->xfer=rc; + (*resp)->user_key=(void*)fd; + (*resp)->buf_len=0; + + return 0; + +} + +int read_all(int fd, char *buf, int len) +{ + int xfer; + int left; + + left=len; + + while (left>0) { + xfer=read(fd, buf+len-left,left); + if (xfer<0) { + perror("cannot read file"); + return -1; + } else { + left-=xfer; + } + } + return len; +} + +int write_all(int fd, char *buf, int len) +{ + int xfer; + int left; + + left=len; + + while (left>0) { + xfer=write(fd, buf+len-left,left); + if (xfer<0) { + perror("cannot write file"); + return -1; + } else { + left-=xfer; + } + } + return len; +} + + +int handle_write_key(struct palacios_user_keyed_stream_op *req, + struct palacios_user_keyed_stream_op **resp, + char *dir) +{ + int fd; + 
int rc; + + fd = (int) (req->user_key); + + rc = write_all(fd,req->buf,req->xfer); + + (*resp) = malloc(sizeof(struct palacios_user_keyed_stream_op)+0); + + if (!(*resp)) { + return -1; + } + + (*resp)->len=sizeof(struct palacios_user_keyed_stream_op); + (*resp)->type=req->type; + (*resp)->xfer=rc; + (*resp)->user_key=(void*)fd; + (*resp)->buf_len=0; + + + return 0; + +} + +int handle_read_key(struct palacios_user_keyed_stream_op *req, + struct palacios_user_keyed_stream_op **resp, + char *dir) +{ + int fd; + int rc; + + fd = (int) (req->user_key); + + (*resp) = malloc(sizeof(struct palacios_user_keyed_stream_op)+req->xfer); + + if (!(*resp)) { + return -1; + } + + rc = read_all(fd,(*resp)->buf,req->xfer); + + (*resp)->len=sizeof(struct palacios_user_keyed_stream_op) + (rc>0 ? rc : 0); + (*resp)->type=req->type; + (*resp)->xfer=rc; + (*resp)->user_key=(void*)fd; + (*resp)->buf_len=rc>0 ? rc : 0; + + + return 0; + +} + + + + +int handle_request(struct palacios_user_keyed_stream_op *req, + struct palacios_user_keyed_stream_op **resp, + char *dir) +{ + uint64_t datasize; + + switch (req->type) { + case PALACIOS_KSTREAM_OPEN: + case PALACIOS_KSTREAM_CLOSE: + fprintf(stderr,"unsupported stream open or close\n"); + return -1; + break; + + case PALACIOS_KSTREAM_OPEN_KEY: + return handle_open_key(req,resp,dir); + break; + case PALACIOS_KSTREAM_CLOSE_KEY: + return handle_close_key(req,resp,dir); + break; + case PALACIOS_KSTREAM_READ_KEY: + return handle_read_key(req,resp,dir); + break; + case PALACIOS_KSTREAM_WRITE_KEY: + return handle_write_key(req,resp,dir); + break; + default: + fprintf(stderr,"unknown request type\n"); + return -1; + break; + } + + return 0; +} + + +int run(int devfd, char *dir) +{ + struct palacios_user_keyed_stream_op *req; + struct palacios_user_keyed_stream_op *resp; + fd_set readset; + int rc; + + while (1) { + FD_ZERO(&readset); + FD_SET(devfd,&readset); + + rc = select(devfd+1, &readset, 0, 0, 0); + + if (rc>0) { + if (FD_ISSET(devfd,&readset)) { 
+ + int err; + + if (v3_user_keyed_stream_pull_request(devfd, &req)) { + fprintf(stderr, "could not get request\n"); + free(req); + return -1; + } + + err=handle_request(req, &resp, dir); + + if (v3_user_keyed_stream_push_response(devfd, resp)) { + fprintf(stderr,"could not send response\n"); + free(req); + free(resp); + return -1; + } + + if (err) { + fprintf(stderr, "request handling resulted in an error, continuing\n"); + } + + free(req); + free(resp); + } + } + } + + return 0; +} + + + +int main(int argc, char *argv[]) +{ + int devfd; + char *vm, *url; + char *dir; + + if (argc!=3) { + usage(); + exit(-1); + } + + vm=argv[1]; + url=argv[2]; + + if (strncmp(url,"user:file:",10)) { + fprintf(stderr, "Url %s is not a user:file: url\n",url); + exit(-1); + } + + dir = url+10; + + if (dir_setup(dir)) { + fprintf(stderr,"Unable to open or create directory %s\n",dir); + return -1; + } + + fprintf(stderr,"Attempting to attach to vm %s as url %s\n", vm, url); + + if ((devfd = v3_user_keyed_stream_attach(vm,url))<0) { + perror("failed to attach"); + exit(-1); + } + + fprintf(stderr,"Attached and running\n"); + + run(devfd,dir); + + v3_user_keyed_stream_detach(devfd); + + return 0; + +} + diff --git a/palacios/include/devices/lnx_virtio_pci.h b/palacios/include/devices/lnx_virtio_pci.h index 5379479..e48df2c 100644 --- a/palacios/include/devices/lnx_virtio_pci.h +++ b/palacios/include/devices/lnx_virtio_pci.h @@ -40,11 +40,13 @@ #define VIRTIO_NET_SUBDEVICE_ID 1 #define VIRTIO_BLOCK_SUBDEVICE_ID 2 +#define VIRTIO_CONSOLE_SUBDEVICE_ID 3 #define VIRTIO_BALLOON_SUBDEVICE_ID 5 #define VIRTIO_SYMBIOTIC_SUBDEVICE_ID 10 #define VIRTIO_SYMMOD_SUBDEVICE_ID 11 #define VIRTIO_VNET_SUBDEVICE_ID 12 + #define HOST_FEATURES_PORT 0 #define GUEST_FEATURES_PORT 4 #define VRING_PG_NUM_PORT 8 diff --git a/palacios/include/interfaces/vmm_keyed_stream.h b/palacios/include/interfaces/vmm_keyed_stream.h index e06535d..06fafd5 100644 --- a/palacios/include/interfaces/vmm_keyed_stream.h +++ 
b/palacios/include/interfaces/vmm_keyed_stream.h @@ -49,6 +49,7 @@ typedef enum {V3_KS_RD_ONLY,V3_KS_WR_ONLY,V3_KS_WR_ONLY_CREATE} v3_keyed_stream_ v3_keyed_stream_t v3_keyed_stream_open(char *url, v3_keyed_stream_open_t open_type); void v3_keyed_stream_close(v3_keyed_stream_t stream); +void v3_keyed_stream_preallocate_hint_key(v3_keyed_stream_t stream, char *key, uint64_t size); v3_keyed_stream_key_t v3_keyed_stream_open_key(v3_keyed_stream_t stream, char *key); void v3_keyed_stream_close_key(v3_keyed_stream_t stream, char *key); sint64_t v3_keyed_stream_write_key(v3_keyed_stream_t stream, @@ -85,6 +86,10 @@ struct v3_keyed_stream_hooks { void (*close)(v3_keyed_stream_t stream); + void (*preallocate_hint_key)(v3_keyed_stream_t stream, + char *key, + uint64_t size); + v3_keyed_stream_key_t (*open_key)(v3_keyed_stream_t stream, char *key); diff --git a/palacios/include/interfaces/vmm_stream.h b/palacios/include/interfaces/vmm_stream.h index 513cf73..e9c49c7 100644 --- a/palacios/include/interfaces/vmm_stream.h +++ b/palacios/include/interfaces/vmm_stream.h @@ -23,24 +23,34 @@ +struct v3_stream { + void * host_stream_data; + void * guest_stream_data; + uint64_t (*input)(struct v3_stream * stream, uint8_t * buf, uint64_t len); +}; + + #ifdef __V3VEE__ #include -typedef void * v3_stream_t; + /* VM Can be NULL */ -v3_stream_t v3_stream_open(struct v3_vm_info * vm, const char * name); -int v3_stream_write(v3_stream_t stream, uint8_t * buf, uint32_t len); +struct v3_stream * v3_stream_open(struct v3_vm_info * vm, const char * name, + uint64_t (*input)(struct v3_stream * stream, uint8_t * buf, uint64_t len), + void * guest_stream_data); + +uint64_t v3_stream_output(struct v3_stream * stream, uint8_t * buf, uint32_t len); -void v3_stream_close(v3_stream_t stream); +void v3_stream_close(struct v3_stream * stream); #endif struct v3_stream_hooks { - void *(*open)(const char * name, void * private_data); - int (*write)(void * stream, char * buf, int len); - void (*close)(void * 
stream); + void *(*open)(struct v3_stream * stream, const char * name, void * host_vm_data); + uint64_t (*output)(struct v3_stream * stream, char * buf, int len); + void (*close)(struct v3_stream * stream); }; diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 4360a64..51ca2de 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -37,7 +37,7 @@ #include #include #include - +#include #ifdef V3_CONFIG_TELEMETRY @@ -170,7 +170,7 @@ struct v3_vm_info { v3_vm_operating_mode_t run_state; - + struct v3_barrier barrier; struct v3_extensions extensions; diff --git a/palacios/include/palacios/vmcs.h b/palacios/include/palacios/vmcs.h index f8028b5..5d50355 100644 --- a/palacios/include/palacios/vmcs.h +++ b/palacios/include/palacios/vmcs.h @@ -36,6 +36,7 @@ /* Control register exit masks */ #define CR4_VMXE 0x00002000 +#define CR4_PAE 0x00000020 diff --git a/palacios/include/palacios/vmm.h b/palacios/include/palacios/vmm.h index 337226f..4c95f2c 100644 --- a/palacios/include/palacios/vmm.h +++ b/palacios/include/palacios/vmm.h @@ -190,7 +190,7 @@ struct guest_info; if ((os_hooks) && (os_hooks)->start_kernel_thread) { \ (os_hooks)->start_kernel_thread(fn, arg, name); \ } \ - } + } while (0) @@ -331,6 +331,10 @@ void Shutdown_V3( void ); struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name); int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask); int v3_stop_vm(struct v3_vm_info * vm); +int v3_pause_vm(struct v3_vm_info * vm); +int v3_continue_vm(struct v3_vm_info * vm); + + int v3_free_vm(struct v3_vm_info * vm); int v3_deliver_irq(struct v3_vm_info * vm, struct v3_interrupt * intr); diff --git a/palacios/include/palacios/vmm_barrier.h b/palacios/include/palacios/vmm_barrier.h index d4b7eda..19f54fc 100644 --- a/palacios/include/palacios/vmm_barrier.h +++ b/palacios/include/palacios/vmm_barrier.h @@ -22,22 +22,30 @@ #ifdef __V3VEE__ +#include +#include + 
-#include struct v3_barrier { - uint64_t cpus; int active; // If 1, barrier is active, everyone must wait // If 0, barrier is clear, can proceed + struct v3_bitmap cpu_map; + v3_lock_t lock; }; +struct v3_vm_info; +struct guest_info; + +int v3_init_barrier(struct v3_vm_info * vm_info); +int v3_deinit_barrier(struct v3_vm_info * vm_info); -int v3_init_barrier(struct v3_barrier * barrier); +int v3_raise_barrier(struct v3_vm_info * vm_info, struct guest_info * local_core); +int v3_lower_barrier(struct v3_vm_info * vm_info); -int v3_activate_barrier(struct guest_info * core); -int v3_check_barrier(struct guest_info * core); +int v3_wait_at_barrier(struct guest_info * core); #endif diff --git a/palacios/include/palacios/vmm_bitmap.h b/palacios/include/palacios/vmm_bitmap.h new file mode 100644 index 0000000..4842585 --- /dev/null +++ b/palacios/include/palacios/vmm_bitmap.h @@ -0,0 +1,48 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Jack Lange + * Copyright (c) 2011, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + + +#ifndef __VMM_BITMAP_H__ +#define __VMM_BITMAP_H__ + +#ifdef __V3VEE__ +#include + + + +struct v3_bitmap { + int num_bits; // number of valid bit positions in the bitmap + uint8_t * bits; // actual bitmap. Dynamically allocated... 
ugly +}; + + +int v3_bitmap_init(struct v3_bitmap * bitmap, int num_bits); +void v3_bitmap_deinit(struct v3_bitmap * bitmap); +int v3_bitmap_reset(struct v3_bitmap * bitmap); + +int v3_bitmap_set(struct v3_bitmap * bitmap, int index); +int v3_bitmap_clear(struct v3_bitmap * bitmap, int index); +int v3_bitmap_check(struct v3_bitmap * bitmap, int index); + + + +#endif + +#endif diff --git a/palacios/include/palacios/vmm_dev_mgr.h b/palacios/include/palacios/vmm_dev_mgr.h index eeb0e56..1f10874 100644 --- a/palacios/include/palacios/vmm_dev_mgr.h +++ b/palacios/include/palacios/vmm_dev_mgr.h @@ -210,11 +210,11 @@ struct v3_dev_console_ops { struct v3_dev_char_ops { /* Backend implemented functions */ - int (*write)(uint8_t * buf, uint64_t len, void * private_data); + uint64_t (*output)(uint8_t * buf, uint64_t len, void * private_data); // int (*read)(uint8_t * buf, uint64_t len, void * private_data); /* Frontend Implemented functions */ - int (*push)(struct v3_vm_info * vm, uint8_t * buf, uint64_t len, void * private_data); + uint64_t (*input)(struct v3_vm_info * vm, uint8_t * buf, uint64_t len, void * private_data); }; diff --git a/palacios/include/palacios/vmm_types.h b/palacios/include/palacios/vmm_types.h index 1a17997..8d45bcd 100644 --- a/palacios/include/palacios/vmm_types.h +++ b/palacios/include/palacios/vmm_types.h @@ -27,7 +27,7 @@ typedef enum {SHADOW_PAGING, NESTED_PAGING} v3_paging_mode_t; -typedef enum {VM_RUNNING, VM_STOPPED, VM_SUSPENDED, VM_ERROR, VM_EMULATING} v3_vm_operating_mode_t; +typedef enum {VM_RUNNING, VM_STOPPED, VM_PAUSED, VM_ERROR} v3_vm_operating_mode_t; typedef enum {CORE_RUNNING, CORE_STOPPED} v3_core_operating_mode_t; typedef enum {REAL, /*UNREAL,*/ PROTECTED, PROTECTED_PAE, LONG, LONG_32_COMPAT, LONG_16_COMPAT} v3_cpu_mode_t; diff --git a/palacios/include/palacios/vmx_ctrl_regs.h b/palacios/include/palacios/vmx_ctrl_regs.h index 4a8f0d8..789ff4e 100644 --- a/palacios/include/palacios/vmx_ctrl_regs.h +++ 
b/palacios/include/palacios/vmx_ctrl_regs.h @@ -31,7 +31,8 @@ int v3_vmx_handle_cr0_access(struct guest_info * info, struct vmx_exit_info * exit_info); int v3_vmx_handle_cr3_access(struct guest_info * info, struct vmx_exit_cr_qual * cr_qual); - +int v3_vmx_handle_cr4_access(struct guest_info * info, + struct vmx_exit_cr_qual * cr_qual); #endif diff --git a/palacios/src/devices/Kconfig b/palacios/src/devices/Kconfig index 575223b..23a88ff 100644 --- a/palacios/src/devices/Kconfig +++ b/palacios/src/devices/Kconfig @@ -153,6 +153,14 @@ config DEBUG_VIRTIO_SYM help Enable debugging for the Linux Virtio Symbiotic Device +config LINUX_VIRTIO_CONSOLE + bool "Enable Virtio Console Device" + default n + depends on PCI + help + Enable the Virtio Console + + config LINUX_VIRTIO_NET bool "Enable Virtio Network Device" default n @@ -451,6 +459,13 @@ config SERIAL_UART help Include virtual serial port +config DEBUG_SERIAL + bool "Debug Serial Port" + default n + depends on SERIAL_UART + help + Debugging virtual serial port + config CHAR_STREAM bool "Stream based character frontend" depends on STREAM diff --git a/palacios/src/devices/Makefile b/palacios/src/devices/Makefile index 0ab7f7f..1154059 100644 --- a/palacios/src/devices/Makefile +++ b/palacios/src/devices/Makefile @@ -15,6 +15,7 @@ obj-$(V3_CONFIG_LINUX_VIRTIO_BLOCK) += lnx_virtio_blk.o obj-$(V3_CONFIG_LINUX_VIRTIO_SYM) += lnx_virtio_sym.o obj-$(V3_CONFIG_LINUX_VIRTIO_NET) += lnx_virtio_nic.o obj-$(V3_CONFIG_LINUX_VIRTIO_VNET) += lnx_virtio_vnet.o +obj-$(V3_CONFIG_LINUX_VIRTIO_CONSOLE) += lnx_virtio_console.o obj-$(V3_CONFIG_VNET_NIC) += vnet_nic.o obj-$(V3_CONFIG_NVRAM) += nvram.o obj-$(V3_CONFIG_OS_DEBUG) += os_debug.o diff --git a/palacios/src/devices/cga.c b/palacios/src/devices/cga.c index e9556c8..7997818 100644 --- a/palacios/src/devices/cga.c +++ b/palacios/src/devices/cga.c @@ -1153,12 +1153,14 @@ static int cga_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { PrintDebug("Enabling CGA Passthrough\n"); if 
(v3_hook_write_mem(vm, V3_MEM_CORE_ANY, START_ADDR, END_ADDR, START_ADDR, &video_write_mem, dev) == -1) { - PrintDebug("\n\nVideo Hook failed.\n\n"); + PrintError("\n\nVideo Hook failed.\n\n"); + return -1; } } else { if (v3_hook_write_mem(vm, V3_MEM_CORE_ANY, START_ADDR, END_ADDR, video_state->framebuf_pa, &video_write_mem, dev) == -1) { - PrintDebug("\n\nVideo Hook failed.\n\n"); + PrintError("\n\nVideo Hook failed.\n\n"); + return -1; } } diff --git a/palacios/src/devices/char_stream.c b/palacios/src/devices/char_stream.c index f0c762d..2bde181 100644 --- a/palacios/src/devices/char_stream.c +++ b/palacios/src/devices/char_stream.c @@ -27,30 +27,27 @@ struct stream_state { - v3_stream_t stream; + struct v3_stream * stream; struct v3_dev_char_ops char_ops; + struct v3_vm_info * vm; + void * push_fn_arg; }; -static int serial_event_handler(struct v3_vm_info * vm, - struct v3_serial_event * evt, - void * private_data) { - struct stream_state * state = (struct stream_state *)private_data; +static uint64_t stream_input(struct v3_stream * stream, uint8_t * buf, uint64_t len) { + struct stream_state * state = stream->guest_stream_data; - if (state->char_ops.push != NULL){ - state->char_ops.push(vm, evt->data, evt->len, state->push_fn_arg); - } + return state->char_ops.input(state->vm, buf, len, state->push_fn_arg); - return 0; } -static int stream_write(uint8_t * buf, uint64_t length, void * private_data) { +static uint64_t stream_output(uint8_t * buf, uint64_t length, void * private_data) { struct stream_state * state = (struct stream_state *)private_data; - - return v3_stream_write(state->stream, buf, length); + + return v3_stream_output(state->stream, buf, length); } static int stream_free(struct stream_state * state) { @@ -93,8 +90,7 @@ static int stream_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { } - - state->stream = v3_stream_open(vm, stream_name); + state->stream = v3_stream_open(vm, stream_name, stream_input, state); if (state->stream == NULL) { 
PrintError("Could not open stream %s\n", stream_name); @@ -102,7 +98,8 @@ static int stream_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { return -1; } - state->char_ops.write = stream_write; + state->vm = vm; + state->char_ops.output = stream_output; if (v3_dev_connect_char(vm, v3_cfg_val(frontend_cfg, "tag"), &(state->char_ops), frontend_cfg, @@ -113,7 +110,7 @@ static int stream_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { return -1; } - v3_hook_host_event(vm, HOST_SERIAL_EVT, V3_HOST_EVENT_HANDLER(serial_event_handler), state); + return 0; } diff --git a/palacios/src/devices/lnx_virtio_console.c b/palacios/src/devices/lnx_virtio_console.c new file mode 100644 index 0000000..f7f2a11 --- /dev/null +++ b/palacios/src/devices/lnx_virtio_console.c @@ -0,0 +1,532 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + +#include +#include +#include +#include + +#include + + + +struct console_config { + uint16_t cols; + uint16_t rows; +} __attribute__((packed)); + + + + +#define QUEUE_SIZE 128 + +/* Host Feature flags */ +#define VIRTIO_CONSOLE_F_SIZE 0x1 + +struct virtio_console_state { + struct console_config cons_cfg; + struct virtio_config virtio_cfg; + + struct vm_device * pci_bus; + struct pci_device * pci_dev; + + struct virtio_queue queue[2]; + + struct v3_stream * stream; + + + struct virtio_queue * cur_queue; + + struct v3_vm_info * vm; + + int io_range_size; + + void * backend_data; + struct v3_dev_char_ops * ops; +}; + + +struct virtio_console_state * cons_state = NULL; + +static int virtio_reset(struct virtio_console_state * virtio) { + + memset(virtio->queue, 0, sizeof(struct virtio_queue) * 2); + + virtio->cur_queue = &(virtio->queue[0]); + + virtio->virtio_cfg.status = 0; + virtio->virtio_cfg.pci_isr = 0; + + /* Console configuration */ + // virtio->virtio_cfg.host_features = VIRTIO_NOTIFY_HOST; + + // Virtio Console uses two queues + virtio->queue[0].queue_size = QUEUE_SIZE; + virtio->queue[1].queue_size = QUEUE_SIZE; + + + memset(&(virtio->cons_cfg), 0, sizeof(struct console_config)); + + return 0; +} + +static int get_desc_count(struct virtio_queue * q, int index) { + struct vring_desc * tmp_desc = &(q->desc[index]); + int cnt = 1; + + while (tmp_desc->flags & VIRTIO_NEXT_FLAG) { + tmp_desc = &(q->desc[tmp_desc->next]); + cnt++; + } + + return cnt; +} + + +static int handle_kick(struct guest_info * core, struct virtio_console_state * virtio) { + struct virtio_queue * q = virtio->cur_queue; + + PrintDebug("VIRTIO CONSOLE KICK: cur_index=%d (mod=%d), avail_index=%d\n", + q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index); + + while (q->cur_avail_idx < q->avail->index) { + struct vring_desc * tmp_desc = NULL; + uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE]; + int desc_cnt = get_desc_count(q, desc_idx); + int i = 0; + 
uint32_t req_len = 0; + + + PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE); + + for (i = 0; i < desc_cnt; i++) { + addr_t page_addr; + tmp_desc = &(q->desc[desc_idx]); + + + PrintDebug("Console output (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", + tmp_desc, + (void *)(addr_t)(tmp_desc->addr_gpa), tmp_desc->length, + tmp_desc->flags, tmp_desc->next); + + if (v3_gpa_to_hva(core, tmp_desc->addr_gpa, (addr_t *)&(page_addr)) == -1) { + PrintError("Could not translate block header address\n"); + return -1; + } + + virtio->ops->output((uint8_t *)page_addr, tmp_desc->length, virtio->backend_data); + + PrintDebug("Guest Console Currently Ignored\n"); + + req_len += tmp_desc->length; + desc_idx = tmp_desc->next; + } + + q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE]; + q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // What do we set this to???? + + q->used->index++; + q->cur_avail_idx++; + } + + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line); + v3_pci_raise_irq(virtio->pci_bus, 0, virtio->pci_dev); + virtio->virtio_cfg.pci_isr = VIRTIO_ISR_ACTIVE; + } + + return 0; +} + + +static uint64_t virtio_input(struct v3_vm_info * vm, uint8_t * buf, uint64_t len, void * private_data) { + struct virtio_console_state * cons_state = private_data; + struct virtio_queue * q = &(cons_state->queue[0]); + int xfer_len = 0; + + PrintDebug("VIRTIO CONSOLE Handle Input: cur_index=%d (mod=%d), avail_index=%d\n", + q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index); + + + if (q->cur_avail_idx != q->avail->index) { + uint16_t input_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; + struct vring_desc * input_desc = NULL; + uint8_t * input_buf = NULL; + + + input_desc = &(q->desc[input_idx]); + + if (v3_gpa_to_hva(&(cons_state->vm->cores[0]), input_desc->addr_gpa, (addr_t *)&(input_buf)) == -1) { + 
PrintError("Could not translate receive buffer address\n"); + return 0; + } + + memset(input_buf, 0, input_desc->length); + + xfer_len = (input_desc->length > len) ? len : input_desc->length; + + memcpy(input_buf, buf, xfer_len); + + + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; + q->used->ring[q->used->index % q->queue_size].length = xfer_len; + + q->used->index++; + q->cur_avail_idx++; + } + + + // say hello + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + v3_pci_raise_irq(cons_state->pci_bus, 0, cons_state->pci_dev); + cons_state->virtio_cfg.pci_isr = 0x1; + } + + return xfer_len; +} + + +static int virtio_io_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * private_data) { + struct virtio_console_state * virtio = (struct virtio_console_state *)private_data; + int port_idx = port % virtio->io_range_size; + + + PrintDebug("VIRTIO CONSOLE Write for port %d (index=%d) len=%d, value=%x\n", + port, port_idx, length, *(uint32_t *)src); + + + + switch (port_idx) { + case GUEST_FEATURES_PORT: + if (length != 4) { + PrintError("Illegal write length for guest features\n"); + return -1; + } + + virtio->virtio_cfg.guest_features = *(uint32_t *)src; + + break; + case VRING_PG_NUM_PORT: + if (length == 4) { + addr_t pfn = *(uint32_t *)src; + addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT); + + + virtio->cur_queue->pfn = pfn; + + virtio->cur_queue->ring_desc_addr = page_addr ; + virtio->cur_queue->ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc)); + virtio->cur_queue->ring_used_addr = ( virtio->cur_queue->ring_avail_addr + \ + sizeof(struct vring_avail) + \ + (QUEUE_SIZE * sizeof(uint16_t))); + + // round up to next page boundary. 
+ virtio->cur_queue->ring_used_addr = (virtio->cur_queue->ring_used_addr + 0xfff) & ~0xfff; + + if (v3_gpa_to_hva(core, virtio->cur_queue->ring_desc_addr, (addr_t *)&(virtio->cur_queue->desc)) == -1) { + PrintError("Could not translate ring descriptor address\n"); + return -1; + } + + + if (v3_gpa_to_hva(core, virtio->cur_queue->ring_avail_addr, (addr_t *)&(virtio->cur_queue->avail)) == -1) { + PrintError("Could not translate ring available address\n"); + return -1; + } + + + if (v3_gpa_to_hva(core, virtio->cur_queue->ring_used_addr, (addr_t *)&(virtio->cur_queue->used)) == -1) { + PrintError("Could not translate ring used address\n"); + return -1; + } + + PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n", + (void *)(virtio->cur_queue->ring_desc_addr), + (void *)(virtio->cur_queue->ring_avail_addr), + (void *)(virtio->cur_queue->ring_used_addr)); + + PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n", + virtio->cur_queue->desc, virtio->cur_queue->avail, virtio->cur_queue->used); + + } else { + PrintError("Illegal write length for page frame number\n"); + return -1; + } + break; + case VRING_Q_SEL_PORT: + virtio->virtio_cfg.vring_queue_selector = *(uint16_t *)src; + + if (virtio->virtio_cfg.vring_queue_selector > 1) { + PrintError("Virtio Console device only uses 2 queue, selected %d\n", + virtio->virtio_cfg.vring_queue_selector); + return -1; + } + + virtio->cur_queue = &(virtio->queue[virtio->virtio_cfg.vring_queue_selector]); + + break; + case VRING_Q_NOTIFY_PORT: + PrintDebug("Handling Kick\n"); + if (handle_kick(core, virtio) == -1) { + PrintError("Could not handle Console Notification\n"); + return -1; + } + break; + case VIRTIO_STATUS_PORT: + virtio->virtio_cfg.status = *(uint8_t *)src; + + if (virtio->virtio_cfg.status == 0) { + PrintDebug("Resetting device\n"); + virtio_reset(virtio); + } + + break; + + case VIRTIO_ISR_PORT: + virtio->virtio_cfg.pci_isr = *(uint8_t *)src; + break; + default: + return -1; + break; + } + + return length; +} + + +static 
int virtio_io_read(struct guest_info * core, uint16_t port, void * dst, uint_t length, void * private_data) { + struct virtio_console_state * virtio = (struct virtio_console_state *)private_data; + int port_idx = port % virtio->io_range_size; + + + PrintDebug("VIRTIO CONSOLE Read for port %d (index =%d), length=%d\n", + port, port_idx, length); + + switch (port_idx) { + case HOST_FEATURES_PORT: + if (length != 4) { + PrintError("Illegal read length for host features\n"); + return -1; + } + + *(uint32_t *)dst = virtio->virtio_cfg.host_features; + + break; + case VRING_PG_NUM_PORT: + if (length != 4) { + PrintError("Illegal read length for page frame number\n"); + return -1; + } + + *(uint32_t *)dst = virtio->cur_queue->pfn; + + break; + case VRING_SIZE_PORT: + if (length != 2) { + PrintError("Illegal read length for vring size\n"); + return -1; + } + + *(uint16_t *)dst = virtio->cur_queue->queue_size; + + break; + + case VIRTIO_STATUS_PORT: + if (length != 1) { + PrintError("Illegal read length for status\n"); + return -1; + } + + *(uint8_t *)dst = virtio->virtio_cfg.status; + break; + + case VIRTIO_ISR_PORT: + *(uint8_t *)dst = virtio->virtio_cfg.pci_isr; + virtio->virtio_cfg.pci_isr = 0; + v3_pci_lower_irq(virtio->pci_bus, 0, virtio->pci_dev); + break; + + default: + if ( (port_idx >= sizeof(struct virtio_config)) && + (port_idx < (sizeof(struct virtio_config) + sizeof(struct console_config))) ) { + int cfg_offset = port_idx - sizeof(struct virtio_config); + uint8_t * cfg_ptr = (uint8_t *)&(virtio->cons_cfg); + + memcpy(dst, cfg_ptr + cfg_offset, length); + + } else { + PrintError("Read of Unhandled Virtio Read\n"); + return -1; + } + + break; + } + + return length; +} + + + + +static int connect_fn(struct v3_vm_info * vm, + void * frontend_data, + struct v3_dev_char_ops * ops, + v3_cfg_tree_t * cfg, + void * private_data, + void ** push_fn_arg) { + + struct virtio_console_state * state = (struct virtio_console_state *)frontend_data; + + state->ops = ops; + 
state->backend_data = private_data; + + state->ops->input = virtio_input; + *push_fn_arg = state; + + return 0; +} + +static int virtio_free(struct virtio_console_state * virtio) { + + // unregister from PCI + + V3_Free(virtio); + return 0; +} + + +static struct v3_device_ops dev_ops = { + .free = (int (*)(void *))virtio_free, + +}; + + + + +static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { + struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus")); + struct virtio_console_state * virtio_state = NULL; + struct pci_device * pci_dev = NULL; + char * dev_id = v3_cfg_val(cfg, "ID"); + + PrintDebug("Initializing VIRTIO Console device\n"); + + if (pci_bus == NULL) { + PrintError("VirtIO devices require a PCI Bus"); + return -1; + } + + + virtio_state = (struct virtio_console_state *)V3_Malloc(sizeof(struct virtio_console_state)); + memset(virtio_state, 0, sizeof(struct virtio_console_state)); + + + cons_state = virtio_state; + cons_state->vm = vm; + + + + struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, virtio_state); + + if (dev == NULL) { + PrintError("Could not attach device %s\n", dev_id); + V3_Free(virtio_state); + return -1; + } + + // PCI initialization + { + struct v3_pci_bar bars[6]; + int num_ports = sizeof(struct virtio_config) + sizeof(struct console_config); + int tmp_ports = num_ports; + int i; + + + // This gets the number of ports, rounded up to a power of 2 + virtio_state->io_range_size = 1; // must be a power of 2 + + while (tmp_ports > 0) { + tmp_ports >>= 1; + virtio_state->io_range_size <<= 1; + } + + // this is to account for any low order bits being set in num_ports + // if there are none, then num_ports was already a power of 2 so we shift right to reset it + if ((num_ports & ((virtio_state->io_range_size >> 1) - 1)) == 0) { + virtio_state->io_range_size >>= 1; + } + + + for (i = 0; i < 6; i++) { + bars[i].type = PCI_BAR_NONE; + } + + bars[0].type = PCI_BAR_IO; + bars[0].default_base_port = -1; + 
bars[0].num_ports = virtio_state->io_range_size; + + bars[0].io_read = virtio_io_read; + bars[0].io_write = virtio_io_write; + bars[0].private_data = virtio_state; + + + pci_dev = v3_pci_register_device(pci_bus, PCI_STD_DEVICE, + 0, PCI_AUTO_DEV_NUM, 0, + "LNX_VIRTIO_CONSOLE", bars, + NULL, NULL, NULL, virtio_state); + + if (!pci_dev) { + PrintError("Could not register PCI Device\n"); + v3_remove_device(dev); + return -1; + } + + pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID; + pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID; + + + pci_dev->config_header.device_id = VIRTIO_CONSOLE_DEV_ID; + pci_dev->config_header.class = PCI_CLASS_DISPLAY; + pci_dev->config_header.subclass = PCI_DISPLAY_SUBCLASS_OTHER; + + pci_dev->config_header.subsystem_id = VIRTIO_CONSOLE_SUBDEVICE_ID; + + pci_dev->config_header.intr_pin = 1; + + pci_dev->config_header.max_latency = 1; // ?? (qemu does it...) + + + virtio_state->pci_dev = pci_dev; + virtio_state->pci_bus = pci_bus; + } + + virtio_reset(virtio_state); + + if (v3_dev_add_char_frontend(vm, dev_id, connect_fn, (void *)virtio_state) == -1) { + PrintError("Could not register %s as frontend\n", dev_id); + v3_remove_device(dev); + return -1; + } + + + return 0; +} + + +device_register("LNX_VIRTIO_CONSOLE", virtio_init) diff --git a/palacios/src/devices/serial.c b/palacios/src/devices/serial.c index f0d405d..52c3202 100644 --- a/palacios/src/devices/serial.c +++ b/palacios/src/devices/serial.c @@ -393,6 +393,8 @@ static int queue_data(struct v3_vm_info * vm, struct serial_port * com, com->lsr.oe = 1; //overrun error bit set } + updateIRQ(vm, com); + return 0; } @@ -411,9 +413,7 @@ static int queue_data(struct v3_vm_info * vm, struct serial_port * com, com->lsr.thre = 0; //reset thre and temt bits. com->lsr.temt = 0; } - - updateIRQ(vm, com); - + return 0; } @@ -486,9 +486,10 @@ static int write_data_port(struct guest_info * core, uint16_t port, /* JRL: Some buffering would probably be a good idea here.... 
*/ if (com_port->ops) { - com_port->ops->write(val, 1, com_port->backend_data); + com_port->ops->output(val, 1, com_port->backend_data); } else { queue_data(core->vm_info, com_port, &(com_port->tx_buffer), *val); + updateIRQ(core->vm_info, com_port); } } @@ -896,7 +897,7 @@ static int init_serial_port(struct serial_port * com) { return 0; } -static int serial_input(struct v3_vm_info * vm, uint8_t * buf, uint64_t len, void * priv_data){ +static uint64_t serial_input(struct v3_vm_info * vm, uint8_t * buf, uint64_t len, void * priv_data){ struct serial_port * com_port = (struct serial_port *)priv_data; int i; @@ -904,6 +905,8 @@ static int serial_input(struct v3_vm_info * vm, uint8_t * buf, uint64_t len, voi queue_data(vm, com_port, &(com_port->rx_buffer), buf[i]); } + updateIRQ(vm, com_port); + return len; } @@ -937,7 +940,7 @@ static int connect_fn(struct v3_vm_info * vm, com->ops = ops; com->backend_data = private_data; - com->ops->push = serial_input; + com->ops->input = serial_input; *push_fn_arg = com; return 0; diff --git a/palacios/src/extensions/ext_mtrr.c b/palacios/src/extensions/ext_mtrr.c new file mode 100644 index 0000000..7ff0dc0 --- /dev/null +++ b/palacios/src/extensions/ext_mtrr.c @@ -0,0 +1,625 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + +#include +#include +#include + + + +#define MTRR_CAP 0xfe + +#define MTRR_PHYS_BASE_0 0x200 +#define MTRR_PHYS_MASK_0 0x201 +#define MTRR_PHYS_BASE_1 0x202 +#define MTRR_PHYS_MASK_1 0x203 +#define MTRR_PHYS_BASE_2 0x204 +#define MTRR_PHYS_MASK_2 0x205 +#define MTRR_PHYS_BASE_3 0x206 +#define MTRR_PHYS_MASK_3 0x207 +#define MTRR_PHYS_BASE_4 0x208 +#define MTRR_PHYS_MASK_4 0x209 +#define MTRR_PHYS_BASE_5 0x20a +#define MTRR_PHYS_MASK_5 0x20b +#define MTRR_PHYS_BASE_6 0x20c +#define MTRR_PHYS_MASK_6 0x20d +#define MTRR_PHYS_BASE_7 0x20e +#define MTRR_PHYS_MASK_7 0x20f + +#define MTRR_FIX_64K_00000 0x250 +#define MTRR_FIX_16K_80000 0x258 +#define MTRR_FIX_16K_A0000 0x259 +#define MTRR_FIX_4K_C0000 0x268 +#define MTRR_FIX_4K_C8000 0x269 +#define MTRR_FIX_4K_D0000 0x26a +#define MTRR_FIX_4K_D8000 0x26b +#define MTRR_FIX_4K_E0000 0x26c +#define MTRR_FIX_4K_E8000 0x26d +#define MTRR_FIX_4K_F0000 0x26e +#define MTRR_FIX_4K_F8000 0x26f + +#define PAT 0x277 + +#define MTRR_DEF_TYPE 0x2ff + + + + +struct ia32_pat { + union { + uint64_t value; + + struct { + uint64_t pa_0 : 3; + uint64_t rsvd0 : 5; + uint64_t pa_1 : 3; + uint64_t rsvd1 : 5; + uint64_t pa_2 : 3; + uint64_t rsvd2 : 5; + uint64_t pa_3 : 3; + uint64_t rsvd3 : 5; + uint64_t pa_4 : 3; + uint64_t rsvd4 : 5; + uint64_t pa_5 : 3; + uint64_t rsvd5 : 5; + uint64_t pa_6 : 3; + uint64_t rsvd6 : 5; + uint64_t pa_7 : 3; + uint64_t rsvd7 : 5; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + + +struct mtrr_cap { + union { + uint64_t value; + + struct { + uint64_t var_reg_cnt : 8; + uint64_t fix : 1; + uint64_t rsvd0 : 1; + uint64_t wr_combine : 1; + uint64_t rsvd1 : 53; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +struct mtrr_def_type { + union { + uint64_t value; + + struct { + uint64_t def_type : 8; + uint64_t rsvd0 : 2; + uint64_t fixed_enable : 1; + uint64_t mtrr_emable : 1; + uint64_t rsvd1 : 52; + } __attribute__((packed)); + } 
__attribute__((packed)); +} __attribute__((packed)); + + +struct mtrr_phys_base { + union { + uint64_t value; + + struct { + uint64_t type : 8; + uint64_t rsvd0 : 4; + uint64_t base : 40; + uint64_t rsvd1 : 12; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + + +struct mtrr_phys_mask { + union { + uint64_t value; + + struct { + uint64_t rsvd0 : 11; + uint64_t valid : 1; + uint64_t mask : 40; + uint64_t rsvd1 : 12; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +struct mtrr_fixed { + union { + uint64_t value; + uint8_t types[8]; + } __attribute__((packed)); +} __attribute__((packed)); + + + +/* AMD Specific Registers */ +#define SYSCONFIG 0xc0010010 +#define TOP_MEM 0xc001001a +#define TOP_MEM2 0xc001001d + +#define IORR_BASE0 0xc0010016 +#define IORR_MASK0 0xc0010017 +#define IORR_BASE1 0xc0010018 +#define IORR_MASK1 0xc0010019 + +struct syscfg_reg { + union { + uint64_t value; + + struct { + uint64_t rsvd0 : 18; + uint64_t mfde : 1; // 1 = enables RdMem and WrMem bits in fixed-range MTRRs + uint64_t mfdm : 1; // 1 = software can modify RdMem and WrMem bits + uint64_t mvdm : 1; // 1 = enables TOP_MEM reg and var range MTRRs + uint64_t tom2 : 1; // 1 = enables TOP_MEM2 reg + uint64_t tom2_force_wb : 1; // 1 = enables default mem type for 4GB-TOP_MEM2 range + uint64_t rsvd1 : 41; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +struct top_of_mem_reg { + union { + uint64_t value; + + struct { + uint64_t rsvd0 : 23; + uint64_t phys_addr : 29; + uint64_t rsvd1 : 12; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + + +struct iorr_base { + union { + uint64_t value; + + struct { + uint64_t rsvd0 : 3; + uint64_t wrmem : 1; // 1 = writes go to memory, 0 = writes go to mmap IO + uint64_t rdmem : 1; // 1 = reads go to memory, 0 = reads go to mmap IO + uint64_t rsvd1 : 7; + uint64_t base : 40; + uint64_t rsvd2 : 12; + } 
__attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +struct iorr_mask { + union { + uint64_t value; + + struct { + uint64_t rsvd0 : 11; + uint64_t valid : 1; + uint64_t mask : 40; + uint64_t rsvd1 : 12; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +/* Intel Specific Registers */ +#define SMRR_PHYS_BASE 0x1f2 +#define SMRR_PHYS_MASK 0x1f3 + +struct smrr_phys_base { + union { + uint64_t value; + + struct { + uint64_t type : 8; + uint64_t rsvd0 : 4; + uint64_t base : 20; + uint64_t rsvd1 : 32; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + +struct smrr_phys_mask { + union { + uint64_t value; + + struct { + uint64_t rsvd0 : 11; + uint64_t valid : 1; + uint64_t mask : 20; + uint64_t rsvd1 : 32; + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + + + +struct mtrr_state { + struct ia32_pat pat; + struct mtrr_cap cap; + struct mtrr_def_type def_type; + struct mtrr_phys_base bases[8]; + struct mtrr_phys_mask masks[8]; + + struct mtrr_fixed fixed_64k; + struct mtrr_fixed fixed_16k[2]; + struct mtrr_fixed fixed_4k[8]; + + /* AMD specific registers */ + struct syscfg_reg amd_syscfg; + struct top_of_mem_reg amd_tom; + struct top_of_mem_reg amd_tom2; + + struct iorr_base iorr_bases[2]; + struct iorr_mask iorr_masks[2]; + + /* Intel Specific registers */ + struct smrr_phys_base intel_smrr_base; + struct smrr_phys_mask intel_smrr_mask; + +}; + +static void init_state(struct mtrr_state * state) { + state->pat.value = 0x0007040600070406LL; + state->cap.value = 0x0000000000000508LL; + + state->amd_syscfg.value = 0x0000000000020601LL; + state->amd_tom.value = 0x0000000004000000LL; + + return; +} + +static int mtrr_cap_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->cap.value; + return 0; +} + +static int 
mtrr_cap_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->cap.value = src.value; + return 0; +} + +static int pat_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->pat.value; + return 0; +} + +static int pat_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->pat.value = src.value; + return 0; +} + +static int def_type_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->def_type.value; + return 0; +} + +static int def_type_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->def_type.value = src.value; + return 0; +} + + +static int mtrr_phys_base_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int base_index = (msr - MTRR_PHYS_BASE_0) / 2; + dst->value = state->bases[base_index].value; + return 0; +} + +static int mtrr_phys_base_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int base_index = (msr - MTRR_PHYS_BASE_0) / 2; + state->bases[base_index].value = src.value; + return 0; +} + +static int mtrr_phys_mask_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int mask_index = (msr - MTRR_PHYS_MASK_0) / 2; + dst->value = state->masks[mask_index].value; + return 0; +} + +static int mtrr_phys_mask_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct 
mtrr_state * state = (struct mtrr_state *)priv_data; + int mask_index = (msr - MTRR_PHYS_MASK_0) / 2; + state->masks[mask_index].value = src.value; + return 0; +} + +static int mtrr_fix_64k_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->fixed_64k.value; + return 0; +} + +static int mtrr_fix_64k_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->fixed_64k.value = src.value; + return 0; +} + +static int mtrr_fix_16k_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int index = msr - MTRR_FIX_16K_80000; + dst->value = state->fixed_16k[index].value; + return 0; +} + +static int mtrr_fix_16k_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int index = msr - MTRR_FIX_16K_80000; + state->fixed_16k[index].value = src.value; + return 0; +} + +static int mtrr_fix_4k_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int index = msr - MTRR_FIX_4K_C0000; + dst->value = state->fixed_4k[index].value; + return 0; +} + +static int mtrr_fix_4k_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int index = msr - MTRR_FIX_4K_C0000; + state->fixed_4k[index].value = src.value; + return 0; +} + +/* AMD specific registers */ +static int amd_syscfg_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->amd_syscfg.value; + return 0; +} + +static int amd_syscfg_write(struct guest_info * core, uint_t msr, v3_msr_t 
src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->amd_syscfg.value = src.value; + return 0; +} + +static int amd_top_mem_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + + if (msr == TOP_MEM) { + dst->value = state->amd_tom.value; + } else if (msr == TOP_MEM2) { + dst->value = state->amd_tom2.value; + } + + return 0; +} + +static int amd_top_mem_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + + if (msr == TOP_MEM) { + state->amd_tom.value = src.value; + } else if (msr == TOP_MEM2) { + state->amd_tom2.value = src.value; + } + + return 0; +} + + +static int amd_iorr_base_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int base_index = (msr - IORR_BASE0) / 2; + dst->value = state->iorr_bases[base_index].value; + return 0; +} + +static int amd_iorr_base_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int base_index = (msr - IORR_BASE0) / 2; + state->iorr_bases[base_index].value = src.value; + return 0; +} + +static int amd_iorr_mask_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int mask_index = (msr - IORR_MASK0) / 2; + dst->value = state->iorr_masks[mask_index].value; + return 0; +} + +static int amd_iorr_mask_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + int mask_index = (msr - IORR_MASK0) / 2; + state->iorr_masks[mask_index].value = src.value; + return 0; +} + + +static int intel_smrr_base_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * 
priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->intel_smrr_base.value; + return 0; +} + +static int intel_smrr_base_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->intel_smrr_base.value = src.value; + return 0; +} + +static int intel_smrr_mask_read(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + dst->value = state->intel_smrr_mask.value; + return 0; +} + +static int intel_smrr_mask_write(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + state->intel_smrr_mask.value = src.value; + return 0; +} + + +static int deinit_mtrrs(struct v3_vm_info * vm, void * priv_data) { + struct mtrr_state * state = (struct mtrr_state *)priv_data; + + v3_unhook_msr(vm, MTRR_CAP); + v3_unhook_msr(vm, PAT); + v3_unhook_msr(vm, MTRR_DEF_TYPE); + + v3_unhook_msr(vm, MTRR_PHYS_BASE_0); + v3_unhook_msr(vm, MTRR_PHYS_BASE_1); + v3_unhook_msr(vm, MTRR_PHYS_BASE_2); + v3_unhook_msr(vm, MTRR_PHYS_BASE_3); + v3_unhook_msr(vm, MTRR_PHYS_BASE_4); + v3_unhook_msr(vm, MTRR_PHYS_BASE_5); + v3_unhook_msr(vm, MTRR_PHYS_BASE_6); + v3_unhook_msr(vm, MTRR_PHYS_BASE_7); + v3_unhook_msr(vm, MTRR_PHYS_MASK_0); + v3_unhook_msr(vm, MTRR_PHYS_MASK_1); + v3_unhook_msr(vm, MTRR_PHYS_MASK_2); + v3_unhook_msr(vm, MTRR_PHYS_MASK_3); + v3_unhook_msr(vm, MTRR_PHYS_MASK_4); + v3_unhook_msr(vm, MTRR_PHYS_MASK_5); + v3_unhook_msr(vm, MTRR_PHYS_MASK_6); + v3_unhook_msr(vm, MTRR_PHYS_MASK_7); + + v3_unhook_msr(vm, MTRR_FIX_64K_00000); + v3_unhook_msr(vm, MTRR_FIX_16K_80000); + v3_unhook_msr(vm, MTRR_FIX_16K_A0000); + v3_unhook_msr(vm, MTRR_FIX_4K_C0000); + v3_unhook_msr(vm, MTRR_FIX_4K_C8000); + v3_unhook_msr(vm, MTRR_FIX_4K_D0000); + v3_unhook_msr(vm, MTRR_FIX_4K_D8000); + v3_unhook_msr(vm, 
MTRR_FIX_4K_E0000); + v3_unhook_msr(vm, MTRR_FIX_4K_E8000); + v3_unhook_msr(vm, MTRR_FIX_4K_F0000); + v3_unhook_msr(vm, MTRR_FIX_4K_F8000); + + /* AMD specific */ + v3_unhook_msr(vm, SYSCONFIG); + v3_unhook_msr(vm, TOP_MEM); + v3_unhook_msr(vm, TOP_MEM2); + + v3_unhook_msr(vm, IORR_BASE0); + v3_unhook_msr(vm, IORR_BASE1); + v3_unhook_msr(vm, IORR_MASK0); + v3_unhook_msr(vm, IORR_MASK1); + + /* Intel Specific */ + v3_unhook_msr(vm, SMRR_PHYS_BASE); + v3_unhook_msr(vm, SMRR_PHYS_MASK); + + + V3_Free(state); + return 0; +} + + +static int init_mtrrs(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) { + struct mtrr_state * state = NULL; + int ret = 0; + + state = V3_Malloc(sizeof(struct mtrr_state)); + if (state == NULL) { PrintError("Could not allocate MTRR state\n"); return -1; } // bail out before memset() touches a failed allocation + memset(state, 0, sizeof(struct mtrr_state)); + *priv_data = state; + + init_state(state); + + // hook MSRs + ret |= v3_hook_msr(vm, MTRR_CAP, mtrr_cap_read, mtrr_cap_write, state); + ret |= v3_hook_msr(vm, PAT, pat_read, pat_write, state); + ret |= v3_hook_msr(vm, MTRR_DEF_TYPE, def_type_read, def_type_write, state); + + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_0, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_1, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_2, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_3, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_4, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_5, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_6, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_BASE_7, mtrr_phys_base_read, mtrr_phys_base_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_0, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_1, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= 
v3_hook_msr(vm, MTRR_PHYS_MASK_2, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_3, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_4, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_5, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_6, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + ret |= v3_hook_msr(vm, MTRR_PHYS_MASK_7, mtrr_phys_mask_read, mtrr_phys_mask_write, state); + + ret |= v3_hook_msr(vm, MTRR_FIX_64K_00000, mtrr_fix_64k_read, mtrr_fix_64k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_16K_80000, mtrr_fix_16k_read, mtrr_fix_16k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_16K_A0000, mtrr_fix_16k_read, mtrr_fix_16k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_C0000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_C8000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_D0000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_D8000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_E0000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_E8000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_F0000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + ret |= v3_hook_msr(vm, MTRR_FIX_4K_F8000, mtrr_fix_4k_read, mtrr_fix_4k_write, state); + + /* AMD Specific */ + ret |= v3_hook_msr(vm, SYSCONFIG, amd_syscfg_read, amd_syscfg_write, state); + ret |= v3_hook_msr(vm, TOP_MEM, amd_top_mem_read, amd_top_mem_write, state); + ret |= v3_hook_msr(vm, TOP_MEM2, amd_top_mem_read, amd_top_mem_write, state); + + ret |= v3_hook_msr(vm, IORR_BASE0, amd_iorr_base_read, amd_iorr_base_write, state); + ret |= v3_hook_msr(vm, IORR_BASE1, amd_iorr_base_read, amd_iorr_base_write, state); + ret |= 
v3_hook_msr(vm, IORR_MASK0, amd_iorr_mask_read, amd_iorr_mask_write, state); + ret |= v3_hook_msr(vm, IORR_MASK1, amd_iorr_mask_read, amd_iorr_mask_write, state); + + + /* INTEL specific */ + ret |= v3_hook_msr(vm, SMRR_PHYS_BASE, intel_smrr_base_read, intel_smrr_base_write, state); + ret |= v3_hook_msr(vm, SMRR_PHYS_MASK, intel_smrr_mask_read, intel_smrr_mask_write, state); + + if (ret != 0) { + PrintError("Failed to hook all MTRR MSRs. Aborting...\n"); + deinit_mtrrs(vm, state); + return -1; + } + + + return 0; +} + + + + +static struct v3_extension_impl mtrr_impl = { + .name = "MTRRS", + .init = init_mtrrs, + .deinit = deinit_mtrrs, + .core_init = NULL, + .core_deinit = NULL, + .on_entry = NULL, + .on_exit = NULL +}; + +register_extension(&mtrr_impl); diff --git a/palacios/src/interfaces/vmm_keyed_stream.c b/palacios/src/interfaces/vmm_keyed_stream.c index 4ba376b..79d69ac 100644 --- a/palacios/src/interfaces/vmm_keyed_stream.c +++ b/palacios/src/interfaces/vmm_keyed_stream.c @@ -46,6 +46,14 @@ void v3_keyed_stream_close(v3_keyed_stream_t stream) } +void v3_keyed_stream_preallocate_hint_key(v3_keyed_stream_t stream, char *key, uint64_t size) +{ + V3_ASSERT(keyed_stream_hooks != NULL); + V3_ASSERT(keyed_stream_hooks->preallocate_hint_key != NULL); + + keyed_stream_hooks->preallocate_hint_key(stream,key,size); // void function: invoke the host hook without returning an expression +} + v3_keyed_stream_key_t v3_keyed_stream_open_key(v3_keyed_stream_t stream, char *key) { V3_ASSERT(keyed_stream_hooks != NULL); diff --git a/palacios/src/interfaces/vmm_stream.c b/palacios/src/interfaces/vmm_stream.c index 66ce081..377a545 100644 --- a/palacios/src/interfaces/vmm_stream.c +++ b/palacios/src/interfaces/vmm_stream.c @@ -28,28 +28,38 @@ static struct v3_stream_hooks * stream_hooks = NULL; // VM can be NULL -v3_stream_t v3_stream_open(struct v3_vm_info * vm, const char * name) { +struct v3_stream * v3_stream_open(struct v3_vm_info * vm, const char * name, + uint64_t (*input)(struct v3_stream * stream, uint8_t * buf, uint64_t len), + 
void * guest_stream_data) { + struct v3_stream * stream = NULL; + V3_ASSERT(stream_hooks != NULL); V3_ASSERT(stream_hooks->open != NULL); - return stream_hooks->open(name, vm->host_priv_data); + stream = V3_Malloc(sizeof(struct v3_stream)); // allocate the whole structure, not just a pointer's worth of bytes + if (stream == NULL) { return NULL; } // allocation failed: report it to the caller instead of writing through NULL + stream->input = input; + stream->guest_stream_data = guest_stream_data; + stream->host_stream_data = stream_hooks->open(stream, name, (vm != NULL) ? vm->host_priv_data : NULL); // vm is allowed to be NULL, so do not dereference it blindly + + return stream; } -int v3_stream_write(v3_stream_t stream, uint8_t * buf, uint32_t len) { +uint64_t v3_stream_output(struct v3_stream * stream, uint8_t * buf, uint32_t len) { V3_ASSERT(stream_hooks != NULL); - V3_ASSERT(stream_hooks->write != NULL); + V3_ASSERT(stream_hooks->output != NULL); - return stream_hooks->write(stream, buf, len); + return stream_hooks->output(stream, buf, len); } -void v3_stream_close(v3_stream_t stream) { +void v3_stream_close(struct v3_stream * stream) { V3_ASSERT(stream_hooks != NULL); V3_ASSERT(stream_hooks->close != NULL); - return stream_hooks->close(stream); -} - + stream_hooks->close(stream); + V3_Free(stream); +} diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile index 4c55738..d067aae 100644 --- a/palacios/src/palacios/Makefile +++ b/palacios/src/palacios/Makefile @@ -35,7 +35,8 @@ obj-y := \ vmm_extensions.o \ vmm_mtrr.o \ vmm_multitree.o \ - + vmm_bitmap.o \ + vmm_barrier.o \ diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index bdb9862..c3bce64 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -1,4 +1,3 @@ - /* * This file is part of the Palacios Virtual Machine Monitor developed * by the V3VEE Project with funding from the United States National @@ -36,6 +35,8 @@ #include #include +#include + #include @@ -81,6 +82,25 @@ static vmcb_t * Allocate_VMCB() { } +static int v3_svm_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) +{ + int status; + + // Call arch-independent handler + if ((status = 
v3_handle_efer_write(core, msr, src, priv_data)) != 0) { + return status; + } + + // SVM-specific code + { + // Ensure that hardware visible EFER.SVME bit is set (SVM Enable) + struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer); + hw_efer->svme = 1; + } + + return 0; +} + static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { vmcb_ctrl_t * ctrl_area = GET_VMCB_CTRL_AREA(vmcb); @@ -221,7 +241,7 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) { v3_hook_msr(core->vm_info, EFER_MSR, &v3_handle_efer_read, - &v3_handle_efer_write, + &v3_svm_handle_efer_write, core); if (core->shdw_pg_mode == SHADOW_PAGING) { @@ -673,6 +693,8 @@ int v3_start_svm_guest(struct guest_info * info) { break; } + v3_wait_at_barrier(info); + if (info->vm_info->run_state == VM_STOPPED) { info->core_run_state = CORE_STOPPED; diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c index 62d2a94..7bd275f 100644 --- a/palacios/src/palacios/vm_guest.c +++ b/palacios/src/palacios/vm_guest.c @@ -30,7 +30,7 @@ #include #include #include - +#include v3_cpu_mode_t v3_get_vm_cpu_mode(struct guest_info * info) { @@ -539,6 +539,8 @@ int v3_init_vm(struct v3_vm_info * vm) { v3_init_intr_routers(vm); v3_init_ext_manager(vm); + v3_init_barrier(vm); + // Initialize the memory map if (v3_init_mem_map(vm) == -1) { PrintError("Could not initialize shadow map\n"); @@ -637,6 +639,8 @@ int v3_free_vm_internal(struct v3_vm_info * vm) { v3_deinit_intr_routers(vm); v3_deinit_host_events(vm); + v3_deinit_barrier(vm); + v3_deinit_cpuid_map(vm); v3_deinit_msr_map(vm); v3_deinit_io_map(vm); diff --git a/palacios/src/palacios/vmcs.c b/palacios/src/palacios/vmcs.c index 9e5bd77..2d36a3f 100644 --- a/palacios/src/palacios/vmcs.c +++ b/palacios/src/palacios/vmcs.c @@ -260,6 +260,7 @@ int v3_vmx_restore_vmcs(struct guest_info * info) { #ifdef __V3_64BIT__ check_vmcs_write(VMCS_GUEST_EFER, info->ctrl_regs.efer); + check_vmcs_write(VMCS_ENTRY_CTRLS, 
vmx_info->entry_ctrls.value); #endif diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index d60e55a..9e0fe08 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -404,6 +404,36 @@ int v3_stop_vm(struct v3_vm_info * vm) { } +int v3_pause_vm(struct v3_vm_info * vm) { + + if (vm->run_state != VM_RUNNING) { + PrintError("Tried to pause a VM that was not running\n"); + return -1; + } + + while (v3_raise_barrier(vm, NULL) == -1); + + vm->run_state = VM_PAUSED; + + return 0; +} + + +int v3_continue_vm(struct v3_vm_info * vm) { + + if (vm->run_state != VM_PAUSED) { + PrintError("Tried to continue a VM that was not paused\n"); + return -1; + } + + v3_lower_barrier(vm); + + vm->run_state = VM_RUNNING; + + return 0; +} + + int v3_free_vm(struct v3_vm_info * vm) { int i = 0; // deinitialize guest (free memory, etc...) diff --git a/palacios/src/palacios/vmm_barrier.c b/palacios/src/palacios/vmm_barrier.c index 614ceca..ab0aae4 100644 --- a/palacios/src/palacios/vmm_barrier.c +++ b/palacios/src/palacios/vmm_barrier.c @@ -18,22 +18,51 @@ */ -#include +#include +#include +#include +int v3_init_barrier(struct v3_vm_info * vm_info) { + struct v3_barrier * barrier = &(vm_info->barrier); - -int v3_init_barrier(struct v3_barrier * barrier) { memset(barrier, 0, sizeof(struct v3_barrier)); + v3_bitmap_init(&(barrier->cpu_map), vm_info->num_cores); v3_lock_init(&(barrier->lock)); return 0; } +int v3_deinit_barrier(struct v3_vm_info * vm_info) { + struct v3_barrier * barrier = &(vm_info->barrier); + + v3_bitmap_deinit(&(barrier->cpu_map)); + v3_lock_deinit(&(barrier->lock)); + + return 0; +} + + +/* Barrier synchronization primitive + * -- This call will block until all the guest cores are waiting at a common synchronization point + * in a yield loop. The core will block at the sync point until the barrier is lowered. 
+ * + * ARGUMENTS: + * vm_info -- The VM for which the barrier is being activated + * local_core -- The core whose thread this function is being called from, or NULL + * if the calling thread is not associated with a VM's core context + */ -int v3_activate_barrier(struct guest_info * core, struct v3_barrier * barrier) { +int v3_raise_barrier(struct v3_vm_info * vm_info, struct guest_info * local_core) { + struct v3_barrier * barrier = &(vm_info->barrier); addr_t flag; int acquired = 0; - + int all_blocked = 0; + + int local_vcpu = -1; + int i = 0; + + + flag = v3_lock_irqsave(barrier->lock); if (barrier->active == 0) { @@ -44,11 +73,49 @@ int v3_activate_barrier(struct guest_info * core, struct v3_barrier * barrier) { v3_unlock_irqrestore(barrier->lock, flag); if (acquired == 0) { + /* If we are in a core context and the barrier has already been acquired + we'll be safe and let the other barrier proceed. We will still report an error + though to allow possible cleanups to occur at the call site. 
+ */ + if (local_core != NULL) { + v3_wait_at_barrier(local_core); + } + return -1; } + // If we are raising the barrier from a core context + // we have to mark ourselves blocked first to avoid deadlock + if (local_core != NULL) { + local_vcpu = local_core->vcpu_id; + v3_bitmap_set(&(barrier->cpu_map), local_vcpu); + } + + - // wait for barrier catch + // send out interrupts to force exits on all cores + for (i = 0; i < vm_info->num_cores; i++) { + if (vm_info->cores[i].vcpu_id != local_vcpu) { + v3_interrupt_cpu(vm_info, vm_info->cores[i].pcpu_id, 0); + } + } + + // wait for barrier catch on all cores + while (all_blocked == 0) { + all_blocked = 1; + + for (i = 0; i < vm_info->num_cores; i++) { + if (v3_bitmap_check(&(barrier->cpu_map), i) == 0) { + // There is still a core that is not waiting at the barrier + all_blocked = 0; + } + } + + if (all_blocked == 1) { + break; + } + + v3_yield(local_core); + } return 0; @@ -56,20 +123,52 @@ int v3_activate_barrier(struct guest_info * core, struct v3_barrier * barrier) { +/* Lowers a barrier that has already been raised + * guest cores will automatically resume execution + * once this has been called + * + * TODO: Need some way to check that the barrier is active + */ + +int v3_lower_barrier(struct v3_vm_info * vm_info) { + struct v3_barrier * barrier = &(vm_info->barrier); + + // Clear the active flag, so cores won't wait + barrier->active = 0; -int v3_deactivate_barrier(struct v3_barrier * barrier) { + // Clear all the cpu flags, so cores will proceed + v3_bitmap_reset(&(barrier->cpu_map)); + return 0; } -int v3_check_barrier(struct guest_info * core, struct v3_barrier * barrier) { +/* + * Synchronization point for guest cores + * -- called as part of the main VMM event loop for each core + * -- if a barrier has been activated then the core will signal + * it has reached the barrier and sit in a yield loop until the + * barrier has been lowered + */ +int v3_wait_at_barrier(struct guest_info * core) { + struct v3_barrier * 
barrier = &(core->vm_info->barrier); - if (barrier->activated == 0) { + if (barrier->active == 0) { return 0; } + + /* Barrier has been activated. + * Wait here until it's lowered + */ + - // set cpu bit + // set cpu bit in barrier bitmap + v3_bitmap_set(&(barrier->cpu_map), core->vcpu_id); // wait for cpu bit to clear + while (v3_bitmap_check(&(barrier->cpu_map), core->vcpu_id) > 0) { // any positive value means the bit is still set; 0 = cleared, -1 = bad index + v3_yield(core); + } + return 0; } diff --git a/palacios/src/palacios/vmm_bitmap.c b/palacios/src/palacios/vmm_bitmap.c new file mode 100644 index 0000000..c056a5b --- /dev/null +++ b/palacios/src/palacios/vmm_bitmap.c @@ -0,0 +1,101 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Jack Lange + * Copyright (c) 2011, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + +#include +#include + + +int v3_bitmap_init(struct v3_bitmap * bitmap, int num_bits) { + int num_bytes = (num_bits / 8) + ((num_bits % 8) > 0); + + bitmap->num_bits = num_bits; + bitmap->bits = V3_Malloc(num_bytes); + + if (bitmap->bits == NULL) { + PrintError("Could not allocate bitmap of %d bits\n", num_bits); + return -1; + } + + memset(bitmap->bits, 0, num_bytes); + + return 0; +} + + +void v3_bitmap_deinit(struct v3_bitmap * bitmap) { + V3_Free(bitmap->bits); +} + + +int v3_bitmap_reset(struct v3_bitmap * bitmap) { + int num_bytes = (bitmap->num_bits / 8) + ((bitmap->num_bits % 8) > 0); + + memset(bitmap->bits, 0, num_bytes); + + return 0; +} + +int v3_bitmap_set(struct v3_bitmap * bitmap, int index) { // returns the previous bit value (0 or 1), or -1 if index is out of range + int major = index / 8; + int minor = index % 8; + int old_val = 0; + + if ((index < 0) || (index > (bitmap->num_bits - 1))) { + PrintError("Index out of bitmap range: (pos = %d) (num_bits = %d)\n", + index, bitmap->num_bits); + return -1; + } + + old_val = ((bitmap->bits[major] & (0x1 << minor)) != 0); + bitmap->bits[major] |= (0x1 << minor); + + return old_val; +} + + +int v3_bitmap_clear(struct v3_bitmap * bitmap, int index) { // returns the previous bit value (0 or 1), or -1 if index is out of range + int major = index / 8; + int minor = index % 8; + int old_val = 0; + + if ((index < 0) || (index > (bitmap->num_bits - 1))) { + PrintError("Index out of bitmap range: (pos = %d) (num_bits = %d)\n", + index, bitmap->num_bits); + return -1; + } + + old_val = ((bitmap->bits[major] & (0x1 << minor)) != 0); + bitmap->bits[major] &= ~(0x1 << minor); + + return old_val; +} + +int v3_bitmap_check(struct v3_bitmap * bitmap, int index) { // returns 1 if the bit is set, 0 if clear, -1 if index is out of range + int major = index / 8; + int minor = index % 8; + + if ((index < 0) || (index > (bitmap->num_bits - 1))) { + PrintError("Index out of bitmap range: (pos = %d) (num_bits = %d)\n", + index, bitmap->num_bits); + return -1; + } + + return ((bitmap->bits[major] & (0x1 << minor)) != 0); +} + diff --git a/palacios/src/palacios/vmm_ctrl_regs.c b/palacios/src/palacios/vmm_ctrl_regs.c index d5a8650..3616ae8 100644 --- a/palacios/src/palacios/vmm_ctrl_regs.c +++ 
b/palacios/src/palacios/vmm_ctrl_regs.c @@ -557,28 +557,37 @@ int v3_handle_efer_read(struct guest_info * core, uint_t msr, struct v3_msr * ds } - -// TODO: this is a disaster we need to clean this up... int v3_handle_efer_write(struct guest_info * core, uint_t msr, struct v3_msr src, void * priv_data) { - //struct efer_64 * new_efer = (struct efer_64 *)&(src.value); - struct efer_64 * shadow_efer = (struct efer_64 *)&(core->ctrl_regs.efer); - struct v3_msr * guest_efer = &(core->shdw_pg_state.guest_efer); + struct v3_msr * vm_efer = &(core->shdw_pg_state.guest_efer); + struct efer_64 * hw_efer = (struct efer_64 *)&(core->ctrl_regs.efer); + struct efer_64 old_hw_efer = *((struct efer_64 *)&core->ctrl_regs.efer); - PrintDebug("EFER Write\n"); - PrintDebug("EFER Write Values: HI=%x LO=%x\n", src.hi, src.lo); + PrintDebug("EFER Write HI=%x LO=%x\n", src.hi, src.lo); - //PrintDebug("Old EFER=%p\n", (void *)*(addr_t*)(shadow_efer)); - - // We virtualize the guests efer to hide the SVME and LMA bits - guest_efer->value = src.value; - - if (core->shdw_pg_mode == SHADOW_PAGING) { - // Enable/Disable Syscall - shadow_efer->sce = src.value & 0x1; - } else if (core->shdw_pg_mode == NESTED_PAGING) { - *(uint64_t *)shadow_efer = src.value; - shadow_efer->svme = 1; + // Set EFER value seen by guest if it reads EFER + vm_efer->value = src.value; + + // Set EFER value seen by hardware while the guest is running + *(uint64_t *)hw_efer = src.value; + + // Catch unsupported features + if ((old_hw_efer.lme == 1) && (hw_efer->lme == 0)) { + PrintError("Disabling long mode once it has been enabled is not supported\n"); + return -1; } + + // Set LME and LMA bits seen by hardware + if (old_hw_efer.lme == 0) { + // Long mode was not previously enabled, so the lme bit cannot + // be set yet. It will be set later when the guest sets CR0.PG + // to enable paging. + hw_efer->lme = 0; + } else { + // Long mode was previously enabled. Ensure LMA bit is set. 
+ // VMX does not automatically set LMA, and this should not affect SVM. + hw_efer->lma = 1; + } + return 0; } diff --git a/palacios/src/palacios/vmm_mem_hook.c b/palacios/src/palacios/vmm_mem_hook.c index 298bc8f..a0753cf 100644 --- a/palacios/src/palacios/vmm_mem_hook.c +++ b/palacios/src/palacios/vmm_mem_hook.c @@ -394,7 +394,20 @@ static int free_hook(struct v3_vm_info * vm, struct mem_hook * hook) { // We do not support unhooking subregions int v3_unhook_mem(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr_start) { struct v3_mem_region * reg = v3_get_mem_region(vm, core_id, guest_addr_start); - struct mem_hook * hook = reg->priv_data; + struct mem_hook * hook = NULL; + + if (reg == NULL) { + PrintError("Could not find region at %p\n", (void *)guest_addr_start); + return -1; + } + + hook = reg->priv_data; + + if (hook == NULL) { + PrintError("Trying to unhook region that is not a hook at %p\n", (void *)guest_addr_start); + return -1; + } + free_hook(vm, hook); diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index 5963d93..20c0c5f 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -212,23 +212,25 @@ static int init_vmcs_bios(struct guest_info * core, struct vmx_data * vmx_state) #ifdef __V3_64BIT__ + // Ensure host runs in 64-bit mode at each VM EXIT vmx_state->exit_ctrls.host_64_on = 1; #endif - - /* Not sure how exactly to handle this... */ + // Hook all accesses to EFER register v3_hook_msr(core->vm_info, EFER_MSR, &v3_handle_efer_read, &v3_handle_efer_write, core); - // Or is it this??? 
- vmx_state->entry_ctrls.ld_efer = 1; + // Restore host's EFER register on each VM EXIT vmx_state->exit_ctrls.ld_efer = 1; + + // Save/restore guest's EFER register to/from VMCS on VM EXIT/ENTRY vmx_state->exit_ctrls.save_efer = 1; - /* *** */ + vmx_state->entry_ctrls.ld_efer = 1; - vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE); + // Cause VM_EXIT whenever CR4.VMXE or CR4.PAE bits are written + vmx_ret |= check_vmcs_write(VMCS_CR4_MASK, CR4_VMXE | CR4_PAE); /* Setup paging */ diff --git a/palacios/src/palacios/vmx_ctrl_regs.c b/palacios/src/palacios/vmx_ctrl_regs.c index 6bbde36..d83d51f 100644 --- a/palacios/src/palacios/vmx_ctrl_regs.c +++ b/palacios/src/palacios/vmx_ctrl_regs.c @@ -77,6 +77,38 @@ int v3_vmx_handle_cr3_access(struct guest_info * info, struct vmx_exit_cr_qual * return -1; }
+/*
+ * Handle a CR4 access VM exit.
+ * access_type 0 is forwarded to v3_handle_cr4_write() and access_type 1 to
+ * v3_handle_cr4_read(); any other access type is rejected.
+ * Returns 0 on success, -1 on an invalid access type or a handler failure.
+ */
+int v3_vmx_handle_cr4_access(struct guest_info * info, struct vmx_exit_cr_qual * cr_qual) {
+    if (cr_qual->access_type >= 2) {
+        PrintError("Invalid CR4 Access type?? (type=%d)\n", cr_qual->access_type);
+        return -1;
+    }
+
+    if (cr_qual->access_type != 0) {
+        if (v3_handle_cr4_read(info) != 0) {
+            PrintError("Could not handle CR4 read\n");
+            return -1;
+        }
+
+        return 0;
+    }
+
+    if (v3_handle_cr4_write(info) != 0) {
+        PrintError("Could not handle CR4 write\n");
+        return -1;
+    }
+
+    // no VMX allowed in guest, so mask CR4.VMXE
+    info->ctrl_regs.cr4 |= 0x2000;
+
+    return 0;
+}
+
 static int handle_mov_to_cr3(struct guest_info * info, v3_reg_t * cr3_reg) { if (info->shdw_pg_mode == SHADOW_PAGING) { @@ -196,13 +228,14 @@ static int handle_mov_to_cr0(struct guest_info * info, v3_reg_t * new_cr0, struc // Paging transition if (v3_get_vm_mem_mode(info) == VIRTUAL_MEM) { - struct efer_64 * guest_efer = (struct efer_64 *)&(info->ctrl_regs.efer); + struct efer_64 * vm_efer = (struct efer_64 *)&(info->shdw_pg_state.guest_efer); + struct efer_64 * hw_efer = (struct efer_64 *)&(info->ctrl_regs.efer); - if (guest_efer->lme == 1) { + if (vm_efer->lme) { // PrintDebug("Enabling long mode\n"); - guest_efer->lma = 1; - guest_efer->lme = 1; + hw_efer->lma = 1; + hw_efer->lme = 1; vmx_info->entry_ctrls.guest_ia32e = 1; } diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c index 71c1eab..5af3122 100644 --- a/palacios/src/palacios/vmx_handler.c +++ b/palacios/src/palacios/vmx_handler.c @@ -224,12 +224,22 @@ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_inf return -1; } break; + case 4: + //PrintDebug("Handling CR4 Access\n"); + if (v3_vmx_handle_cr4_access(info, cr_qual) == -1) { + PrintError("Error in CR4 access handler\n"); + return -1; + } + break; default: PrintError("Unhandled CR access: %d\n", cr_qual->cr_id); return -1; } - info->rip += exit_info->instr_len; + // TODO: move RIP increment into all of the above individual CR + // handlers, not just v3_vmx_handle_cr4_access() + if (cr_qual->cr_id != 4) + info->rip += exit_info->instr_len; break; } diff --git a/palacios/src/palacios/vmx_hw_info.c b/palacios/src/palacios/vmx_hw_info.c index 478bfbc..d5fe494 100644 --- a/palacios/src/palacios/vmx_hw_info.c +++ b/palacios/src/palacios/vmx_hw_info.c @@ -100,7 +100,7 @@ int v3_init_vmx_hw(struct vmx_hw_info * hw_info) { v3_get_msr(VMX_BASIC_MSR, &(hw_info->basic_info.hi), &(hw_info->basic_info.lo)); v3_get_msr(VMX_MISC_MSR,
&(hw_info->misc_info.hi), &(hw_info->misc_info.lo)); - v3_get_msr(VMX_EPT_VPID_CAP_MSR, &(hw_info->ept_info.hi), &(hw_info->ept_info.lo)); + PrintError("BASIC_MSR: Lo: %x, Hi: %x\n", hw_info->basic_info.lo, hw_info->basic_info.hi); @@ -109,13 +109,28 @@ int v3_init_vmx_hw(struct vmx_hw_info * hw_info) { get_ex_ctrl_caps(hw_info, &(hw_info->exit_ctrls), VMX_EXIT_CTLS_MSR, VMX_TRUE_EXIT_CTLS_MSR); get_ex_ctrl_caps(hw_info, &(hw_info->entry_ctrls), VMX_ENTRY_CTLS_MSR, VMX_TRUE_ENTRY_CTLS_MSR); + /* Get secondary PROCBASED controls if secondary controls are available (optional or required) */ /* Intel Manual 3B. Sect. G.3.3 */
-    if ( ((hw_info->proc_ctrls.req_mask & 0x80000000) == 0) || ((hw_info->proc_ctrls.req_val & 0x80000000) == 1) ) {
+    /* (x & 0x80000000) is either 0 or 0x80000000 and can never equal 1, so the "required" case must test the bit against 0 */
+    if ( ((hw_info->proc_ctrls.req_mask & 0x80000000) == 0) || ((hw_info->proc_ctrls.req_val & 0x80000000) != 0) ) {
+ get_ctrl_caps(&(hw_info->sec_proc_ctrls), VMX_PROCBASED_CTLS2_MSR); + + /* Get EPT data only if available - Intel 3B, G.10 */ + /* EPT is available if processor has secondary controls (already tested) */ + /* and if procbased_ctls2[33]==1 or procbased_ctrls2[37]==1 */ + + struct v3_msr proc2; + + v3_get_msr(VMX_PROCBASED_CTLS2_MSR,&(proc2.hi),&(proc2.lo)); + + if ( (proc2.hi & 0x2) || (proc2.hi & 0x20) ) { + v3_get_msr(VMX_EPT_VPID_CAP_MSR, &(hw_info->ept_info.hi), &(hw_info->ept_info.lo)); + } } - + get_cr_fields(&(hw_info->cr0), VMX_CR0_FIXED1_MSR, VMX_CR0_FIXED0_MSR); get_cr_fields(&(hw_info->cr4), VMX_CR4_FIXED1_MSR, VMX_CR4_FIXED0_MSR);