2 Device File Virtualization Host Module
4 (c) Akhil Guliani and William Gross, 2015
6 Adapted from MPI module (c) 2012 Peter Dinda
10 #include <linux/namei.h>
11 #include <linux/version.h>
12 #include <linux/file.h>
13 #include <linux/spinlock.h>
14 #include <linux/uaccess.h>
15 #include <linux/module.h>
17 #include <linux/kernel.h>
18 #include <linux/slab.h>
20 #include <linux/sched.h>
22 #include <linux/syscalls.h>
23 #include <linux/init.h>
24 #include <linux/kmod.h>
25 #include <linux/delay.h>
26 #include <linux/wait.h>
27 #include <linux/poll.h>
29 #include <linux/file.h>
30 #include <linux/fcntl.h>
31 #include <linux/device.h>
32 #include <linux/cdev.h>
34 #include <asm/uaccess.h>
38 #include <palacios/vm_guest_mem.h>
39 #include <interfaces/vmm_host_hypercall.h>
42 #include "devfile_hc.h"
// Logging macros. Every printing variant prepends "devfile: " to its format string.
// NOTE(review): DEEP_DEBUG_PRINT and SHALLOW_DEBUG_PRINT are each defined twice
// below (a printk form and an empty form). The preprocessor conditionals that
// presumably select one form of each are not visible in this excerpt -- confirm.
46 #define SHALLOW_DEBUG 1
// Verbose tracing: printk form followed by the empty (no-op) form.
49 #define DEEP_DEBUG_PRINT(fmt, args...) printk(("devfile: " fmt), ##args)
51 #define DEEP_DEBUG_PRINT(fmt, args...)
// Coarse tracing: printk form followed by the empty (no-op) form.
55 #define SHALLOW_DEBUG_PRINT(fmt, args...) printk(("devfile: " fmt), ##args)
57 #define SHALLOW_DEBUG_PRINT(fmt, args...)
// Error and informational messages are both defined as printk.
61 #define ERROR(fmt, args...) printk(("devfile: " fmt), ##args)
62 #define INFO(fmt, args...) printk(("devfile: " fmt), ##args)
// NOTE(review): expands to printf, not printk. No use of PRINT_CONSOLE is visible
// in this excerpt -- confirm whether it is needed in this kernel module.
64 #define PRINT_CONSOLE(fmt,args...) printf(("devfile: " fmt),##args)
69 // Added to make unique id's for IOCTL
// ioctl command numbers built with _IOR/_IOW on MY_MACIG.
// The definition of MY_MACIG is not visible in this excerpt.
71 #define INIT_IOCTL _IOR(MY_MACIG, 0, int)
72 #define SHADOW_SYSCALL_DONE _IOW(MY_MACIG, 2, int)
// Base name of the device node; setup_mknod_call builds "/dev/" DEVFILE_NAME from it.
74 #define DEVFILE_NAME "v3-devfile"
// Major number taken from the allocated chrdev region in init_module.
76 static int devfile_major_num = 0;
// Device class created in init_module and destroyed in cleanup_module.
77 static struct class *devfile_class = 0;
// Character device that init_module binds to devfile_fops.
78 static struct cdev devfile_dev;
// State for one userland/VM binding, shared by the hypercall path and the
// character-device handlers.
// NOTE(review): other code in this file reads and writes s->shared_mem_va, but that
// member's declaration (and the struct's closing brace) is not visible in this excerpt.
82 struct devfile_state {
// No use of shared_mem_uva is visible in this excerpt.
84 uint64_t shared_mem_uva;
// Physical address of the shared page; stored from the ioctl argument in devfile_ioctl.
85 uint64_t shared_mem_pa;
// Woken by devfile_syscall_return when a request is posted; devfile_poll registers on it.
88 wait_queue_head_t user_wait_queue;
// The hypercall path sleeps here until the SHADOW_SYSCALL_DONE ioctl wakes it.
89 wait_queue_head_t host_wait_queue;
// WAIT_FOR_INIT:  set in devfile_open.
// WAIT_ON_GUEST:  set in devfile_ioctl (both visible paths).
// WAIT_ON_SHADOW: set in devfile_syscall_return while a request is outstanding.
91 enum { WAIT_FOR_INIT, WAIT_ON_GUEST, WAIT_ON_SHADOW} state;
94 // Currently this proof of concept supports a single userland/VM binding
95 // and is serially reusable
// The one global binding; starts out as 0 (no binding).
96 static struct devfile_state *state=0;
// Look up the binding for a core. The core argument is ignored: the single
// global `state` is returned, which may still be 0.
98 static inline struct devfile_state *find_matching_state(palacios_core_t core) { return state; }
101 /* Hypercall helpers */
// Fetch the eight hypercall arguments a1..a8 from the core's registers through
// the guest accessors, in the order rcx, rdx, rsi, rdi, r8, r9, r10, r11.
// get_args calls this on its "64 bit hcall" path.
// The rest of the parameter list and the braces are not visible in this excerpt.
103 static void get_args_64(palacios_core_t core,
104 struct guest_accessors *acc,
114 *a1 = acc->get_rcx(core);
115 *a2 = acc->get_rdx(core);
116 *a3 = acc->get_rsi(core);
117 *a4 = acc->get_rdi(core);
118 *a5 = acc->get_r8(core);
119 *a6 = acc->get_r9(core);
120 *a7 = acc->get_r10(core);
121 *a8 = acc->get_r11(core);
// Fetch the eight hypercall arguments a1..a8 from guest memory: eight consecutive
// 4-byte values read with read_gva starting at the guest rsp (offsets 0..28).
// get_args calls this on its "32 bit hcall" path.
// NOTE(review): the result of read_gva is not checked in the visible lines; the
// declarations of `rsp` and `temp` are not visible in this excerpt.
124 static void get_args_32(palacios_core_t core,
125 struct guest_accessors *acc,
// Base address for the argument reads.
139 rsp = acc->get_rsp(core);
// Each read fills temp, which is then assigned to the 64-bit output slot.
141 acc->read_gva(core,rsp,4,&temp); *a1=temp;
142 acc->read_gva(core,rsp+4,4,&temp); *a2=temp;
143 acc->read_gva(core,rsp+8,4,&temp); *a3=temp;
144 acc->read_gva(core,rsp+12,4,&temp); *a4=temp;
145 acc->read_gva(core,rsp+16,4,&temp); *a5=temp;
146 acc->read_gva(core,rsp+20,4,&temp); *a6=temp;
147 acc->read_gva(core,rsp+24,4,&temp); *a7=temp;
148 acc->read_gva(core,rsp+28,4,&temp); *a8=temp;
// Fetch the eight hypercall arguments, dispatching to get_args_64 or get_args_32.
// An unrecognised calling convention is only logged via ERROR.
// NOTE(review): the branch conditions are not visible in this excerpt; presumably
// they test the rbx value read below -- confirm.
151 static void get_args(palacios_core_t core,
152 struct guest_accessors *acc,
165 rbx=acc->get_rbx(core);
170 DEEP_DEBUG_PRINT("64 bit hcall\n");
171 return get_args_64(core,acc,a1,a2,a3,a4,a5,a6,a7,a8);
174 DEEP_DEBUG_PRINT("32 bit hcall\n");
175 return get_args_32(core,acc,a1,a2,a3,a4,a5,a6,a7,a8);
178 ERROR("UNKNOWN hcall calling convention\n");
// Write the hypercall result back into the guest's registers.
// NOTE(review): both rax and rbx are set to rc in the visible lines, while the
// caller (devfile_hypercall) also passes an errno value. Presumably rbx was meant
// to carry errno; the parameter list is not visible in this excerpt -- confirm.
183 static void put_return(palacios_core_t core,
184 struct guest_accessors *acc,
188 acc->set_rax(core,rc);
189 acc->set_rbx(core,rc);
193 Convert all hypercall pointer arguments from GVAs to GPAs
194 The host userland is responsible for converting from
197 The assumption here is that any pointer argument
198 points to a structure that does NOT span a page
199 boundary. The guest userland is responsible for
200 assuring that this is the case.
// Translate hypercall arguments a1..a6 in place with gva_to_gpa: each visible
// step copies the current value to a temporary and passes the argument's own
// slot as the output.
// NOTE(review): the conditions guarding each translation are not visible in this
// excerpt; presumably they test per-argument bits of bvec -- confirm.
// NOTE(review): the result of gva_to_gpa is not checked in the visible lines, and
// the function's return statement is not visible.
202 static int deref_args(palacios_core_t core,
203 struct guest_accessors *acc,
204 uint64_t* a1, uint64_t* a2, uint64_t* a3, uint64_t* a4, uint64_t* a5,
205 uint64_t* a6, uint64_t bvec)
208 uint64_t a1tmp = *a1;
209 acc->gva_to_gpa(core,a1tmp,a1);
212 uint64_t a2tmp = *a2;
213 acc->gva_to_gpa(core,a2tmp,a2);
216 uint64_t a3tmp = *a3;
217 acc->gva_to_gpa(core,a3tmp,a3);
220 uint64_t a4tmp = *a4;
221 acc->gva_to_gpa(core,a4tmp,a4);
224 uint64_t a5tmp = *a5;
225 acc->gva_to_gpa(core,a5tmp,a5);
228 uint64_t a6tmp = *a6;
229 acc->gva_to_gpa(core,a6tmp,a6);
236 /* Create /dev/v3-devfile in the host */
238 // User mode helper call to create module private chardev for ioctls
// Runs "/bin/mknod /dev/v3-devfile c <major_num> 0" through the usermode-helper
// API and returns the result of call_usermodehelper_exec. The failure path's
// return statement is not visible in this excerpt.
239 static int setup_mknod_call(int major_num)
241 //www.ibm.com/developerworks/library/l-user-space-apps/
242 struct subprocess_info *sub_info;
// Major number rendered as decimal text for the mknod command line; the
// declaration of buf is not visible in this excerpt.
245 snprintf(buf,20,"%d",major_num);
247 const char *argv[] = { "/bin/mknod", "/dev/" DEVFILE_NAME,"c", buf, "0", NULL };
248 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
// NOTE(review): GFP_ATOMIC is used although the exec below waits for the helper
// (UMH_WAIT_PROC) -- confirm whether GFP_KERNEL would be appropriate here.
250 sub_info = call_usermodehelper_setup( (char*)argv[0], (char**)argv, envp, GFP_ATOMIC );
// NOTE(review): DEVFILE_PATH is referenced here but its definition is not visible
// in this excerpt (only DEVFILE_NAME is) -- confirm it is defined.
252 if (sub_info == NULL) {
253 ERROR("failed to create %s\n",DEVFILE_PATH);
257 SHALLOW_DEBUG_PRINT("set up usermode call\n");
// UMH_WAIT_PROC: wait for the helper process before returning its status.
259 return call_usermodehelper_exec( sub_info, UMH_WAIT_PROC );
// Hand the request already placed in the shared page to the userland shadow
// process and wait for its answer.
// Marks the binding WAIT_ON_SHADOW, wakes user_wait_queue, then sleeps on
// host_wait_queue until the state is WAIT_ON_GUEST again. The result is read
// from shared-page slot 8 (rc) and slot 9 (stored through *errno).
// The declaration of rc and the return statement are not visible in this excerpt.
264 static uint64_t devfile_syscall_return(struct devfile_state *s, uint64_t *errno)
267 uint64_t *shared_page = (uint64_t*)(s->shared_mem_va);
269 s->state=WAIT_ON_SHADOW;
271 // kick the user if needed
272 //!! IDEA: We can add Usermode Helper to start shadow process instead
273 // and wait for it to send us an ioctl to wake up the module.
274 wake_up_interruptible(&(s->user_wait_queue));
275 // goto sleep until we see a message received
276 // part of a separate ioctl
277 SHALLOW_DEBUG_PRINT("waiting For Shadow Process\n");
// The wait is retried whenever it returns non-zero, so an interrupted wait
// does not abort the request.
// NOTE(review): if a signal stays pending this loop presumably spins -- confirm.
278 while (wait_event_interruptible(s->host_wait_queue, (s->state==WAIT_ON_GUEST)) != 0) {}
279 SHALLOW_DEBUG_PRINT("waiting done\n");
280 // Get the returned value and errno
281 rc = *(shared_page +8);
282 *errno = *(shared_page +9);
284 SHALLOW_DEBUG_PRINT("waiting done %016llu (errno %016llu)\n",rc,*errno);
// Marshal one forwarded system call into the shared page and wait for the result.
// Shared-page layout written here, as uint64_t slots:
//   0 = sys_code, 1..6 = a1..a6, 7 = bit_vec.
// The result is then obtained from devfile_syscall_return, which also fills *errno.
// Part of the parameter list, the declaration of ret and the return statement
// are not visible in this excerpt.
289 static int devfile_syscall_hcall(struct devfile_state *s,
290 palacios_core_t core,
292 uint64_t a1, uint64_t a2,uint64_t a3,
293 uint64_t a4, uint64_t a5, uint64_t a6,
297 //Using shared memory page
299 uint64_t *shared_page = (uint64_t*)(s->shared_mem_va);
301 *(shared_page +0) = sys_code;
302 *(shared_page +1) = a1;
303 *(shared_page +2) = a2;
304 *(shared_page +3) = a3;
305 *(shared_page +4) = a4;
306 *(shared_page +5) = a5;
307 *(shared_page +6) = a6;
308 *(shared_page +7) = bit_vec;
310 SHALLOW_DEBUG_PRINT("Host Module to wait on shadow\n");
312 //Now wait for rc and errno to be written to the shared page
313 ret = devfile_syscall_return(s, errno);
315 SHALLOW_DEBUG_PRINT("SYSCALL HCALL %016llu (errno %016llu)\n",ret,*errno);
322 // The main Interface for Hypercalls
// Visible flow: look up the binding; log if shared memory is not initialised yet;
// fetch sys_code, a1..a6 and bit_vec from the guest; translate arguments with
// deref_args; forward the call through devfile_syscall_hcall; write the result
// back with put_return.
// NOTE(review): `core` is declared palacios_core_t * here but is passed to helpers
// that take palacios_core_t -- confirm the intended type.
// NOTE(review): s comes from the global `state`, which starts as 0; no NULL check
// is visible before s->state is read -- confirm one exists in the lines not shown.
323 int devfile_hypercall(palacios_core_t *core,
325 struct guest_accessors *acc,
328 uint64_t a1,a2,a3,a4,a5,a6,bit_vec,sys_code;
332 struct devfile_state *s = find_matching_state(core);
// The handling after this message (presumably an early return) is not visible.
334 if (s->state == WAIT_FOR_INIT){
335 SHALLOW_DEBUG_PRINT("Shared Memory Not Yet Initialized, returning syscall hypercall\n");
342 DEEP_DEBUG_PRINT("devfile_hypercall(%p,0x%x,%p,%p)\n",
// Argument order: syscall code first, six syscall arguments, then the bit vector.
345 get_args(core,acc,&sys_code,&a1,&a2,&a3,&a4,&a5,&a6,&bit_vec);
347 DEEP_DEBUG_PRINT("original arguments: %016llu, %016llu, %016llu, %016llu, %016llu, %016llu, %016llu, %016llu\n",
348 sys_code,a1,a2,a3,a4,a5,a6,bit_vec);
350 // Convert any pointer arguments from GVAs to GPAs
// The return value of deref_args is ignored here.
351 deref_args(core,acc,&a1,&a2,&a3,&a4,&a5,&a6,bit_vec);
353 DEEP_DEBUG_PRINT("derefed arguments: %016llu, %016llu, %016llu, %016llu, %016llu, %016llu, %016llu, %016llu\n",
354 sys_code,a1,a2,a3,a4,a5,a6,bit_vec);
// Blocks until the shadow process has answered.
356 rc = devfile_syscall_hcall(s,core,sys_code,a1,a2,a3,a4,a5,a6,bit_vec,&errno);
358 SHALLOW_DEBUG_PRINT("Syscall rc: %016llu errno=%016llu\n",rc,errno);
360 put_return(core,acc,rc,errno);
// open() handler for the device file: allocates and zeroes a devfile_state,
// initialises both wait queues, sets the state to WAIT_FOR_INIT and stores the
// pointer in filp->private_data.
// The conditions guarding the two ERROR paths, their return values, and any
// assignment to the global `state` are not visible in this excerpt.
367 static int devfile_open(struct inode * inode, struct file * filp)
369 struct devfile_state *s = state;
// Presumably reached when the global binding already exists -- confirm.
372 ERROR("attempting to open devfile that is already open\n");
376 s=(struct devfile_state*)kmalloc(sizeof(struct devfile_state),GFP_KERNEL);
// Presumably reached when kmalloc returned NULL -- confirm.
379 ERROR("Failed to allocate space for open\n");
383 // This hideousness is here because in this POC we
384 // are simply allowing a single userland to be tied to
385 // a single VM. At the same time, we are making
386 // the rest of the code more flexible for the future
389 memset(s,0,sizeof(*s));
391 init_waitqueue_head(&s->user_wait_queue);
392 init_waitqueue_head(&s->host_wait_queue);
// Hypercalls are turned away until shared memory is set up through the ioctl.
394 s->state = WAIT_FOR_INIT;
396 filp->private_data = (void*) s;
// release() handler for the device file. The visible lines only log when
// userland closes the file while a request is outstanding (WAIT_ON_SHADOW).
// NOTE(review): freeing of s and resetting of the global `state` are not visible
// in this excerpt -- confirm they happen in the lines not shown.
401 static int devfile_close(struct inode * inode, struct file * filp)
403 struct devfile_state *s = filp->private_data;
406 if (s->state==WAIT_ON_SHADOW) {
407 ERROR("Odd, userland is closing devfile while we are waiting for it\n");
// ioctl handler used by the userland shadow process. Two paths are visible:
//  - a setup path that records arg as the shared page's physical address, maps it
//    with __va, and moves the binding to WAIT_ON_GUEST (its case label is not
//    visible in this excerpt; presumably INIT_IOCTL -- confirm);
//  - SHADOW_SYSCALL_DONE, which moves the binding to WAIT_ON_GUEST and wakes the
//    hypercall path sleeping on host_wait_queue.
// The switch statement, default handling and return values are not visible.
418 static long devfile_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
420 struct devfile_state *s = filp->private_data;
// NOTE(review): arg comes from userland and is passed to __va with no validation
// visible here -- confirm the address is checked or trusted by design.
424 s->shared_mem_pa = (uint64_t)arg;
425 s->shared_mem_va = __va(s->shared_mem_pa);
426 SHALLOW_DEBUG_PRINT("Shared Memory Physical Address: %016llu\n",s->shared_mem_pa);
427 SHALLOW_DEBUG_PRINT("Shared Memory Kernel VA: %p\n",s->shared_mem_va);
428 //Change State to wait on guest
429 s->state = WAIT_ON_GUEST;
432 case SHADOW_SYSCALL_DONE:
// The state is updated before the wake-up so the sleeper's condition is already true.
433 s->state = WAIT_ON_GUEST;
434 wake_up_interruptible(&(s->host_wait_queue));
// poll() handler: registers the caller on user_wait_queue and reports
// POLLIN | POLLRDNORM when a request is outstanding (state WAIT_ON_SHADOW).
// The return statement of the "no request yet" path is not visible in this excerpt.
446 static unsigned int devfile_poll(struct file * filp,
447 struct poll_table_struct * poll_tb)
449 struct devfile_state *s = filp->private_data;
451 SHALLOW_DEBUG_PRINT("poll\n");
453 // register ourselves on the user wait queue
454 poll_wait(filp, &(s->user_wait_queue), poll_tb);
456 if (s->state==WAIT_ON_SHADOW) {
457 // Yes, we have a request if you want it!
458 DEEP_DEBUG_PRINT("poll done immediate\n");
459 return POLLIN | POLLRDNORM;
461 // No request yet, so we need to wait for one to show up.
462 DEEP_DEBUG_PRINT("poll delayed\n");
463 // We will get called again when that queue is woken up
// File operations of the device file. The same handler serves both
// unlocked_ioctl and compat_ioctl.
468 static struct file_operations devfile_fops = {
469 .open = devfile_open,
470 .release = devfile_close,
471 .poll = devfile_poll,
472 .unlocked_ioctl = devfile_ioctl,
473 .compat_ioctl = devfile_ioctl
// Exports the hypercall entry point to other kernel modules.
476 EXPORT_SYMBOL(devfile_hypercall);
// Module entry point: creates the "devfile" class, allocates a chrdev region,
// registers devfile_dev with devfile_fops and creates the "v3-devfile" device.
// Local declarations, error-path returns and the final return are not visible
// in this excerpt.
478 int init_module(void)
482 SHALLOW_DEBUG_PRINT("INIT\n");
484 devfile_class = class_create(THIS_MODULE,"devfile");
485 if (!devfile_class || IS_ERR(devfile_class)) {
486 ERROR("Cannot register devfile device class\n");
487 return PTR_ERR(devfile_class);
// NOTE(review): whether the class is destroyed on this failure path is not
// visible here -- confirm.
492 if (alloc_chrdev_region(&dev,0,1,"devfile")<0) {
493 ERROR("Failed to alloc chrdev region\n");
497 devfile_major_num = MAJOR(dev);
// NOTE(review): the region above reserves one minor starting at 0, but dev is
// rebuilt here with minor 1 for cdev_add and device_create -- confirm the
// intended minor number.
499 dev = MKDEV(devfile_major_num,1);
501 cdev_init(&devfile_dev, &devfile_fops);
502 devfile_dev.owner = THIS_MODULE;
503 devfile_dev.ops = &devfile_fops;
// NOTE(review): the results of cdev_add and device_create are not checked in
// the visible lines.
504 cdev_add(&devfile_dev, dev, 1);
506 device_create(devfile_class, NULL, dev, NULL, "v3-devfile");
// NOTE(review): the lines below reference `fops`, `major`, `rc` and DEVFILE_PATH,
// none of which is declared in this excerpt; presumably this is an alternative
// mknod-based setup that is disabled by lines not shown -- confirm.
509 // Setup chardev for IOCTL
510 major = register_chrdev(0,"dfvDev", &fops);
512 SHALLOW_DEBUG_PRINT("registering dfvDev char device failed with %d\n", major);
515 SHALLOW_DEBUG_PRINT("assigned major: %d\n", major);
516 SHALLOW_DEBUG_PRINT("creating node with mknod %s c %d 0\n", DEVFILE_PATH, major);
518 // Call Helper API function to setup chardev
519 rc = setup_mknod_call(major);
520 SHALLOW_DEBUG_PRINT("UMH api mknod %s c %d 0 -- ret: %d\n\n", DEVFILE_PATH, major,rc);
// Module exit point: releases the chrdev region, removes the cdev, destroys the
// device and then the class.
529 void cleanup_module(void)
// Matches the minor-1 device number used by init_module for cdev_add/device_create.
531 dev_t dev = MKDEV(devfile_major_num,1);
// NOTE(review): the region (minor 0) is unregistered before the cdev and device
// are removed -- confirm whether teardown should mirror init in reverse order.
533 unregister_chrdev_region(MKDEV(devfile_major_num,0),1);
534 cdev_del(&devfile_dev);
535 device_destroy(devfile_class,dev);
536 class_destroy(devfile_class);
// NOTE(review): `major` is not declared in this excerpt; presumably this line
// belongs to the mknod-based alternative and is disabled by lines not shown -- confirm.
539 unregister_chrdev(major, "dfvDev");