2 Palacios main control interface
6 #include <linux/kernel.h>
7 #include <linux/module.h>
8 #include <linux/moduleparam.h>
9 #include <linux/errno.h>
10 #include <linux/percpu.h>
12 #include <linux/uaccess.h>
13 #include <linux/device.h>
14 #include <linux/cdev.h>
18 #include <linux/file.h>
19 #include <linux/spinlock.h>
20 #include <linux/kthread.h>
22 #include <linux/proc_fs.h>
23 #include <linux/seq_file.h>
25 #include <palacios/vmm.h>
31 #include "allow_devmem.h"
33 #include "lockcheck.h"
35 #include "linux-exts.h"
/* Module metadata, load-time parameters and file-scope state. */
37 MODULE_LICENSE("GPL");
/*
 * cpu_list: int array parameter registered with mode 0644; the number of
 * entries supplied is stored through &cpu_list_len.
 */
40 int cpu_list[NR_CPUS] = {};
42 module_param_array(cpu_list, int, &cpu_list_len, 0644);
43 MODULE_PARM_DESC(cpu_list, "Comma-delimited list of CPUs that Palacios will run on");
/* allow_devmem: int parameter, off (0) by default, registered with perm 0. */
45 static int allow_devmem = 0;
46 module_param(allow_devmem, int, 0);
47 MODULE_PARM_DESC(allow_devmem, "Allow general user-space /dev/mem access even if kernel is strict");
49 // Palacios options parameter
/* options: string (charp) parameter; v3_init() hands it to palacios_vmm_init(). */
51 module_param(options, charp, 0);
52 MODULE_PARM_DESC(options, "Generic options to internal Palacios modules");
/* Character-device major; stays 0 until v3_init() stores MAJOR(dev) in it. */
58 static int v3_major_num = 0;
/*
 * One slot per VM, all initialised to 0. v3_dev_ioctl() uses the slot index
 * as the VM's device minor; a NULL slot is treated as free.
 */
60 static struct v3_guest * guest_map[MAX_VMS] = {[0 ... MAX_VMS - 1] = 0};
/* Directory entry created by proc_mkdir("v3vee", NULL) in v3_init(). */
61 static struct proc_dir_entry * palacios_proc_dir = NULL;
/* Device class created by class_create(THIS_MODULE, "vms") in v3_init(). */
63 struct class * v3_class = NULL;
/* cdev backing the control device; initialised with v3_ctrl_fops in v3_init(). */
64 static struct cdev ctrl_dev;
/*
 * register_vm - look for a free entry in guest_map for @guest.
 *
 * Walks guest_map from index 0 and tests each slot against NULL.
 * v3_dev_ioctl() treats a return value of -1 as "no slot available" and any
 * other value as the minor number for the new VM device.
 * NOTE(review): how the slot is claimed and what is returned is decided by
 * statements outside the lines shown here — confirm against the full body.
 */
66 static int register_vm(struct v3_guest * guest) {
69 for (i = 0; i < MAX_VMS; i++) {
70 if (guest_map[i] == NULL) {
/*
 * v3_dev_ioctl - ioctl entry point of the V3 control device.
 *
 * @filp:  file the ioctl was issued on (not used by the code shown here).
 * @ioctl: command number.
 * @arg:   command argument; a user-space pointer for V3_CREATE_GUEST,
 *         V3_ADD_MEMORY and V3_REMOVE_MEMORY, a guest_map index for
 *         V3_FREE_GUEST.
 *
 * V3_CREATE_GUEST, V3_FREE_GUEST, V3_ADD_MEMORY, V3_REMOVE_MEMORY and
 * V3_RESET_MEMORY are handled here. Any other command is looked up with
 * get_global_ctrl() and passed to that entry's handler, whose return value
 * is returned unchanged; a warning is logged for unhandled commands.
 */
81 static long v3_dev_ioctl(struct file * filp,
82 unsigned int ioctl, unsigned long arg) {
83 void __user * argp = (void __user *)arg;
84 DEBUG("V3 IOCTL %d\n", ioctl);
/* Create a guest from a user-supplied struct v3_guest_img. */
88 case V3_CREATE_GUEST:{
90 struct v3_guest_img user_image;
91 struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
94 ERROR("Palacios: Error allocating Kernel guest_image\n");
98 memset(guest, 0, sizeof(struct v3_guest));
100 INFO("Palacios: Creating V3 Guest...\n");
/* The guest_map slot index doubles as the device minor. */
102 vm_minor = register_vm(guest);
104 if (vm_minor == -1) {
105 ERROR("Palacios Error: Too many VMs are currently running\n");
109 guest->vm_dev = MKDEV(v3_major_num, vm_minor);
/* First copy only the image descriptor (size, data pointer, name). */
111 if (copy_from_user(&user_image, argp, sizeof(struct v3_guest_img))) {
112 ERROR("Palacios Error: copy from user error getting guest image...\n");
116 guest->img_size = user_image.size;
118 DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
119 guest->img = palacios_valloc(guest->img_size);
/*
 * Reject a NULL buffer as well as an ERR_PTR value, so that a failed
 * allocation is never used as the copy_from_user() destination below.
 * NOTE(review): palacios_valloc's failure convention is defined elsewhere;
 * testing both forms is correct for either convention.
 */
121 if (!guest->img || IS_ERR(guest->img)) {
122 ERROR("Palacios Error: Could not allocate space for guest image\n");
/* Then copy the image payload itself from user space. */
126 if (copy_from_user(guest->img, user_image.guest_data, guest->img_size)) {
127 ERROR("Palacios: Error loading guest data\n");
/*
 * guest was zeroed by the memset above, so copying at most 127 bytes leaves
 * the name NUL-terminated as long as guest->name holds at least 128 bytes
 * — TODO confirm the field size.
 */
131 strncpy(guest->name, user_image.name, 127);
133 INIT_LIST_HEAD(&(guest->exts));
135 if (create_palacios_vm(guest) == -1) {
136 ERROR("Palacios: Error creating guest\n");
/* Cleanup statements: image buffer, then the guest_map slot, then the guest. */
144 palacios_vfree(guest->img);
146 guest_map[vm_minor] = NULL;
148 palacios_free(guest);
/* Free the guest stored at guest_map[arg]. */
154 case V3_FREE_GUEST: {
155 unsigned long vm_idx = arg;
156 struct v3_guest * guest;
/* guest_map has exactly MAX_VMS slots, so MAX_VMS itself is out of range. */
158 if (vm_idx >= MAX_VMS) {
159 ERROR("Invalid VM index: %ld\n", vm_idx);
163 guest = guest_map[vm_idx];
166 ERROR("No VM at index %ld\n",vm_idx);
170 INFO("Freeing VM (%s) (%p)\n", guest->name, guest);
172 if (free_palacios_vm(guest)<0) {
173 ERROR("Cannot free guest at index %ld\n",vm_idx);
/* Release the slot only after free_palacios_vm() has been attempted. */
177 guest_map[vm_idx] = NULL;
/* Hand a user-described memory region to Palacios. */
180 case V3_ADD_MEMORY: {
181 struct v3_mem_region mem;
183 memset(&mem, 0, sizeof(struct v3_mem_region));
185 if (copy_from_user(&mem, argp, sizeof(struct v3_mem_region))) {
186 ERROR("copy from user error getting mem_region...\n");
190 DEBUG("Adding %llu pages to Palacios memory\n", mem.num_pages);
192 if (add_palacios_memory(&mem) == -1) {
193 ERROR("Error adding memory to Palacios\n");
/* Take a user-described memory region back from Palacios. */
200 case V3_REMOVE_MEMORY: {
201 struct v3_mem_region mem;
203 memset(&mem, 0, sizeof(struct v3_mem_region));
205 if (copy_from_user(&mem, argp, sizeof(struct v3_mem_region))) {
206 ERROR("copy from user error getting mem_region...\n");
210 DEBUG("Removing memory at address %p\n", (void*)(mem.base_addr));
212 if (remove_palacios_memory(&mem) == -1) {
213 ERROR("Error removing memory from Palacios\n");
/* Tear the memory manager down and bring it up again. */
222 case V3_RESET_MEMORY: {
223 DEBUG("Resetting memory\n");
224 if (palacios_deinit_mm() == -1) {
225 ERROR("Error deiniting the Palacios memory manager\n");
228 if (palacios_init_mm()) {
229 ERROR("Error initing the Palacios memory manager\n");
/* Everything else is looked up among the registered global controls. */
236 struct global_ctrl * ctrl = get_global_ctrl(ioctl);
239 return ctrl->handler(ioctl, arg);
242 WARNING("\tUnhandled global ctrl cmd: %d\n", ioctl);
/*
 * File operations of the control device. The native and the compat ioctl
 * entry points are both routed to v3_dev_ioctl.
 */
253 static struct file_operations v3_ctrl_fops = {
254 .owner = THIS_MODULE,
255 .unlocked_ioctl = v3_dev_ioctl,
256 .compat_ioctl = v3_dev_ioctl,
/*
 * palacios_get_procdir - accessor for the module's proc directory entry.
 *
 * Returns palacios_proc_dir, which is NULL until v3_init() has created the
 * "v3vee" directory with proc_mkdir().
 */
261 struct proc_dir_entry *palacios_get_procdir(void)
263 // INFO("Returning procdir=%p\n",palacios_proc_dir);
264 return palacios_proc_dir;
/*
 * Capacities of the state buffers allocated by read_guests_details() and
 * read_guests(): number of per-vcore entries and number of memory-region
 * entries that fit behind the respective state header.
 */
268 #define MAX_VCORES 256
269 #define MAX_REGIONS 1024
/*
 * read_guests_details - seq_file show routine behind guests_full_proc_open().
 *
 * @s: seq_file that receives the text.
 * @v: iterator cookie (not used by the code shown here).
 *
 * Allocates three scratch buffers (base, core and memory state), then for
 * every non-NULL guest_map entry asks v3_get_state_vm() to fill them and
 * prints the VM state, one line per vcore and one line per memory region.
 * The buffers are released at the end.
 */
273 static int read_guests_details(struct seq_file *s, void *v)
277 struct v3_vm_base_state *base=0;
278 struct v3_vm_core_state *core=0;
279 struct v3_vm_mem_state *mem=0;
281 base = palacios_alloc(sizeof(struct v3_vm_base_state));
284 ERROR("No space for base state structure\n");
/* Header plus room for MAX_VCORES per-vcore entries. */
288 core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
291 ERROR("No space for core state structure\n");
/* Header plus room for MAX_REGIONS region entries. */
295 mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
298 ERROR("No space for memory state structure\n");
302 for(i = 0; i < MAX_VMS; i++) {
303 if (guest_map[i] != NULL) {
305 "---------------------------------------------------------------------------------------\n");
309 "Device: /dev/v3-vm%d\n",
310 i,guest_map[i]->name, i);
/*
 * Reset the counts to the buffer capacities before every query; the loops
 * below iterate over num_vcores / num_regions as they stand after the call.
 * NOTE(review): presumably v3_get_state_vm() lowers them to the real counts
 * — confirm against its definition.
 */
313 core->num_vcores=MAX_VCORES; // max we can handle
314 mem->num_regions=MAX_REGIONS; // max we can handle
316 if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
317 ERROR("Cannot get VM info\n");
318 seq_printf(s, "<unable to get data for this VM>\n");
/* Map the VM state enum to a label; anything unrecognised prints UNKNOWN. */
324 base->state==V3_VM_INVALID ? "INVALID" :
325 base->state==V3_VM_RUNNING ? "running" :
326 base->state==V3_VM_STOPPED ? "stopped" :
327 base->state==V3_VM_PAUSED ? "paused" :
328 base->state==V3_VM_ERROR ? "ERROR" :
329 base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
332 seq_printf(s, "Core States\n");
/* One line per vcore: state, pcore, exit count, last rip, cpu/mem modes. */
334 for (j=0;j<core->num_vcores;j++) {
336 " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s\n",
338 core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
339 core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
340 core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
341 core->vcore[j].pcore,
342 core->vcore[j].num_exits,
343 core->vcore[j].last_rip,
344 core->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" :
345 core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" :
346 core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" :
347 core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" :
348 core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" :
349 core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN",
350 core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
351 core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
352 core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
353 core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
356 seq_printf(s, "\nMemory Regions\n");
/* One line per region: host physical range and the NUMA node of its start. */
357 for (j=0;j<mem->num_regions;j++) {
358 seq_printf(s," region %u has HPAs 0x%p-0x%p (node %d)\n",
359 j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size,
360 numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)));
364 "---------------------------------------------------------------------------------------\n");
/* Release whichever scratch buffers were obtained. */
370 if (mem) { palacios_free(mem); }
371 if (core) { palacios_free(core); }
372 if (base) { palacios_free(base); }
/*
 * read_guests - seq_file show routine behind guests_short_proc_open().
 *
 * @s: seq_file that receives the text.
 * @v: iterator cookie (not used by the code shown here).
 *
 * Short form of read_guests_details(): one line per non-NULL guest_map entry
 * with the VM name, its /dev/v3-vm<N> device, the VM state, and vcore and
 * region counts. Uses the same three scratch buffers and releases them at
 * the end.
 */
377 static int read_guests(struct seq_file *s, void *v)
380 struct v3_vm_base_state *base=0;
381 struct v3_vm_core_state *core=0;
382 struct v3_vm_mem_state *mem=0;
384 base = palacios_alloc(sizeof(struct v3_vm_base_state));
387 ERROR("No space for base state structure\n");
/* Header plus room for MAX_VCORES per-vcore entries. */
391 core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
394 ERROR("No space for core state structure\n");
/* Header plus room for MAX_REGIONS region entries. */
398 mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
401 ERROR("No space for memory state structure\n");
405 for(i = 0; i < MAX_VMS; i++) {
406 if (guest_map[i] != NULL) {
407 seq_printf(s,"%s\t/dev/v3-vm%d", guest_map[i]->name, i);
/* Reset the counts to the buffer capacities before every query. */
409 core->num_vcores=MAX_VCORES; // max we can handle
410 mem->num_regions=MAX_REGIONS; // max we can handle
412 if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
413 ERROR("Cannot get VM info\n");
414 seq_printf(s, "\t<unable to get data for this VM>\n");
/* Map the VM state enum to a label; anything unrecognised prints UNKNOWN. */
416 seq_printf(s,"\t%s\t%lu vcores\t%lu regions\n",
417 base->state==V3_VM_INVALID ? "INVALID" :
418 base->state==V3_VM_RUNNING ? "running" :
419 base->state==V3_VM_STOPPED ? "stopped" :
420 base->state==V3_VM_PAUSED ? "paused" :
421 base->state==V3_VM_ERROR ? "ERROR" :
422 base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
/* Release whichever scratch buffers were obtained. */
431 if (mem) { palacios_free(mem); }
432 if (core) { palacios_free(core); }
433 if (base) { palacios_free(base); }
/*
 * guests_short_proc_open - open handler of guest_short_proc_ops.
 *
 * Binds the file to read_guests() through single_open(), passing the proc
 * entry's data pointer as private data. Returns single_open()'s result.
 */
439 static int guests_short_proc_open(struct inode * inode, struct file * filp)
441 struct proc_dir_entry * proc_entry = PDE(inode);
442 return single_open(filp, read_guests, proc_entry->data);
/*
 * guests_full_proc_open - open handler of guest_full_proc_ops.
 *
 * Binds the file to read_guests_details() through single_open(), passing the
 * proc entry's data pointer as private data. Returns single_open()'s result.
 */
445 static int guests_full_proc_open(struct inode * inode, struct file * filp)
447 struct proc_dir_entry * proc_entry = PDE(inode);
448 return single_open(filp, read_guests_details, proc_entry->data);
/*
 * File operations that v3_init() installs on the "v3-guests-details" proc
 * entry: opened through guests_full_proc_open, released through
 * single_release.
 */
454 static struct file_operations guest_full_proc_ops = {
455 .owner = THIS_MODULE,
456 .open = guests_full_proc_open,
459 .release = single_release,
/*
 * File operations that v3_init() installs on the "v3-guests" proc entry:
 * opened through guests_short_proc_open, released through single_release.
 */
462 static struct file_operations guest_short_proc_ops = {
463 .owner = THIS_MODULE,
464 .open = guests_short_proc_open,
467 .release = single_release,
470 // Supply basic information that the user-space tools need
471 // to manipulate Palacios. The current use case here is to
472 // convey memory information
/*
 * read_info - seq_file show routine behind info_proc_open().
 *
 * @s: seq_file that receives the text.
 * @v: iterator cookie (not used by the code shown here).
 *
 * Prints MAX_ORDER, the node and online-CPU counts, the compiled and the
 * run-time mem_block_size, the CPU-to-node mapping, and the node-to-node and
 * CPU-to-CPU distance tables.
 *
 * CPU loops use for_each_online_cpu(): num_online_cpus() is a count, not an
 * upper bound on CPU ids, so counting from 0 would visit the wrong ids
 * whenever the online set is not contiguous.
 */
473 static int read_info(struct seq_file *s, void *v)
475 uint64_t mem_block_size;
478 seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
479 seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
480 seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
481 seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
/* Without a "mem_block_size" option the compiled-in value is reported. */
482 if (!v3_lookup_option("mem_block_size")) {
483 mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
485 if (strict_strtoull(v3_lookup_option("mem_block_size"), 0, &mem_block_size)) {
490 seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
492 seq_printf(s,"\nCPU to node mappings\n");
/* Also record the highest node seen; it bounds the node table below. */
493 for_each_online_cpu(i) {
494 seq_printf(s,"cpu %d -> node %d\n", i, numa_cpu_to_node(i));
495 if (numa_cpu_to_node(i)>max_node) {
496 max_node=numa_cpu_to_node(i);
499 seq_printf(s,"\nNode to node distances\n");
/* Column header row, then one row per node. */
500 for (j=0;j<=max_node;j++) {
501 seq_printf(s," \t%2d", j);
504 for (i=0;i<=max_node;i++) {
505 seq_printf(s,"%2d ",i);
506 for (j=0;j<=max_node;j++) {
507 seq_printf(s,"\t%2d", numa_get_distance(i,j));
511 seq_printf(s,"\nCPU to CPU distances\n");
/* Column header row, then one row per online CPU. */
512 for_each_online_cpu(j) {
513 seq_printf(s," \t%2d", j);
516 for_each_online_cpu(i) {
517 seq_printf(s,"%2d ",i);
518 for_each_online_cpu(j) {
519 seq_printf(s,"\t%2d", numa_get_distance(numa_cpu_to_node(i),numa_cpu_to_node(j)));
/*
 * info_proc_open - open handler of info_proc_ops.
 *
 * Binds the file to read_info() through single_open(), passing the proc
 * entry's data pointer as private data. Returns single_open()'s result.
 */
526 static int info_proc_open(struct inode * inode, struct file * filp)
528 struct proc_dir_entry * proc_entry = PDE(inode);
529 return single_open(filp, read_info, proc_entry->data);
/*
 * File operations that v3_init() installs on the "v3-info" proc entry:
 * opened through info_proc_open, released through single_release.
 */
534 static struct file_operations info_proc_ops = {
535 .owner = THIS_MODULE,
536 .open = info_proc_open,
539 .release = single_release,
/*
 * v3_init - module entry point.
 *
 * Bring-up order: the "v3vee" proc directory, the memory manager, /dev/mem
 * access, NUMA support, the Palacios core (with the "options" parameter),
 * the Linux extensions, the "vms" device class, the character-device region
 * and control device, and finally the three proc files. The statements at
 * the bottom undo these steps.
 */
543 static int __init v3_init(void) {
545 dev_t dev = MKDEV(0, 0); // We dynamicallly assign the major number
551 palacios_proc_dir = proc_mkdir("v3vee", NULL);
552 if (!palacios_proc_dir) {
553 ERROR("Could not create proc entry\n");
558 // this will populate the v3vee tree...
559 if (palacios_init_mm()) {
564 palacios_allow_devmem();
567 // numa is now a required interface and we need it
568 // up before primary initiatilization
569 palacios_init_numa();
571 // Initialize Palacios
572 palacios_vmm_init(options);
574 // initialize extensions
575 init_lnx_extensions();
578 v3_class = class_create(THIS_MODULE, "vms");
579 if (IS_ERR(v3_class)) {
580 ERROR("Failed to register V3 VM device class\n");
581 ret = PTR_ERR(v3_class);
585 INFO("intializing V3 Control device\n");
/* Reserves MAX_VMS + 1 minors starting at 0, i.e. minors 0..MAX_VMS. */
587 ret = alloc_chrdev_region(&dev, 0, MAX_VMS + 1, "v3vee");
590 ERROR("Error registering device region for V3 devices\n");
594 v3_major_num = MAJOR(dev);
/*
 * NOTE(review): minor MAX_VMS + 1 is one past the range reserved above.
 * v3_exit() builds the same dev number for device_destroy(), so moving the
 * control device to another minor has to be done in both functions.
 */
596 dev = MKDEV(v3_major_num, MAX_VMS + 1);
599 DEBUG("Creating V3 Control device: Major %d, Minor %d\n", v3_major_num, MINOR(dev));
600 cdev_init(&ctrl_dev, &v3_ctrl_fops);
601 ctrl_dev.owner = THIS_MODULE;
602 ctrl_dev.ops = &v3_ctrl_fops;
/*
 * Keep cdev_add()'s result in ret; previously it was discarded, so the
 * "Error adding v3 control device" report below could not reflect a
 * cdev_add() failure.
 */
603 ret = cdev_add(&ctrl_dev, dev, 1);
605 device_create(v3_class, NULL, dev, NULL, "v3vee");
608 ERROR("Error adding v3 control device\n");
613 struct proc_dir_entry *entry;
/* Summary listing, served by read_guests(). */
615 entry = create_proc_entry("v3-guests", 0444, palacios_proc_dir);
617 entry->proc_fops = &guest_short_proc_ops;
618 INFO("/proc/v3vee/v3-guests successfully created\n");
620 ERROR("Could not create proc entry\n");
/* Detailed listing, served by read_guests_details(). */
623 entry = create_proc_entry("v3-guests-details", 0444, palacios_proc_dir);
625 entry->proc_fops = &guest_full_proc_ops;
626 INFO("/proc/v3vee/v3-guests-details successfully created\n");
628 ERROR("Could not create proc entry\n");
/* Host topology and memory information, served by read_info(). */
632 entry = create_proc_entry("v3-info", 0444, palacios_proc_dir);
634 entry->proc_fops = &info_proc_ops;
635 INFO("/proc/v3vee/v3-info successfully created\n");
637 ERROR("Could not create proc entry\n");
/* Unwind statements, in reverse order of the setup above. */
647 remove_proc_entry("v3-guests-details", palacios_proc_dir);
649 remove_proc_entry("v3-guests", palacios_proc_dir);
651 device_destroy(v3_class, dev);
653 unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
655 class_destroy(v3_class);
658 palacios_restore_devmem();
660 palacios_deinit_mm();
662 remove_proc_entry("v3vee", NULL);
/*
 * v3_exit - module exit point.
 *
 * Stops and frees every VM still present in guest_map, then tears down NUMA
 * support, the character-device region, the control device and its class,
 * the Linux extensions, /dev/mem access, the memory manager and the proc
 * entries. Allocation counters are logged along the way.
 */
671 static void __exit v3_exit(void) {
672 extern u32 pg_allocs;
679 struct v3_guest * guest;
683 /* Stop and free any running VMs */
684 for (i = 0; i < MAX_VMS; i++) {
685 if (guest_map[i] != NULL) {
686 guest = (struct v3_guest *)guest_map[i];
/* A failed stop is only logged; the VM is freed regardless. */
688 if (v3_stop_vm(guest->v3_ctx) < 0)
689 ERROR("Couldn't stop VM %d\n", i);
691 free_palacios_vm(guest);
/* Same dev number that v3_init() used for the control device. */
696 dev = MKDEV(v3_major_num, MAX_VMS + 1);
698 INFO("Removing V3 Control device\n");
703 palacios_deinit_numa();
705 DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
706 DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
707 DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
/* Base and count match the alloc_chrdev_region() call in v3_init(). */
709 unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
713 device_destroy(v3_class, dev);
714 class_destroy(v3_class);
717 deinit_lnx_extensions();
720 palacios_restore_devmem();
723 palacios_deinit_mm();
/* Remove the three proc files before the directory that holds them. */
725 remove_proc_entry("v3-info", palacios_proc_dir);
726 remove_proc_entry("v3-guests-details", palacios_proc_dir);
727 remove_proc_entry("v3-guests", palacios_proc_dir);
728 remove_proc_entry("v3vee", NULL);
730 DEBUG("Palacios Module Mallocs = %d, Frees = %d\n", mod_allocs, mod_frees);
/* Register v3_init / v3_exit as the module's load and unload hooks. */
738 module_init(v3_init);
739 module_exit(v3_exit);
/*
 * trace_malloc - allocation wrapper built on palacios_alloc_extended().
 *
 * @size:  number of bytes requested.
 * @flags: gfp flags passed through to the allocator.
 *
 * The third argument is always -1.
 * NOTE(review): presumably -1 means "no node preference" — confirm against
 * palacios_alloc_extended().
 */
743 void * trace_malloc(size_t size, gfp_t flags) {
747 addr = palacios_alloc_extended(size, flags, -1);
/*
 * trace_free - release wrapper that passes @objp to palacios_free().
 *
 * @objp: pointer to release; the const qualifier is cast away because
 *        palacios_free() is called with a void * argument.
 */
753 void trace_free(const void * objp) {
755 palacios_free((void*)objp);