X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=linux_module%2Fmain.c;h=723e5ead320bfca9705cecd1f70c95ca278c6250;hb=6b9abb54ebafd8266f1711b803ccb027675a465f;hp=296d01bbc6171a60c1196cd18f537d00109b1957;hpb=298a05652b5704f9881af0683e3f16fc4cd03959;p=palacios.git diff --git a/linux_module/main.c b/linux_module/main.c index 296d01b..723e5ea 100644 --- a/linux_module/main.c +++ b/linux_module/main.c @@ -3,7 +3,7 @@ (c) Jack Lange, 2010 */ - +#include #include #include #include @@ -20,17 +20,20 @@ #include #include +#include #include #include "palacios.h" #include "mm.h" #include "vm.h" +#include "numa.h" +#include "allow_devmem.h" +#include "memcheck.h" +#include "lockcheck.h" #include "linux-exts.h" - - MODULE_LICENSE("GPL"); // Module parameter @@ -39,18 +42,23 @@ int cpu_list_len = 0; module_param_array(cpu_list, int, &cpu_list_len, 0644); MODULE_PARM_DESC(cpu_list, "Comma-delimited list of CPUs that Palacios will run on"); +static int allow_devmem = 0; +module_param(allow_devmem, int, 0); +MODULE_PARM_DESC(allow_devmem, "Allow general user-space /dev/mem access even if kernel is strict"); + // Palacios options parameter static char *options; module_param(options, charp, 0); MODULE_PARM_DESC(options, "Generic options to internal Palacios modules"); + int mod_allocs = 0; int mod_frees = 0; static int v3_major_num = 0; static struct v3_guest * guest_map[MAX_VMS] = {[0 ... MAX_VMS - 1] = 0}; -static struct proc_dir_entry *dir = 0; +static struct proc_dir_entry * palacios_proc_dir = NULL; struct class * v3_class = NULL; static struct cdev ctrl_dev; @@ -82,7 +90,7 @@ static long v3_dev_ioctl(struct file * filp, struct v3_guest_img user_image; struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest)); - if (IS_ERR(guest)) { + if (!(guest)) { ERROR("Palacios: Error allocating Kernel guest_image\n"); return -EFAULT; } @@ -108,9 +116,9 @@ static long v3_dev_ioctl(struct file * filp, guest->img_size = user_image.size; DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size); - guest->img = vmalloc(guest->img_size); + guest->img = palacios_valloc(guest->img_size); - if (IS_ERR(guest->img)) { + if (!guest->img) { ERROR("Palacios Error: Could not allocate space for guest image\n"); goto out_err1; } @@ -133,7 +141,7 @@ static long v3_dev_ioctl(struct file * filp, out_err2: - vfree(guest->img); + palacios_vfree(guest->img); out_err1: guest_map[vm_minor] = NULL; out_err: @@ -181,7 +189,7 @@ out_err: DEBUG("Adding %llu pages to Palacios memory\n", mem.num_pages); - if (add_palacios_memory(mem.base_addr, mem.num_pages) == -1) { + if (add_palacios_memory(&mem) == -1) { ERROR("Error adding memory to Palacios\n"); return -EFAULT; } @@ -189,11 +197,38 @@ out_err: break; } + case V3_REMOVE_MEMORY: { + struct v3_mem_region mem; + + memset(&mem, 0, sizeof(struct v3_mem_region)); + + if (copy_from_user(&mem, argp, sizeof(struct v3_mem_region))) { + ERROR("copy from user error getting mem_region...\n"); + return -EFAULT; + } + + DEBUG("Removing memory at address %p\n", (void*)(mem.base_addr)); + + if (remove_palacios_memory(&mem) == -1) { + ERROR("Error removing memory from Palacios\n"); + return -EFAULT; + } + + break; + } + + + case V3_RESET_MEMORY: { - if (palacios_init_mm() == -1) { - ERROR("Error resetting Palacios memory\n"); + DEBUG("Resetting memory\n"); + if (palacios_deinit_mm() == -1) { + ERROR("Error deiniting the Palacios memory manager\n"); return -EFAULT; } + if (palacios_init_mm()) { + ERROR("Error initing the Palacios memory manager\n"); + return -EFAULT; + } break; } @@ -225,132 +260,354 @@ static struct file_operations v3_ctrl_fops = { struct proc_dir_entry *palacios_get_procdir(void) { - return dir; + // INFO("Returning procdir=%p\n",palacios_proc_dir); + return palacios_proc_dir; } -#define MAX_VCORES 32 +#define MAX_VCORES 256 +#define MAX_REGIONS 1024 -static int read_guests(char * buf, char ** start, off_t off, int count, - int * eof, void * data) + + +static int read_guests_details(struct seq_file *s, void *v) { - int len = 0; unsigned int i = 0; + unsigned int j = 0; + uint64_t num_vcores, num_regions; + struct v3_vm_base_state *base=0; + struct v3_vm_core_state *core=0; + struct v3_vm_mem_state *mem=0; - struct v3_vm_state *s =palacios_alloc(sizeof(struct v3_vm_state)+MAX_VCORES*sizeof(struct v3_vcore_state)); + base = palacios_alloc(sizeof(struct v3_vm_base_state)); - if (!s) { - ERROR("No space for state structure\n"); + if (!base) { + ERROR("No space for base state structure\n"); goto out; } - + for(i = 0; i < MAX_VMS; i++) { - if (guest_map[i] != NULL) { - if (len>=count) { - goto out; - } else { - len += snprintf(buf+len, count-len, - "%s\t/dev/v3-vm%d ", - guest_map[i]->name, i); - - if (len>=count) { - *(buf+len-1)='\n'; - goto out; - } else { + + if (guest_map[i] != NULL) { + + v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions); + + core = palacios_alloc(sizeof(struct v3_vm_core_state) + num_vcores*sizeof(struct v3_vm_vcore_state)); + + if (!core) { + ERROR("No space for core state structure\n"); + goto out; + } + + mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + num_regions*sizeof(struct v3_vm_mem_region)); + + if (!mem) { + ERROR("No space for memory state structure\n"); + goto out; + } + + seq_printf(s, + "---------------------------------------------------------------------------------------\n"); + seq_printf(s, + "Entry: %d\n" + "Name: %s\n" + "Device: /dev/v3-vm%d\n", + i,guest_map[i]->name, i); + // Get extended data - s->num_vcores=MAX_VCORES; // max we can handle - if (v3_get_state_vm(guest_map[i]->v3_ctx, s)) { - ERROR("Cannot get VM info\n"); - *(buf+len-1)='\n'; - goto out; + core->num_vcores=num_vcores; + mem->num_regions=num_regions; + + if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) { + ERROR("Cannot get VM info\n"); + seq_printf(s, "\n"); } else { - unsigned long j; - - len+=snprintf(buf+len, count-len, - "%s [0x%p-0x%p] %lu vcores ", - s->state==V3_VM_INVALID ? "INVALID" : - s->state==V3_VM_RUNNING ? "running" : - s->state==V3_VM_STOPPED ? "stopped" : - s->state==V3_VM_PAUSED ? "paused" : - s->state==V3_VM_ERROR ? "ERROR" : - s->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN", - s->mem_base_paddr, s->mem_base_paddr+s->mem_size-1, - s->num_vcores); - if (len>=count) { - *(buf+len-1)='\n'; - goto out; - } - for (j=0;jnum_vcores;j++) { - len+=snprintf(buf+len, count-len, - "[vcore %lu %s on pcore %lu %llu exits rip=0x%p %s %s %s] ", - j, - s->vcore[j].state==V3_VCORE_INVALID ? "INVALID" : - s->vcore[j].state==V3_VCORE_RUNNING ? "running" : - s->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN", - s->vcore[j].pcore, - s->vcore[j].num_exits, - s->vcore[j].last_rip, - s->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" : - s->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" : - s->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" : - s->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" : - s->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" : - s->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN", - s->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" : - s->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN", - s->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" : - s->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN"); - if (len>=count) { - *(buf+len-1)='\n'; - goto out; + seq_printf(s, + "Type: %s\n" + "State: %s\n" + "Cores: %llu\n" + "Regions: %llu\n" + "Memsize: %llu (%llu ROS)\n\n", + base->vm_type==V3_VM_GENERAL ? "general" : + base->vm_type==V3_VM_HVM ? "HVM" : "UNKNOWN", + base->state==V3_VM_INVALID ? "INVALID" : + base->state==V3_VM_RUNNING ? "running" : + base->state==V3_VM_STOPPED ? "stopped" : + base->state==V3_VM_PAUSED ? "paused" : + base->state==V3_VM_ERROR ? "ERROR" : + base->state==V3_VM_SIMULATING ? "simulating" : + base->state==V3_VM_RESETTING ? "resetting" : "UNKNOWN", + core->num_vcores, + mem->num_regions, + mem->mem_size, + mem->ros_mem_size); + + seq_printf(s, "Core States\n"); + + for (j=0;jnum_vcores;j++) { + seq_printf(s, + " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s %s\n", + j, + core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" : + core->vcore[j].state==V3_VCORE_RUNNING ? "running" : + core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : + core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN", + core->vcore[j].pcore, + core->vcore[j].num_exits, + core->vcore[j].last_rip, + core->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" : + core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" : + core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" : + core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" : + core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" : + core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN", + core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" : + core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN", + core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" : + core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN", + core->vcore[j].vcore_type==V3_VCORE_GENERAL ? "" : + core->vcore[j].vcore_type==V3_VCORE_ROS ? "ros" : + core->vcore[j].vcore_type==V3_VCORE_HRT ? "hrt" : "UNKNOWN"); + } + + seq_printf(s, "\nMemory Regions\n"); + for (j=0;jnum_regions;j++) { + seq_printf(s," region %u has HPAs 0x%016llx-0x%016llx (node %d) GPA 0x%016llx %s %s\n", + j, (uint64_t)mem->region[j].host_paddr, (uint64_t)mem->region[j].host_paddr+mem->region[j].size, + numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)), + (uint64_t)mem->region[j].guest_paddr, + mem->region[j].swapped ? "swapped" : "", + mem->region[j].pinned ? "pinned" : ""); } - } + } + seq_printf(s, + "---------------------------------------------------------------------------------------\n"); + + palacios_free(mem); mem=0; + palacios_free(core); core=0; + + } + + } + + + out: + if (mem) { palacios_free(mem); } + if (core) { palacios_free(core); } + if (base) { palacios_free(base); } + + return 0; +} - *(buf+len-1)='\n'; +static int read_guests(struct seq_file *s, void *v) +{ + unsigned int i = 0; + struct v3_vm_base_state *base=0; + struct v3_vm_core_state *core=0; + struct v3_vm_mem_state *mem=0; + + base = palacios_alloc(sizeof(struct v3_vm_base_state)); + + if (!base) { + ERROR("No space for base state structure\n"); + goto out; + } + core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state)); + + if (!core) { + ERROR("No space for core state structure\n"); + goto out; + } + + mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region)); + + if (!mem) { + ERROR("No space for memory state structure\n"); + goto out; + } + + for(i = 0; i < MAX_VMS; i++) { + if (guest_map[i] != NULL) { + seq_printf(s,"%s\t/dev/v3-vm%d", guest_map[i]->name, i); + // Get extended data + core->num_vcores=MAX_VCORES; // max we can handle + mem->num_regions=MAX_REGIONS; // max we can handle + + if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) { + ERROR("Cannot get VM info\n"); + seq_printf(s, "\t\n"); + } else { + seq_printf(s,"\t%s\t%llu vcores\t%llu regions\t%llu mem\t%s\n", + base->state==V3_VM_INVALID ? "INVALID" : + base->state==V3_VM_RUNNING ? "running" : + base->state==V3_VM_STOPPED ? "stopped" : + base->state==V3_VM_PAUSED ? "paused" : + base->state==V3_VM_ERROR ? "ERROR" : + base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN", + core->num_vcores, + mem->num_regions, + mem->mem_size, + base->vm_type == V3_VM_GENERAL ? "general" : + base->vm_type == V3_VM_HVM ? "hvm" : "UNKNOWN"); } - } } - } } - + + out: - if (s) { palacios_free(s); } + if (mem) { palacios_free(mem); } + if (core) { palacios_free(core); } + if (base) { palacios_free(base); } + + return 0; +} + - return len; +static int guests_short_proc_open(struct inode * inode, struct file * filp) +{ + struct proc_dir_entry * proc_entry = PDE(inode); + return single_open(filp, read_guests, proc_entry->data); } -static int show_mem(char * buf, char ** start, off_t off, int count, - int * eof, void * data) +static int guests_full_proc_open(struct inode * inode, struct file * filp) { - int len = 0; - - len = snprintf(buf,count, "%p\n", (void *)get_palacios_base_addr()); - len += snprintf(buf+len,count-len, "%lld\n", get_palacios_num_pages()); + struct proc_dir_entry * proc_entry = PDE(inode); + return single_open(filp, read_guests_details, proc_entry->data); +} + + + + +static struct file_operations guest_full_proc_ops = { + .owner = THIS_MODULE, + .open = guests_full_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct file_operations guest_short_proc_ops = { + .owner = THIS_MODULE, + .open = guests_short_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +// Supply basic information that the user-space tools need +// to manipulate Palacios. The current use case here is to +// convey memory information +static int read_info(struct seq_file *s, void *v) +{ + uint64_t mem_block_size; + int i,j; + int max_node=-1; + seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER); + seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes()); + seq_printf(s,"number of cpus: \t%d\n", num_online_cpus()); + seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE); + if (!v3_lookup_option("mem_block_size")) { + mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE; + } else { + if (strict_strtoull(v3_lookup_option("mem_block_size"), 0, &mem_block_size)) { + // huh? + mem_block_size=-1; + } + } + seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size); - return len; + seq_printf(s,"\nCPU to node mappings\n"); + for (i=0;i node %d\n", i, numa_cpu_to_node(i)); + if (numa_cpu_to_node(i)>max_node) { + max_node=numa_cpu_to_node(i); + } + } + seq_printf(s,"\nNode to node distances\n"); + for (j=0;j<=max_node;j++) { + seq_printf(s," \t%2d", j); + } + seq_printf(s,"\n"); + for (i=0;i<=max_node;i++) { + seq_printf(s,"%2d ",i); + for (j=0;j<=max_node;j++) { + seq_printf(s,"\t%2d", numa_get_distance(i,j)); + } + seq_printf(s,"\n"); + } + seq_printf(s,"\nCPU to CPU distances\n"); + for (j=0;jdata); +} + + + +static struct file_operations info_proc_ops = { + .owner = THIS_MODULE, + .open = info_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int __init v3_init(void) { + dev_t dev = MKDEV(0, 0); // We dynamicallly assign the major number int ret = 0; + LOCKCHECK_INIT(); + MEMCHECK_INIT(); + + palacios_proc_dir = proc_mkdir("v3vee", NULL); + if (!palacios_proc_dir) { + ERROR("Could not create proc entry\n"); + ret = -1; + goto failure1; + } + + // this will populate the v3vee tree... + if (palacios_init_mm()) { + goto failure2; + } + + if (allow_devmem) { + palacios_allow_devmem(); + } - palacios_init_mm(); + // numa is now a required interface and we need it + // up before primary initiatilization + palacios_init_numa(); // Initialize Palacios palacios_vmm_init(options); - // initialize extensions init_lnx_extensions(); v3_class = class_create(THIS_MODULE, "vms"); - if (IS_ERR(v3_class)) { + if (!v3_class || IS_ERR(v3_class)) { ERROR("Failed to register V3 VM device class\n"); - return PTR_ERR(v3_class); + ret = PTR_ERR(v3_class); + goto failure3; } INFO("intializing V3 Control device\n"); @@ -359,7 +616,7 @@ static int __init v3_init(void) { if (ret < 0) { ERROR("Error registering device region for V3 devices\n"); - goto failure2; + goto failure4; } v3_major_num = MAJOR(dev); @@ -377,41 +634,63 @@ static int __init v3_init(void) { if (ret != 0) { ERROR("Error adding v3 control device\n"); - goto failure1; + goto failure5; } - dir = proc_mkdir("v3vee", NULL); - if(dir) { + { struct proc_dir_entry *entry; - entry = create_proc_read_entry("v3-guests", 0444, dir, - read_guests, NULL); + entry = create_proc_entry("v3-guests", 0444, palacios_proc_dir); if (entry) { + entry->proc_fops = &guest_short_proc_ops; INFO("/proc/v3vee/v3-guests successfully created\n"); } else { ERROR("Could not create proc entry\n"); - goto failure1; + goto failure6; } - - entry = create_proc_read_entry("v3-mem", 0444, dir, - show_mem, NULL); - if (entry) { - INFO("/proc/v3vee/v3-mem successfully added\n"); + entry = create_proc_entry("v3-guests-details", 0444, palacios_proc_dir); + if (entry) { + entry->proc_fops = &guest_full_proc_ops; + INFO("/proc/v3vee/v3-guests-details successfully created\n"); } else { ERROR("Could not create proc entry\n"); - goto failure1; + goto failure7; } - } else { - ERROR("Could not create proc entry\n"); - goto failure1; + + entry = create_proc_entry("v3-info", 0444, palacios_proc_dir); + if (entry) { + entry->proc_fops = &info_proc_ops; + INFO("/proc/v3vee/v3-info successfully created\n"); + } else { + ERROR("Could not create proc entry\n"); + goto failure8; + } + + } return 0; - failure1: + failure8: + remove_proc_entry("v3-guests-details", palacios_proc_dir); + failure7: + remove_proc_entry("v3-guests", palacios_proc_dir); + failure6: + device_destroy(v3_class, dev); + failure5: unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1); - failure2: + failure4: class_destroy(v3_class); + failure3: + if (allow_devmem) { + palacios_restore_devmem(); + } + palacios_deinit_mm(); + failure2: + remove_proc_entry("v3vee", NULL); + failure1: + MEMCHECK_DEINIT(); + LOCKCHECK_DEINIT(); return ret; } @@ -422,6 +701,8 @@ static void __exit v3_exit(void) { extern u32 pg_frees; extern u32 mallocs; extern u32 frees; + extern u32 vmallocs; + extern u32 vfrees; int i = 0; struct v3_guest * guest; dev_t dev; @@ -429,8 +710,13 @@ static void __exit v3_exit(void) { /* Stop and free any running VMs */ for (i = 0; i < MAX_VMS; i++) { - if (guest_map[i] != NULL) { - guest = (struct v3_guest *)guest_map[i]; + if (guest_map[i] != NULL) { + guest = (struct v3_guest *)(guest_map[i]); + + if (!guest->v3_ctx) { + ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name); + continue; + } if (v3_stop_vm(guest->v3_ctx) < 0) ERROR("Couldn't stop VM %d\n", i); @@ -447,7 +733,10 @@ static void __exit v3_exit(void) { palacios_vmm_exit(); + palacios_deinit_numa(); + DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees); + DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees); DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees); unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1); @@ -460,13 +749,21 @@ static void __exit v3_exit(void) { deinit_lnx_extensions(); + if (allow_devmem) { + palacios_restore_devmem(); + } + palacios_deinit_mm(); - remove_proc_entry("v3-guests", dir); - remove_proc_entry("v3-mem", dir); + remove_proc_entry("v3-info", palacios_proc_dir); + remove_proc_entry("v3-guests-details", palacios_proc_dir); + remove_proc_entry("v3-guests", palacios_proc_dir); remove_proc_entry("v3vee", NULL); DEBUG("Palacios Module Mallocs = %d, Frees = %d\n", mod_allocs, mod_frees); + + MEMCHECK_DEINIT(); + LOCKCHECK_DEINIT(); } @@ -480,7 +777,7 @@ void * trace_malloc(size_t size, gfp_t flags) { void * addr = NULL; mod_allocs++; - addr = kmalloc(size, flags); + addr = palacios_alloc_extended(size, flags, -1); return addr; } @@ -488,5 +785,5 @@ void * trace_malloc(size_t size, gfp_t flags) { void trace_free(const void * objp) { mod_frees++; - kfree(objp); + palacios_free((void*)objp); }