Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Linux kernel compatibility enhancements (through 3.19)
[palacios.git] / linux_module / main.c
index 7ec627b..b0dbfee 100644 (file)
@@ -3,7 +3,7 @@
    (c) Jack Lange, 2010
  */
 
-
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/errno.h>
 #include "palacios.h"
 #include "mm.h"
 #include "vm.h"
+#include "numa.h"
 #include "allow_devmem.h"
 #include "memcheck.h"
 #include "lockcheck.h"
 
 #include "linux-exts.h"
 
+#include "util-hashtable.h"
+
 
 MODULE_LICENSE("GPL");
 
@@ -63,6 +66,10 @@ static struct proc_dir_entry * palacios_proc_dir = NULL;
 struct class * v3_class = NULL;
 static struct cdev ctrl_dev;
 
+
+// mapping from thread ids to their resource control blocks
+struct hashtable *v3_thread_resource_map=0;
+
 static int register_vm(struct v3_guest * guest) {
     int i = 0;
 
@@ -90,7 +97,7 @@ static long v3_dev_ioctl(struct file * filp,
            struct v3_guest_img user_image;
            struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
 
-           if (IS_ERR(guest)) {
+           if (!(guest)) {
                ERROR("Palacios: Error allocating Kernel guest_image\n");
                return -EFAULT;
            }
@@ -116,9 +123,10 @@ static long v3_dev_ioctl(struct file * filp,
            guest->img_size = user_image.size;
 
            DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
+           // overflow possible here, but only if the guest image is probably too large for the kernel anyway...
            guest->img = palacios_valloc(guest->img_size);
 
-           if (IS_ERR(guest->img)) {
+           if (!guest->img) {
                ERROR("Palacios Error: Could not allocate space for guest image\n");
                goto out_err1;
            }
@@ -128,7 +136,8 @@ static long v3_dev_ioctl(struct file * filp,
                goto out_err2;
            }      
 
-           strncpy(guest->name, user_image.name, 127);
+           strncpy(guest->name, user_image.name, 128);
+           guest->name[127] = 0;
 
            INIT_LIST_HEAD(&(guest->exts));
 
@@ -155,7 +164,7 @@ out_err:
            unsigned long vm_idx = arg;
             struct v3_guest * guest;
 
-            if (vm_idx > MAX_VMS) {
+            if (vm_idx >= MAX_VMS) {
                 ERROR("Invalid VM index: %ld\n", vm_idx);
                 return -1;
             }
@@ -265,42 +274,53 @@ struct proc_dir_entry *palacios_get_procdir(void)
 }
 
 
-#define MAX_VCORES  256
+#define MAX_CORES   1024
 #define MAX_REGIONS 1024
-
-
+#define MIN(x,y) ((x)<(y) ? (x) : (y))
 
 static int read_guests_details(struct seq_file *s, void *v)
 {
     unsigned int i = 0;
     unsigned int j = 0;
+    uint64_t num_vcores, num_regions;
+    uint64_t alloc_num_vcores, alloc_num_regions;
     struct v3_vm_base_state *base=0;
     struct v3_vm_core_state *core=0;
     struct v3_vm_mem_state *mem=0;
 
-    base = palacios_alloc(sizeof(struct v3_vm_base_state));
+
+    base = palacios_valloc(sizeof(struct v3_vm_base_state));
     
+
     if (!base) { 
       ERROR("No space for base state structure\n");
       goto out;
     }
 
-    core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
-    
-    if (!core) { 
-       ERROR("No space for core state structure\n");
-       goto out;
-    }
+
+    for(i = 0; i < MAX_VMS; i++) {
+
+       if (guest_map[i] != NULL) {
+           
+           v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
+           alloc_num_vcores = MIN(num_vcores,MAX_CORES);
+           alloc_num_regions = MIN(num_regions,MAX_REGIONS);
+
+           core = palacios_valloc(sizeof(struct v3_vm_core_state) + alloc_num_vcores*sizeof(struct v3_vm_vcore_state));
+           
+           if (!core) { 
+               ERROR("No space for core state structure\n");
+               goto out;
+           }
     
-    mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+           mem = palacios_valloc(sizeof(struct v3_vm_mem_state) + alloc_num_regions*sizeof(struct v3_vm_mem_region));
     
-    if (!mem) { 
-       ERROR("No space for memory state structure\n");
-       goto out;
-    }
+           if (!mem) { 
+               ERROR("No space for memory state structure\n");
+               goto out;
+           }
     
-    for(i = 0; i < MAX_VMS; i++) {
-       if (guest_map[i] != NULL) {
            seq_printf(s,
                       "---------------------------------------------------------------------------------------\n");
            seq_printf(s, 
@@ -310,34 +330,45 @@ static int read_guests_details(struct seq_file *s, void *v)
                       i,guest_map[i]->name, i);
            
            // Get extended data
-           core->num_vcores=MAX_VCORES; // max we can handle
-           mem->num_regions=MAX_REGIONS; // max we can handle
+           core->num_vcores=alloc_num_vcores;
+           mem->num_regions=alloc_num_regions;
            
            if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
                ERROR("Cannot get VM info\n");
                seq_printf(s, "<unable to get data for this VM>\n");
            } else {
                seq_printf(s, 
+                          "Type:         %s\n"
                           "State:        %s\n"
-                          "Cores:        %lu\n"
-                          "Regions:      %lu\n\n",
+                          "Cores:        %llu (%llu shown)\n"
+                          "Regions:      %llu (%llu shown)\n"
+                          "Memsize:      %llu (%llu ROS)\n\n",
+                          base->vm_type==V3_VM_GENERAL ? "general" :
+                          base->vm_type==V3_VM_HVM ? "HVM" : "UNKNOWN",
                           base->state==V3_VM_INVALID ? "INVALID" :
                           base->state==V3_VM_RUNNING ? "running" :
                           base->state==V3_VM_STOPPED ? "stopped" :
                           base->state==V3_VM_PAUSED ? "paused" :
                           base->state==V3_VM_ERROR ? "ERROR" :
-                          base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
+                          base->state==V3_VM_SIMULATING ? "simulating" : 
+                          base->state==V3_VM_RESETTING ? "resetting"  : "UNKNOWN",
+                          num_vcores,
                           core->num_vcores,
-                          mem->num_regions);
+                          num_regions,
+                          mem->num_regions,
+                          mem->mem_size,
+                          mem->ros_mem_size);
+
                seq_printf(s, "Core States\n");
                
                for (j=0;j<core->num_vcores;j++) {
                    seq_printf(s,
-                              "   vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s\n",
+                              "   vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s %s\n",
                               j, 
                               core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
                               core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
-                              core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
+                              core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" :
+                              core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN",
                               core->vcore[j].pcore,
                               core->vcore[j].num_exits,
                               core->vcore[j].last_rip,
@@ -350,25 +381,39 @@ static int read_guests_details(struct seq_file *s, void *v)
                               core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
                               core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
                               core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
-                              core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
+                              core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN",
+                              core->vcore[j].vcore_type==V3_VCORE_GENERAL ? "" :
+                              core->vcore[j].vcore_type==V3_VCORE_ROS ? "ros" :
+                              core->vcore[j].vcore_type==V3_VCORE_HRT ? "hrt" : "UNKNOWN");
                }
 
+
                seq_printf(s, "\nMemory Regions\n");
                for (j=0;j<mem->num_regions;j++) { 
-                   seq_printf(s,"   region %u has HPAs 0x%p-0x%p\n",
-                              j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size);
+                   seq_printf(s,"   region %u has HPAs 0x%016llx-0x%016llx (node %d) GPA 0x%016llx %s %s\n",
+                              j, (uint64_t)mem->region[j].host_paddr, (uint64_t)mem->region[j].host_paddr+mem->region[j].size,
+                              numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)),
+                              (uint64_t)mem->region[j].guest_paddr,
+                              mem->region[j].swapped ? "swapped" : "",
+                              mem->region[j].pinned ? "pinned" : "");
                }
+
            }
            seq_printf(s,
                       "---------------------------------------------------------------------------------------\n");
+
+           palacios_vfree(mem); mem=0;
+           palacios_vfree(core); core=0;
+
        }
+
     }
     
     
  out:
-    if (mem) { palacios_free(mem); }
-    if (core) { palacios_free(core); }
-    if (base) { palacios_free(base); }
+    if (mem) { palacios_vfree(mem); } // dead code but kept for clarity
+    if (core) { palacios_vfree(core); }
+    if (base) { palacios_vfree(base); }
     
     return 0;
 }
@@ -379,57 +424,69 @@ static int read_guests(struct seq_file *s, void *v)
     struct v3_vm_base_state *base=0;
     struct v3_vm_core_state *core=0;
     struct v3_vm_mem_state *mem=0;
+    uint64_t num_vcores, num_regions;
+
+
+    INFO("READ GUEST\n");
     
-    base = palacios_alloc(sizeof(struct v3_vm_base_state));
+    base = palacios_valloc(sizeof(struct v3_vm_base_state));
     
     if (!base) { 
       ERROR("No space for base state structure\n");
       goto out;
     }
 
-    core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
+    core = palacios_valloc(sizeof(struct v3_vm_core_state));
     
     if (!core) { 
        ERROR("No space for core state structure\n");
        goto out;
     }
     
-    mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+    mem = palacios_valloc(sizeof(struct v3_vm_mem_state));
     
     if (!mem) { 
        ERROR("No space for memory state structure\n");
        goto out;
     }
     
+
     for(i = 0; i < MAX_VMS; i++) {
        if (guest_map[i] != NULL) {
+
+           v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
            seq_printf(s,"%s\t/dev/v3-vm%d", guest_map[i]->name, i);
-           // Get extended data
-           core->num_vcores=MAX_VCORES; // max we can handle
-           mem->num_regions=MAX_REGIONS; // max we can handle
+
+           // Skip getting per core and per-region 
+           core->num_vcores=0;
+           mem->num_regions=0;
            
            if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
                ERROR("Cannot get VM info\n");
                seq_printf(s, "\t<unable to get data for this VM>\n");
            } else {
-               seq_printf(s,"\t%s\t%lu vcores\t%lu regions\n",
+               seq_printf(s,"\t%s\t%llu vcores\t%llu regions\t%llu mem\t%s\n",
                           base->state==V3_VM_INVALID ? "INVALID" :
                           base->state==V3_VM_RUNNING ? "running" :
                           base->state==V3_VM_STOPPED ? "stopped" :
                           base->state==V3_VM_PAUSED ? "paused" :
                           base->state==V3_VM_ERROR ? "ERROR" :
                           base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
-                          core->num_vcores,
-                          mem->num_regions);
+                          num_vcores,
+                          num_regions,
+                          mem->mem_size,
+                          base->vm_type == V3_VM_GENERAL ? "general" :
+                          base->vm_type == V3_VM_HVM ? "hvm" : "UNKNOWN");
            }
        }
     }
        
        
  out:
-    if (mem) { palacios_free(mem); }
-    if (core) { palacios_free(core); }
-    if (base) { palacios_free(base); }
+    if (mem) { palacios_vfree(mem); }
+    if (core) { palacios_vfree(core); }
+    if (base) { palacios_vfree(base); }
     
     return 0;
 }
@@ -437,18 +494,17 @@ static int read_guests(struct seq_file *s, void *v)
 
 static int guests_short_proc_open(struct inode * inode, struct file * filp) 
 {
-    struct proc_dir_entry * proc_entry = PDE(inode);
-    return single_open(filp, read_guests, proc_entry->data);
+    return single_open(filp, read_guests, PAL_PROC_GETDATA(inode));
 }
 
 static int guests_full_proc_open(struct inode * inode, struct file * filp) 
 {
-    struct proc_dir_entry * proc_entry = PDE(inode);
-    return single_open(filp, read_guests_details, proc_entry->data);
+    return single_open(filp, read_guests_details, PAL_PROC_GETDATA(inode));
 }
 
 
 
+
 static struct file_operations guest_full_proc_ops = {
     .owner = THIS_MODULE,
     .open = guests_full_proc_open, 
@@ -465,6 +521,87 @@ static struct file_operations guest_short_proc_ops = {
     .release = single_release,
 };
 
+// Supply basic information that the user-space tools need
+// to manipulate Palacios.   The current use case here is to 
+// convey memory information
+static int read_info(struct seq_file *s, void *v)
+{
+    uint64_t mem_block_size;
+    int i,j;
+    int max_node=-1;
+    seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
+    seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
+    seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
+    seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
+    if (!v3_lookup_option("mem_block_size")) { 
+       mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
+    } else {
+       if (strict_strtoull(v3_lookup_option("mem_block_size"), 0, &mem_block_size)) {
+           // huh?
+           mem_block_size=-1;
+       }
+    }
+    seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
+    
+    seq_printf(s,"\nCPU to node mappings\n");
+    for (i=0;i<num_online_cpus();i++) { 
+       seq_printf(s,"cpu %d -> node %d\n", i, numa_cpu_to_node(i));
+       if (numa_cpu_to_node(i)>max_node) { 
+           max_node=numa_cpu_to_node(i);
+       }
+    }
+    seq_printf(s,"\nNode to node distances\n");
+    for (j=0;j<=max_node;j++) { 
+       seq_printf(s,"   \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<=max_node;i++) { 
+       seq_printf(s,"%2d ",i);
+       for (j=0;j<=max_node;j++) { 
+           seq_printf(s,"\t%2d", numa_get_distance(i,j));
+       }
+       seq_printf(s,"\n");
+    }
+    seq_printf(s,"\nCPU to CPU distances\n");
+    for (j=0;j<num_online_cpus();j++) { 
+       seq_printf(s,"   \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<num_online_cpus();i++) { 
+       seq_printf(s,"%2d ",i);
+       for (j=0;j<num_online_cpus();j++) { 
+           seq_printf(s,"\t%2d", numa_get_distance(numa_cpu_to_node(i),numa_cpu_to_node(j)));
+       }
+       seq_printf(s,"\n");
+    }
+    return 0;
+}
+
+static int info_proc_open(struct inode * inode, struct file * filp) 
+{
+    return single_open(filp, read_info, PAL_PROC_GETDATA(inode));
+}
+
+
+
+static struct file_operations info_proc_ops = {
+    .owner = THIS_MODULE,
+    .open = info_proc_open, 
+    .read = seq_read,
+    .llseek = seq_lseek, 
+    .release = single_release,
+};
+
+
+static inline uint_t thr_hash_func(addr_t key)
+{
+    return palacios_hash_long((long)key,64);
+}
+
+static inline int thr_hash_comp(addr_t k1, addr_t k2)
+{
+    return k1==k2;
+}
 
 static int __init v3_init(void) {
 
@@ -474,6 +611,13 @@ static int __init v3_init(void) {
     LOCKCHECK_INIT();
     MEMCHECK_INIT();
 
+
+    if (!(v3_thread_resource_map = palacios_create_htable(MAX_THREADS,thr_hash_func,thr_hash_comp))) { 
+       ERROR("Could not create thread/resource map\n");
+       ret = -1;
+       goto failure0;
+    }
+
     palacios_proc_dir = proc_mkdir("v3vee", NULL);
     if (!palacios_proc_dir) {
        ERROR("Could not create proc entry\n");
@@ -490,6 +634,10 @@ static int __init v3_init(void) {
       palacios_allow_devmem();
     }
 
+    // numa is now a required interface and we need it
+    // up before primary initialization
+    palacios_init_numa();
+
     // Initialize Palacios
     palacios_vmm_init(options);
 
@@ -498,7 +646,7 @@ static int __init v3_init(void) {
 
 
     v3_class = class_create(THIS_MODULE, "vms");
-    if (IS_ERR(v3_class)) {
+    if (!v3_class || IS_ERR(v3_class)) {
        ERROR("Failed to register V3 VM device class\n");
        ret =  PTR_ERR(v3_class);
        goto failure3;
@@ -534,30 +682,38 @@ static int __init v3_init(void) {
     {
        struct proc_dir_entry *entry;
 
-       entry = create_proc_entry("v3-guests", 0444, palacios_proc_dir);
-        if (entry) {
-           entry->proc_fops = &guest_short_proc_ops;
-           INFO("/proc/v3vee/v3-guests successfully created\n");
-       } else {
-           ERROR("Could not create proc entry\n");
-           goto failure6;
-       }
-       entry = create_proc_entry("v3-guests-details", 0444, palacios_proc_dir);
-        if (entry) {
-           entry->proc_fops = &guest_full_proc_ops;
-           INFO("/proc/v3vee/v3-guests-details successfully created\n");
-       } else {
-           ERROR("Could not create proc entry\n");
-           goto failure7;
-       }
+#define PALPROC(ent, success, error, out_target, fname, perm, parent, fops) \
+    PAL_PROC_CREATE(ent, fname, perm, parent, fops); \
+    if (ent) {                                    \
+        INFO(success);                            \
+    } else {                                      \
+        ERROR(error);                             \
+        goto out_target;                          \
+    }
+
+    PALPROC(entry, "/proc/v3vee/v3-guests succesfully created\n", 
+            "Could not create proc entry\n", failure6,
+            "v3-guests", 0444, palacios_proc_dir, &guest_short_proc_ops);
+
+    PALPROC(entry, "/proc/v3vee/v3-guests-details successfully created\n",
+            "Could not create proc entry\n", failure7, 
+            "v3-guests-details", 0444, palacios_proc_dir, &guest_full_proc_ops);
+
+    PALPROC(entry, "/proc/v3vee/v3-info successfully created\n", 
+            "Could not create proc entry\n", failure8, 
+            "v3-info", 0444, palacios_proc_dir, &info_proc_ops);
+
+
     }
        
     return 0;
 
- failure7:
+ failure8:
     remove_proc_entry("v3-guests-details", palacios_proc_dir);
- failure6:
+ failure7:
     remove_proc_entry("v3-guests", palacios_proc_dir);
+ failure6:
+    device_destroy(v3_class, dev);
  failure5:
     unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
  failure4:
@@ -570,6 +726,8 @@ static int __init v3_init(void) {
  failure2:
     remove_proc_entry("v3vee", NULL);
  failure1:   
+    palacios_free_htable(v3_thread_resource_map,0,0);
+ failure0:   
     MEMCHECK_DEINIT();
     LOCKCHECK_DEINIT();
 
@@ -591,8 +749,13 @@ static void __exit v3_exit(void) {
 
     /* Stop and free any running VMs */ 
     for (i = 0; i < MAX_VMS; i++) {
-       if (guest_map[i] != NULL) {
-                guest = (struct v3_guest *)guest_map[i];
+               if (guest_map[i] != NULL) {
+                   guest = (struct v3_guest *)(guest_map[i]);
+
+               if (!guest->v3_ctx) { 
+                   ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name);
+                   continue;
+               }
 
                 if (v3_stop_vm(guest->v3_ctx) < 0) 
                         ERROR("Couldn't stop VM %d\n", i);
@@ -609,6 +772,8 @@ static void __exit v3_exit(void) {
 
     palacios_vmm_exit();
 
+    palacios_deinit_numa();
+
     DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
     DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
     DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
@@ -629,12 +794,15 @@ static void __exit v3_exit(void) {
 
     palacios_deinit_mm();
 
+    remove_proc_entry("v3-info", palacios_proc_dir);
     remove_proc_entry("v3-guests-details", palacios_proc_dir);
     remove_proc_entry("v3-guests", palacios_proc_dir);
     remove_proc_entry("v3vee", NULL);
 
     DEBUG("Palacios Module Mallocs = %d, Frees = %d\n", mod_allocs, mod_frees);
     
+    palacios_free_htable(v3_thread_resource_map,0,0);
+
     MEMCHECK_DEINIT();
     LOCKCHECK_DEINIT();
 }