Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Expose HVM state to host + Linux host /proc additions for it
[palacios.git] / linux_module / main.c
index 228a1d3..723e5ea 100644 (file)
@@ -3,7 +3,7 @@
    (c) Jack Lange, 2010
  */
 
-
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/errno.h>
 #include <linux/kthread.h>
 
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 #include <palacios/vmm.h>
 
 #include "palacios.h"
 #include "mm.h"
 #include "vm.h"
+#include "numa.h"
 #include "allow_devmem.h"
 #include "memcheck.h"
 #include "lockcheck.h"
 
 #include "linux-exts.h"
 
-
 MODULE_LICENSE("GPL");
 
 // Module parameter
@@ -89,7 +90,7 @@ static long v3_dev_ioctl(struct file * filp,
            struct v3_guest_img user_image;
            struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
 
-           if (IS_ERR(guest)) {
+           if (!(guest)) {
                ERROR("Palacios: Error allocating Kernel guest_image\n");
                return -EFAULT;
            }
@@ -117,7 +118,7 @@ static long v3_dev_ioctl(struct file * filp,
            DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
            guest->img = palacios_valloc(guest->img_size);
 
-           if (IS_ERR(guest->img)) {
+           if (!guest->img) {
                ERROR("Palacios Error: Could not allocate space for guest image\n");
                goto out_err1;
            }
@@ -196,11 +197,38 @@ out_err:
            break;
        }
 
+       case V3_REMOVE_MEMORY: {
+           struct v3_mem_region mem;
+           
+           memset(&mem, 0, sizeof(struct v3_mem_region));
+           
+           if (copy_from_user(&mem, argp, sizeof(struct v3_mem_region))) {
+               ERROR("copy from user error getting mem_region...\n");
+               return -EFAULT;
+           }
+
+           DEBUG("Removing memory at address %p\n", (void*)(mem.base_addr));
+
+           if (remove_palacios_memory(&mem) == -1) {
+               ERROR("Error removing memory from Palacios\n");
+               return -EFAULT;
+           }
+
+           break;
+       }
+           
+           
+
         case V3_RESET_MEMORY: {
-            if (palacios_init_mm() == -1) {
-                ERROR("Error resetting Palacios memory\n");
+           DEBUG("Resetting memory\n");
+            if (palacios_deinit_mm() == -1) {
+                ERROR("Error deiniting the Palacios memory manager\n");
                 return -EFAULT;
             }
+           if (palacios_init_mm()) { 
+               ERROR("Error initing the Palacios memory manager\n");
+               return -EFAULT;
+           }
             break;  
         }
 
@@ -238,13 +266,15 @@ struct proc_dir_entry *palacios_get_procdir(void)
 
 
 #define MAX_VCORES  256
-#define MAX_REGIONS 256
+#define MAX_REGIONS 1024
+
+
 
-static int read_guests(char * buf, char ** start, off_t off, int count,
-                      int * eof, void * data)
+static int read_guests_details(struct seq_file *s, void *v)
 {
-    int len = 0;
     unsigned int i = 0;
+    unsigned int j = 0;
+    uint64_t num_vcores, num_regions;
     struct v3_vm_base_state *base=0;
     struct v3_vm_core_state *core=0;
     struct v3_vm_mem_state *mem=0;
@@ -256,130 +286,287 @@ static int read_guests(char * buf, char ** start, off_t off, int count,
       goto out;
     }
 
-    core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
+    for(i = 0; i < MAX_VMS; i++) {
+
+       if (guest_map[i] != NULL) {
+           
+           v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
+           core = palacios_alloc(sizeof(struct v3_vm_core_state) + num_vcores*sizeof(struct v3_vm_vcore_state));
+           
+           if (!core) { 
+               ERROR("No space for core state structure\n");
+               goto out;
+           }
     
-    if (!core) { 
-      ERROR("No space for core state structure\n");
+           mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + num_regions*sizeof(struct v3_vm_mem_region));
+    
+           if (!mem) { 
+               ERROR("No space for memory state structure\n");
+               goto out;
+           }
+    
+           seq_printf(s,
+                      "---------------------------------------------------------------------------------------\n");
+           seq_printf(s, 
+                      "Entry:        %d\n"
+                      "Name:         %s\n"
+                      "Device:       /dev/v3-vm%d\n", 
+                      i,guest_map[i]->name, i);
+           
+           // Get extended data
+           core->num_vcores=num_vcores;
+           mem->num_regions=num_regions;
+           
+           if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
+               ERROR("Cannot get VM info\n");
+               seq_printf(s, "<unable to get data for this VM>\n");
+           } else {
+               seq_printf(s, 
+                          "Type:         %s\n"
+                          "State:        %s\n"
+                          "Cores:        %llu\n"
+                          "Regions:      %llu\n"
+                          "Memsize:      %llu (%llu ROS)\n\n",
+                          base->vm_type==V3_VM_GENERAL ? "general" :
+                          base->vm_type==V3_VM_HVM ? "HVM" : "UNKNOWN",
+                          base->state==V3_VM_INVALID ? "INVALID" :
+                          base->state==V3_VM_RUNNING ? "running" :
+                          base->state==V3_VM_STOPPED ? "stopped" :
+                          base->state==V3_VM_PAUSED ? "paused" :
+                          base->state==V3_VM_ERROR ? "ERROR" :
+                          base->state==V3_VM_SIMULATING ? "simulating" : 
+                          base->state==V3_VM_RESETTING ? "resetting"  : "UNKNOWN",
+                          core->num_vcores,
+                          mem->num_regions,
+                          mem->mem_size,
+                          mem->ros_mem_size);
+
+               seq_printf(s, "Core States\n");
+               
+               for (j=0;j<core->num_vcores;j++) {
+                   seq_printf(s,
+                              "   vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s %s\n",
+                              j, 
+                              core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
+                              core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
+                              core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" :
+                              core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN",
+                              core->vcore[j].pcore,
+                              core->vcore[j].num_exits,
+                              core->vcore[j].last_rip,
+                              core->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" :
+                              core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" :
+                              core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" :
+                              core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" :
+                              core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" :
+                              core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN",
+                              core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
+                              core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
+                              core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
+                              core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN",
+                              core->vcore[j].vcore_type==V3_VCORE_GENERAL ? "" :
+                              core->vcore[j].vcore_type==V3_VCORE_ROS ? "ros" :
+                              core->vcore[j].vcore_type==V3_VCORE_HRT ? "hrt" : "UNKNOWN");
+               }
+
+               seq_printf(s, "\nMemory Regions\n");
+               for (j=0;j<mem->num_regions;j++) { 
+                   seq_printf(s,"   region %u has HPAs 0x%016llx-0x%016llx (node %d) GPA 0x%016llx %s %s\n",
+                              j, (uint64_t)mem->region[j].host_paddr, (uint64_t)mem->region[j].host_paddr+mem->region[j].size,
+                              numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)),
+                              (uint64_t)mem->region[j].guest_paddr,
+                              mem->region[j].swapped ? "swapped" : "",
+                              mem->region[j].pinned ? "pinned" : "");
+               }
+           }
+           seq_printf(s,
+                      "---------------------------------------------------------------------------------------\n");
+
+           palacios_free(mem); mem=0;
+           palacios_free(core); core=0;
+
+       }
+
+    }
+    
+    
+ out:
+    if (mem) { palacios_free(mem); }
+    if (core) { palacios_free(core); }
+    if (base) { palacios_free(base); }
+    
+    return 0;
+}
+
+static int read_guests(struct seq_file *s, void *v)
+{
+    unsigned int i = 0;
+    struct v3_vm_base_state *base=0;
+    struct v3_vm_core_state *core=0;
+    struct v3_vm_mem_state *mem=0;
+    
+    base = palacios_alloc(sizeof(struct v3_vm_base_state));
+    
+    if (!base) { 
+      ERROR("No space for base state structure\n");
       goto out;
     }
 
+    core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
+    
+    if (!core) { 
+       ERROR("No space for core state structure\n");
+       goto out;
+    }
+    
     mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
     
     if (!mem) { 
-      ERROR("No space for memory state structure\n");
-      goto out;
+       ERROR("No space for memory state structure\n");
+       goto out;
     }
-
+    
     for(i = 0; i < MAX_VMS; i++) {
-      if (guest_map[i] != NULL) {
-       if (len>=count) { 
-         goto out;
-       } else {
-         len += snprintf(buf+len, count-len,
-                         "%s\t/dev/v3-vm%d ", 
-                         guest_map[i]->name, i);
-         
-         if (len>=count) { 
-           *(buf+len-1)='\n';
-           goto out;
-         } else {
+       if (guest_map[i] != NULL) {
+           seq_printf(s,"%s\t/dev/v3-vm%d", guest_map[i]->name, i);
            // Get extended data
            core->num_vcores=MAX_VCORES; // max we can handle
            mem->num_regions=MAX_REGIONS; // max we can handle
+           
            if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
-             ERROR("Cannot get VM info\n");
-             *(buf+len-1)='\n';
-             goto out;
+               ERROR("Cannot get VM info\n");
+               seq_printf(s, "\t<unable to get data for this VM>\n");
            } else {
-             unsigned long j;
-
-             len+=snprintf(buf+len, count-len,
-                           "%s %lu regions [ ", 
-                           base->state==V3_VM_INVALID ? "INVALID" :
-                           base->state==V3_VM_RUNNING ? "running" :
-                           base->state==V3_VM_STOPPED ? "stopped" :
-                           base->state==V3_VM_PAUSED ? "paused" :
-                           base->state==V3_VM_ERROR ? "ERROR" :
-                           base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
-                           mem->num_regions);
-
-             if (len>=count) { 
-               *(buf+len-1)='\n';
-               goto out;
-             }
-
-             for (j=0;j<mem->num_regions;j++) { 
-                 len+=snprintf(buf+len, count-len,
-                               "(region %lu 0x%p-0x%p) ",
-                               j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size);
-                 if (len>=count) { 
-                     *(buf+len-1)='\n';
-                     goto out;
-                 }
-             }
-                 
-             len+=snprintf(buf+len, count-len,
-                           "] %lu vcores [ ", 
-                           core->num_vcores);
-
-             if (len>=count) { 
-               *(buf+len-1)='\n';
-               goto out;
-             }
-                 
-             for (j=0;j<core->num_vcores;j++) {
-               len+=snprintf(buf+len, count-len,
-                             "(vcore %lu %s on pcore %lu %llu exits rip=0x%p %s %s %s) ",
-                             j, 
-                             core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
-                             core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
-                             core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
-                             core->vcore[j].pcore,
-                             core->vcore[j].num_exits,
-                             core->vcore[j].last_rip,
-                             core->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" :
-                             core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" :
-                             core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" :
-                             core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" :
-                             core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" :
-                             core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN",
-                             core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
-                             core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
-                             core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
-                             core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
-               if (len>=count) {
-                   *(buf+len-1)='\n';
-                   goto out;
-               }
-             }
-
-             len+=snprintf(buf+len, count-len,
-                           "] ");
-
-             if (len>=count) { 
-               *(buf+len-1)='\n';
-               goto out;
-             }
-                 
-             *(buf+len-1)='\n';
-
+               seq_printf(s,"\t%s\t%llu vcores\t%llu regions\t%llu mem\t%s\n",
+                          base->state==V3_VM_INVALID ? "INVALID" :
+                          base->state==V3_VM_RUNNING ? "running" :
+                          base->state==V3_VM_STOPPED ? "stopped" :
+                          base->state==V3_VM_PAUSED ? "paused" :
+                          base->state==V3_VM_ERROR ? "ERROR" :
+                          base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
+                          core->num_vcores,
+                          mem->num_regions,
+                          mem->mem_size,
+                          base->vm_type == V3_VM_GENERAL ? "general" :
+                          base->vm_type == V3_VM_HVM ? "hvm" : "UNKNOWN");
            }
-         }
        }
-      }
     }
+       
+       
  out:
     if (mem) { palacios_free(mem); }
     if (core) { palacios_free(core); }
     if (base) { palacios_free(base); }
+    
+    return 0;
+}
+
+
+static int guests_short_proc_open(struct inode * inode, struct file * filp) 
+{
+    struct proc_dir_entry * proc_entry = PDE(inode);
+    return single_open(filp, read_guests, proc_entry->data);
+}
+
+static int guests_full_proc_open(struct inode * inode, struct file * filp) 
+{
+    struct proc_dir_entry * proc_entry = PDE(inode);
+    return single_open(filp, read_guests_details, proc_entry->data);
+}
+
+
+
+
+static struct file_operations guest_full_proc_ops = {
+    .owner = THIS_MODULE,
+    .open = guests_full_proc_open, 
+    .read = seq_read,
+    .llseek = seq_lseek, 
+    .release = single_release,
+};
+
+static struct file_operations guest_short_proc_ops = {
+    .owner = THIS_MODULE,
+    .open = guests_short_proc_open, 
+    .read = seq_read,
+    .llseek = seq_lseek, 
+    .release = single_release,
+};
 
-    return len;
+// Supply basic information that the user-space tools need
+// to manipulate Palacios.   The current use case here is to 
+// convey memory information
+static int read_info(struct seq_file *s, void *v)
+{
+    uint64_t mem_block_size;
+    int i,j;
+    int max_node=-1;
+    seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
+    seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
+    seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
+    seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
+    if (!v3_lookup_option("mem_block_size")) { 
+       mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
+    } else {
+       if (strict_strtoull(v3_lookup_option("mem_block_size"), 0, &mem_block_size)) {
+           // huh?
+           mem_block_size=-1;
+       }
+    }
+    seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
+    
+    seq_printf(s,"\nCPU to node mappings\n");
+    for (i=0;i<num_online_cpus();i++) { 
+       seq_printf(s,"cpu %d -> node %d\n", i, numa_cpu_to_node(i));
+       if (numa_cpu_to_node(i)>max_node) { 
+           max_node=numa_cpu_to_node(i);
+       }
+    }
+    seq_printf(s,"\nNode to node distances\n");
+    for (j=0;j<=max_node;j++) { 
+       seq_printf(s,"   \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<=max_node;i++) { 
+       seq_printf(s,"%2d ",i);
+       for (j=0;j<=max_node;j++) { 
+           seq_printf(s,"\t%2d", numa_get_distance(i,j));
+       }
+       seq_printf(s,"\n");
+    }
+    seq_printf(s,"\nCPU to CPU distances\n");
+    for (j=0;j<num_online_cpus();j++) { 
+       seq_printf(s,"   \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<num_online_cpus();i++) { 
+       seq_printf(s,"%2d ",i);
+       for (j=0;j<num_online_cpus();j++) { 
+           seq_printf(s,"\t%2d", numa_get_distance(numa_cpu_to_node(i),numa_cpu_to_node(j)));
+       }
+       seq_printf(s,"\n");
+    }
+    return 0;
 }
 
+static int info_proc_open(struct inode * inode, struct file * filp) 
+{
+    struct proc_dir_entry * proc_entry = PDE(inode);
+    return single_open(filp, read_info, proc_entry->data);
+}
 
 
 
+static struct file_operations info_proc_ops = {
+    .owner = THIS_MODULE,
+    .open = info_proc_open, 
+    .read = seq_read,
+    .llseek = seq_lseek, 
+    .release = single_release,
+};
+
 
 static int __init v3_init(void) {
 
@@ -405,6 +592,10 @@ static int __init v3_init(void) {
       palacios_allow_devmem();
     }
 
+    // numa is now a required interface and we need it
+    // up before primary initialization
+    palacios_init_numa();
+
     // Initialize Palacios
     palacios_vmm_init(options);
 
@@ -413,7 +604,7 @@ static int __init v3_init(void) {
 
 
     v3_class = class_create(THIS_MODULE, "vms");
-    if (IS_ERR(v3_class)) {
+    if (!v3_class || IS_ERR(v3_class)) {
        ERROR("Failed to register V3 VM device class\n");
        ret =  PTR_ERR(v3_class);
        goto failure3;
@@ -449,20 +640,43 @@ static int __init v3_init(void) {
     {
        struct proc_dir_entry *entry;
 
-       //INFO("palacios_proc_dir=%p before v3-guests\n",palacios_proc_dir);
-       entry = create_proc_read_entry("v3-guests", 0444, palacios_proc_dir, read_guests, NULL);
+       entry = create_proc_entry("v3-guests", 0444, palacios_proc_dir);
         if (entry) {
+           entry->proc_fops = &guest_short_proc_ops;
            INFO("/proc/v3vee/v3-guests successfully created\n");
        } else {
            ERROR("Could not create proc entry\n");
            goto failure6;
        }
+       entry = create_proc_entry("v3-guests-details", 0444, palacios_proc_dir);
+        if (entry) {
+           entry->proc_fops = &guest_full_proc_ops;
+           INFO("/proc/v3vee/v3-guests-details successfully created\n");
+       } else {
+           ERROR("Could not create proc entry\n");
+           goto failure7;
+       }
+
+       entry = create_proc_entry("v3-info", 0444, palacios_proc_dir);
+        if (entry) {
+           entry->proc_fops = &info_proc_ops;
+           INFO("/proc/v3vee/v3-info successfully created\n");
+       } else {
+           ERROR("Could not create proc entry\n");
+           goto failure8;
+       }
+
+
     }
        
     return 0;
 
- failure6:
+ failure8:
+    remove_proc_entry("v3-guests-details", palacios_proc_dir);
+ failure7:
     remove_proc_entry("v3-guests", palacios_proc_dir);
+ failure6:
+    device_destroy(v3_class, dev);
  failure5:
     unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
  failure4:
@@ -496,8 +710,13 @@ static void __exit v3_exit(void) {
 
     /* Stop and free any running VMs */ 
     for (i = 0; i < MAX_VMS; i++) {
-       if (guest_map[i] != NULL) {
-                guest = (struct v3_guest *)guest_map[i];
+               if (guest_map[i] != NULL) {
+                   guest = (struct v3_guest *)(guest_map[i]);
+
+               if (!guest->v3_ctx) { 
+                   ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name);
+                   continue;
+               }
 
                 if (v3_stop_vm(guest->v3_ctx) < 0) 
                         ERROR("Couldn't stop VM %d\n", i);
@@ -514,6 +733,8 @@ static void __exit v3_exit(void) {
 
     palacios_vmm_exit();
 
+    palacios_deinit_numa();
+
     DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
     DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
     DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
@@ -534,6 +755,8 @@ static void __exit v3_exit(void) {
 
     palacios_deinit_mm();
 
+    remove_proc_entry("v3-info", palacios_proc_dir);
+    remove_proc_entry("v3-guests-details", palacios_proc_dir);
     remove_proc_entry("v3-guests", palacios_proc_dir);
     remove_proc_entry("v3vee", NULL);