Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Expose HVM state to host + Linux host /proc additions for it
[palacios.git] / linux_module / main.c
index 7ec627b..723e5ea 100644 (file)
@@ -3,7 +3,7 @@
    (c) Jack Lange, 2010
  */
 
-
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/errno.h>
 #include "palacios.h"
 #include "mm.h"
 #include "vm.h"
+#include "numa.h"
 #include "allow_devmem.h"
 #include "memcheck.h"
 #include "lockcheck.h"
 
 #include "linux-exts.h"
 
-
 MODULE_LICENSE("GPL");
 
 // Module parameter
@@ -90,7 +90,7 @@ static long v3_dev_ioctl(struct file * filp,
            struct v3_guest_img user_image;
            struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
 
-           if (IS_ERR(guest)) {
+           if (!(guest)) {
                ERROR("Palacios: Error allocating Kernel guest_image\n");
                return -EFAULT;
            }
@@ -118,7 +118,7 @@ static long v3_dev_ioctl(struct file * filp,
            DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
            guest->img = palacios_valloc(guest->img_size);
 
-           if (IS_ERR(guest->img)) {
+           if (!guest->img) {
                ERROR("Palacios Error: Could not allocate space for guest image\n");
                goto out_err1;
            }
@@ -274,6 +274,7 @@ static int read_guests_details(struct seq_file *s, void *v)
 {
     unsigned int i = 0;
     unsigned int j = 0;
+    uint64_t num_vcores, num_regions;
     struct v3_vm_base_state *base=0;
     struct v3_vm_core_state *core=0;
     struct v3_vm_mem_state *mem=0;
@@ -285,22 +286,26 @@ static int read_guests_details(struct seq_file *s, void *v)
       goto out;
     }
 
-    core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
-    
-    if (!core) { 
-       ERROR("No space for core state structure\n");
-       goto out;
-    }
+    for(i = 0; i < MAX_VMS; i++) {
+
+       if (guest_map[i] != NULL) {
+           
+           v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
+           core = palacios_alloc(sizeof(struct v3_vm_core_state) + num_vcores*sizeof(struct v3_vm_vcore_state));
+           
+           if (!core) { 
+               ERROR("No space for core state structure\n");
+               goto out;
+           }
     
-    mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+           mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + num_regions*sizeof(struct v3_vm_mem_region));
     
-    if (!mem) { 
-       ERROR("No space for memory state structure\n");
-       goto out;
-    }
+           if (!mem) { 
+               ERROR("No space for memory state structure\n");
+               goto out;
+           }
     
-    for(i = 0; i < MAX_VMS; i++) {
-       if (guest_map[i] != NULL) {
            seq_printf(s,
                       "---------------------------------------------------------------------------------------\n");
            seq_printf(s, 
@@ -310,34 +315,43 @@ static int read_guests_details(struct seq_file *s, void *v)
                       i,guest_map[i]->name, i);
            
            // Get extended data
-           core->num_vcores=MAX_VCORES; // max we can handle
-           mem->num_regions=MAX_REGIONS; // max we can handle
+           core->num_vcores=num_vcores;
+           mem->num_regions=num_regions;
            
            if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
                ERROR("Cannot get VM info\n");
                seq_printf(s, "<unable to get data for this VM>\n");
            } else {
                seq_printf(s, 
+                          "Type:         %s\n"
                           "State:        %s\n"
-                          "Cores:        %lu\n"
-                          "Regions:      %lu\n\n",
+                          "Cores:        %llu\n"
+                          "Regions:      %llu\n"
+                          "Memsize:      %llu (%llu ROS)\n\n",
+                          base->vm_type==V3_VM_GENERAL ? "general" :
+                          base->vm_type==V3_VM_HVM ? "HVM" : "UNKNOWN",
                           base->state==V3_VM_INVALID ? "INVALID" :
                           base->state==V3_VM_RUNNING ? "running" :
                           base->state==V3_VM_STOPPED ? "stopped" :
                           base->state==V3_VM_PAUSED ? "paused" :
                           base->state==V3_VM_ERROR ? "ERROR" :
-                          base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
+                          base->state==V3_VM_SIMULATING ? "simulating" : 
+                          base->state==V3_VM_RESETTING ? "resetting"  : "UNKNOWN",
                           core->num_vcores,
-                          mem->num_regions);
+                          mem->num_regions,
+                          mem->mem_size,
+                          mem->ros_mem_size);
+
                seq_printf(s, "Core States\n");
                
                for (j=0;j<core->num_vcores;j++) {
                    seq_printf(s,
-                              "   vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s\n",
+                              "   vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s %s\n",
                               j, 
                               core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
                               core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
-                              core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
+                              core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" :
+                              core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN",
                               core->vcore[j].pcore,
                               core->vcore[j].num_exits,
                               core->vcore[j].last_rip,
@@ -350,18 +364,30 @@ static int read_guests_details(struct seq_file *s, void *v)
                               core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
                               core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
                               core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
-                              core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
+                              core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN",
+                              core->vcore[j].vcore_type==V3_VCORE_GENERAL ? "" :
+                              core->vcore[j].vcore_type==V3_VCORE_ROS ? "ros" :
+                              core->vcore[j].vcore_type==V3_VCORE_HRT ? "hrt" : "UNKNOWN");
                }
 
                seq_printf(s, "\nMemory Regions\n");
                for (j=0;j<mem->num_regions;j++) { 
-                   seq_printf(s,"   region %u has HPAs 0x%p-0x%p\n",
-                              j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size);
+                   seq_printf(s,"   region %u has HPAs 0x%016llx-0x%016llx (node %d) GPA 0x%016llx %s %s\n",
+                              j, (uint64_t)mem->region[j].host_paddr, (uint64_t)mem->region[j].host_paddr+mem->region[j].size,
+                              numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)),
+                              (uint64_t)mem->region[j].guest_paddr,
+                              mem->region[j].swapped ? "swapped" : "",
+                              mem->region[j].pinned ? "pinned" : "");
                }
            }
            seq_printf(s,
                       "---------------------------------------------------------------------------------------\n");
+
+           palacios_free(mem); mem=0;
+           palacios_free(core); core=0;
+
        }
+
     }
     
     
@@ -412,7 +438,7 @@ static int read_guests(struct seq_file *s, void *v)
                ERROR("Cannot get VM info\n");
                seq_printf(s, "\t<unable to get data for this VM>\n");
            } else {
-               seq_printf(s,"\t%s\t%lu vcores\t%lu regions\n",
+               seq_printf(s,"\t%s\t%llu vcores\t%llu regions\t%llu mem\t%s\n",
                           base->state==V3_VM_INVALID ? "INVALID" :
                           base->state==V3_VM_RUNNING ? "running" :
                           base->state==V3_VM_STOPPED ? "stopped" :
@@ -420,7 +446,10 @@ static int read_guests(struct seq_file *s, void *v)
                           base->state==V3_VM_ERROR ? "ERROR" :
                           base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
                           core->num_vcores,
-                          mem->num_regions);
+                          mem->num_regions,
+                          mem->mem_size,
+                          base->vm_type == V3_VM_GENERAL ? "general" :
+                          base->vm_type == V3_VM_HVM ? "hvm" : "UNKNOWN");
            }
        }
     }
@@ -449,6 +478,7 @@ static int guests_full_proc_open(struct inode * inode, struct file * filp)
 
 
 
+
 static struct file_operations guest_full_proc_ops = {
     .owner = THIS_MODULE,
     .open = guests_full_proc_open, 
@@ -465,6 +495,78 @@ static struct file_operations guest_short_proc_ops = {
     .release = single_release,
 };
 
+// seq_file show routine supplying basic information that user-space tools need
+// to manipulate Palacios: MAX_ORDER, node/cpu counts, compiled and run-time
+// mem_block_size, cpu->node mappings, and node/cpu distance tables.
+static int read_info(struct seq_file *s, void *v)
+{
+    uint64_t mem_block_size;
+    int i,j;
+    int max_node=-1; // highest node id seen in the cpu->node scan below; stays -1 if no cpu is scanned
+    seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
+    seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
+    seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
+    seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
+    if (!v3_lookup_option("mem_block_size")) { // presumably means the option was not supplied at run time - confirm
+       mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
+    } else {
+       if (strict_strtoull(v3_lookup_option("mem_block_size"), 0, &mem_block_size)) {
+           // option string did not parse as a number; -1 stored in a uint64_t prints as the maximum value
+           mem_block_size=-1;
+       }
+    }
+    seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
+    
+    seq_printf(s,"\nCPU to node mappings\n");
+    for (i=0;i<num_online_cpus();i++) { // NOTE(review): assumes online cpu ids are 0..num_online_cpus()-1 - confirm
+       seq_printf(s,"cpu %d -> node %d\n", i, numa_cpu_to_node(i));
+       if (numa_cpu_to_node(i)>max_node) { 
+           max_node=numa_cpu_to_node(i);
+       }
+    }
+    seq_printf(s,"\nNode to node distances\n");
+    for (j=0;j<=max_node;j++) { // column header row: one label per node id up to max_node
+       seq_printf(s,"   \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<=max_node;i++) { // one table row per node id, labelled with the node id
+       seq_printf(s,"%2d ",i);
+       for (j=0;j<=max_node;j++) { 
+           seq_printf(s,"\t%2d", numa_get_distance(i,j));
+       }
+       seq_printf(s,"\n");
+    }
+    seq_printf(s,"\nCPU to CPU distances\n");
+    for (j=0;j<num_online_cpus();j++) { // column header row: one label per cpu index
+       seq_printf(s,"   \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<num_online_cpus();i++) { 
+       seq_printf(s,"%2d ",i);
+       for (j=0;j<num_online_cpus();j++) { 
+           seq_printf(s,"\t%2d", numa_get_distance(numa_cpu_to_node(i),numa_cpu_to_node(j))); // cpu distance = distance between the two cpus' nodes
+       }
+       seq_printf(s,"\n");
+    }
+    return 0; // no failure path: always returns 0; the v argument is unused
+}
+
+static int info_proc_open(struct inode * inode, struct file * filp) // open handler wired into info_proc_ops
+{
+    struct proc_dir_entry * proc_entry = PDE(inode); // proc entry associated with the inode being opened
+    return single_open(filp, read_info, proc_entry->data); // read_info is the show routine; the entry's data pointer is handed to single_open
+}
+
+
+
+static struct file_operations info_proc_ops = { // file operations table whose open handler is info_proc_open
+    .owner = THIS_MODULE,
+    .open = info_proc_open, 
+    .read = seq_read, // reads and seeks go through the seq_file helpers
+    .llseek = seq_lseek, 
+    .release = single_release, // pairs with the single_open() call in info_proc_open
+};
+
 
 static int __init v3_init(void) {
 
@@ -490,6 +592,10 @@ static int __init v3_init(void) {
       palacios_allow_devmem();
     }
 
+    // numa is now a required interface and we need it
+    // up before primary initialization
+    palacios_init_numa();
+
     // Initialize Palacios
     palacios_vmm_init(options);
 
@@ -498,7 +604,7 @@ static int __init v3_init(void) {
 
 
     v3_class = class_create(THIS_MODULE, "vms");
-    if (IS_ERR(v3_class)) {
+    if (!v3_class || IS_ERR(v3_class)) {
        ERROR("Failed to register V3 VM device class\n");
        ret =  PTR_ERR(v3_class);
        goto failure3;
@@ -550,14 +656,27 @@ static int __init v3_init(void) {
            ERROR("Could not create proc entry\n");
            goto failure7;
        }
+
+       entry = create_proc_entry("v3-info", 0444, palacios_proc_dir);
+        if (entry) {
+           entry->proc_fops = &info_proc_ops;
+           INFO("/proc/v3vee/v3-info successfully created\n");
+       } else {
+           ERROR("Could not create proc entry\n");
+           goto failure8;
+       }
+
+
     }
        
     return 0;
 
- failure7:
+ failure8:
     remove_proc_entry("v3-guests-details", palacios_proc_dir);
- failure6:
+ failure7:
     remove_proc_entry("v3-guests", palacios_proc_dir);
+ failure6:
+    device_destroy(v3_class, dev);
  failure5:
     unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
  failure4:
@@ -591,8 +710,13 @@ static void __exit v3_exit(void) {
 
     /* Stop and free any running VMs */ 
     for (i = 0; i < MAX_VMS; i++) {
-       if (guest_map[i] != NULL) {
-                guest = (struct v3_guest *)guest_map[i];
+               if (guest_map[i] != NULL) {
+                   guest = (struct v3_guest *)(guest_map[i]);
+
+               if (!guest->v3_ctx) { 
+                   ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name);
+                   continue;
+               }
 
                 if (v3_stop_vm(guest->v3_ctx) < 0) 
                         ERROR("Couldn't stop VM %d\n", i);
@@ -609,6 +733,8 @@ static void __exit v3_exit(void) {
 
     palacios_vmm_exit();
 
+    palacios_deinit_numa();
+
     DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
     DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
     DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
@@ -629,6 +755,7 @@ static void __exit v3_exit(void) {
 
     palacios_deinit_mm();
 
+    remove_proc_entry("v3-info", palacios_proc_dir);
     remove_proc_entry("v3-guests-details", palacios_proc_dir);
     remove_proc_entry("v3-guests", palacios_proc_dir);
     remove_proc_entry("v3vee", NULL);