(c) Jack Lange, 2010
*/
-
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/errno.h>
#include <linux/kthread.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <palacios/vmm.h>
#include "palacios.h"
#include "mm.h"
#include "vm.h"
+#include "numa.h"
#include "allow_devmem.h"
#include "memcheck.h"
#include "lockcheck.h"
#include "linux-exts.h"
-
MODULE_LICENSE("GPL");
// Module parameter
struct v3_guest_img user_image;
struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
- if (IS_ERR(guest)) {
+ if (!(guest)) {
ERROR("Palacios: Error allocating Kernel guest_image\n");
return -EFAULT;
}
DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
guest->img = palacios_valloc(guest->img_size);
- if (IS_ERR(guest->img)) {
+ if (!guest->img) {
ERROR("Palacios Error: Could not allocate space for guest image\n");
goto out_err1;
}
break;
}
+ case V3_REMOVE_MEMORY: {
+ struct v3_mem_region mem;
+
+ memset(&mem, 0, sizeof(struct v3_mem_region));
+
+ if (copy_from_user(&mem, argp, sizeof(struct v3_mem_region))) {
+ ERROR("copy from user error getting mem_region...\n");
+ return -EFAULT;
+ }
+
+ DEBUG("Removing memory at address %p\n", (void*)(mem.base_addr));
+
+ if (remove_palacios_memory(&mem) == -1) {
+ ERROR("Error removing memory from Palacios\n");
+ return -EFAULT;
+ }
+
+ break;
+ }
+
+
+
case V3_RESET_MEMORY: {
- if (palacios_init_mm() == -1) {
- ERROR("Error resetting Palacios memory\n");
+ DEBUG("Resetting memory\n");
+ if (palacios_deinit_mm() == -1) {
+ ERROR("Error deiniting the Palacios memory manager\n");
return -EFAULT;
}
+ if (palacios_init_mm()) {
+ ERROR("Error initing the Palacios memory manager\n");
+ return -EFAULT;
+ }
break;
}
struct proc_dir_entry *palacios_get_procdir(void)
{
- INFO("Returning procdir=%p\n",palacios_proc_dir);
+ // INFO("Returning procdir=%p\n",palacios_proc_dir);
return palacios_proc_dir;
}
#define MAX_VCORES 256
-#define MAX_REGIONS 256
+#define MAX_REGIONS 1024
-static int read_guests(char * buf, char ** start, off_t off, int count,
- int * eof, void * data)
+
+
+static int read_guests_details(struct seq_file *s, void *v)
{
- int len = 0;
unsigned int i = 0;
+ unsigned int j = 0;
struct v3_vm_base_state *base=0;
struct v3_vm_core_state *core=0;
struct v3_vm_mem_state *mem=0;
core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
if (!core) {
- ERROR("No space for core state structure\n");
- goto out;
+ ERROR("No space for core state structure\n");
+ goto out;
}
-
+
mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
if (!mem) {
- ERROR("No space for memory state structure\n");
- goto out;
+ ERROR("No space for memory state structure\n");
+ goto out;
}
-
+
for(i = 0; i < MAX_VMS; i++) {
- if (guest_map[i] != NULL) {
- if (len>=count) {
- goto out;
- } else {
- len += snprintf(buf+len, count-len,
- "%s\t/dev/v3-vm%d ",
- guest_map[i]->name, i);
-
- if (len>=count) {
- *(buf+len-1)='\n';
- goto out;
- } else {
+ if (guest_map[i] != NULL) {
+ seq_printf(s,
+ "---------------------------------------------------------------------------------------\n");
+ seq_printf(s,
+ "Entry: %d\n"
+ "Name: %s\n"
+ "Device: /dev/v3-vm%d\n",
+ i,guest_map[i]->name, i);
+
// Get extended data
core->num_vcores=MAX_VCORES; // max we can handle
mem->num_regions=MAX_REGIONS; // max we can handle
+
if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
- ERROR("Cannot get VM info\n");
- *(buf+len-1)='\n';
- goto out;
+ ERROR("Cannot get VM info\n");
+ seq_printf(s, "<unable to get data for this VM>\n");
} else {
- unsigned long j;
-
- len+=snprintf(buf+len, count-len,
- "%s %lu regions [ ",
- base->state==V3_VM_INVALID ? "INVALID" :
- base->state==V3_VM_RUNNING ? "running" :
- base->state==V3_VM_STOPPED ? "stopped" :
- base->state==V3_VM_PAUSED ? "paused" :
- base->state==V3_VM_ERROR ? "ERROR" :
- base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
- mem->num_regions);
-
- if (len>=count) {
- *(buf+len-1)='\n';
- goto out;
- }
-
- for (j=0;j<mem->num_regions;j++) {
- len+=snprintf(buf+len, count-len,
- "(region %lu 0x%p-0x%p) ",
- j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size);
- if (len>=count) {
- *(buf+len-1)='\n';
- goto out;
- }
- }
-
- len+=snprintf(buf+len, count-len,
- "] %lu vcores [ ",
- core->num_vcores);
-
- if (len>=count) {
- *(buf+len-1)='\n';
- goto out;
- }
-
- for (j=0;j<core->num_vcores;j++) {
- len+=snprintf(buf+len, count-len,
- "(vcore %lu %s on pcore %lu %llu exits rip=0x%p %s %s %s) ",
- j,
- core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
- core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
- core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
- core->vcore[j].pcore,
- core->vcore[j].num_exits,
- core->vcore[j].last_rip,
- core->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" :
- core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" :
- core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" :
- core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" :
- core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" :
- core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN",
- core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
- core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
- core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
- core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
- if (len>=count) {
- *(buf+len-1)='\n';
- goto out;
+ seq_printf(s,
+ "State: %s\n"
+ "Cores: %lu\n"
+ "Regions: %lu\n\n",
+ base->state==V3_VM_INVALID ? "INVALID" :
+ base->state==V3_VM_RUNNING ? "running" :
+ base->state==V3_VM_STOPPED ? "stopped" :
+ base->state==V3_VM_PAUSED ? "paused" :
+ base->state==V3_VM_ERROR ? "ERROR" :
+ base->state==V3_VM_SIMULATING ? "simulating" :
+ base->state==V3_VM_RESETTING ? "resetting" : "UNKNOWN",
+ core->num_vcores,
+ mem->num_regions);
+ seq_printf(s, "Core States\n");
+
+ for (j=0;j<core->num_vcores;j++) {
+ seq_printf(s,
+ " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s\n",
+ j,
+ core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
+ core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
+ core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" :
+ core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN",
+ core->vcore[j].pcore,
+ core->vcore[j].num_exits,
+ core->vcore[j].last_rip,
+ core->vcore[j].cpu_mode==V3_VCORE_CPU_REAL ? "real" :
+ core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED ? "protected" :
+ core->vcore[j].cpu_mode==V3_VCORE_CPU_PROTECTED_PAE ? "protectedpae" :
+ core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG ? "long" :
+ core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_32_COMPAT ? "long32" :
+ core->vcore[j].cpu_mode==V3_VCORE_CPU_LONG_16_COMPAT ? "long16" : "UNKNOWN",
+ core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
+ core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
+ core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
+ core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
}
- }
- len+=snprintf(buf+len, count-len,
- "] ");
+ seq_printf(s, "\nMemory Regions\n");
+ for (j=0;j<mem->num_regions;j++) {
+ seq_printf(s," region %u has HPAs 0x%p-0x%p (node %d) %s %s\n",
+ j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size,
+ numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)),
+ mem->region[j].swapped ? "swapped" : "",
+ mem->region[j].pinned ? "pinned" : "");
+ }
+ }
+ seq_printf(s,
+ "---------------------------------------------------------------------------------------\n");
+ }
+ }
+
+
+ out:
+ if (mem) { palacios_free(mem); }
+ if (core) { palacios_free(core); }
+ if (base) { palacios_free(base); }
+
+ return 0;
+}
- if (len>=count) {
- *(buf+len-1)='\n';
- goto out;
- }
-
- *(buf+len-1)='\n';
+// seq_file "show" callback behind /proc/v3vee/v3-guests: emits one
+// summary line per registered guest (name, device node, VM state,
+// vcore count, region count).
+//
+// The scratch structures are sized for the largest VM we can report
+// on (MAX_VCORES / MAX_REGIONS) and are reused for every guest.
+// Always returns 0; if a scratch allocation fails the failure is
+// logged and nothing is written to the seq_file.
+static int read_guests(struct seq_file *s, void *v)
+{
+    unsigned int i = 0;
+    struct v3_vm_base_state *base=0;
+    struct v3_vm_core_state *core=0;
+    struct v3_vm_mem_state *mem=0;
+
+    base = palacios_alloc(sizeof(struct v3_vm_base_state));
+
+    if (!base) {
+        ERROR("No space for base state structure\n");
+        goto out;
+    }
+
+    core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
+
+    if (!core) {
+        ERROR("No space for core state structure\n");
+        goto out;
+    }
+
+    mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+
+    if (!mem) {
+        ERROR("No space for memory state structure\n");
+        goto out;
+    }
+
+    for(i = 0; i < MAX_VMS; i++) {
+        if (guest_map[i] != NULL) {
+            seq_printf(s,"%s\t/dev/v3-vm%d", guest_map[i]->name, i);
+            // Get extended data
+            core->num_vcores=MAX_VCORES; // max we can handle
+            mem->num_regions=MAX_REGIONS; // max we can handle
+
+            if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
+                ERROR("Cannot get VM info\n");
+                seq_printf(s, "\t<unable to get data for this VM>\n");
+            } else {
+                // Keep this state list in step with read_guests_details
+                seq_printf(s,"\t%s\t%lu vcores\t%lu regions\n",
+                           base->state==V3_VM_INVALID ? "INVALID" :
+                           base->state==V3_VM_RUNNING ? "running" :
+                           base->state==V3_VM_STOPPED ? "stopped" :
+                           base->state==V3_VM_PAUSED ? "paused" :
+                           base->state==V3_VM_ERROR ? "ERROR" :
+                           base->state==V3_VM_SIMULATING ? "simulating" :
+                           base->state==V3_VM_RESETTING ? "resetting" : "UNKNOWN",
+                           core->num_vcores,
+                           mem->num_regions);
}
- }
}
- }
}
-
+
+
out:
if (mem) { palacios_free(mem); }
if (core) { palacios_free(core); }
if (base) { palacios_free(base); }
+
+    return 0;
+}
+
+
+// open() handler for the short guest listing: hands the single-shot
+// seq_file reader read_guests the private data of this proc entry.
+static int guests_short_proc_open(struct inode * inode, struct file * filp)
+{
+    return single_open(filp, read_guests, PDE(inode)->data);
+}
- return len;
+// open() handler for the detailed guest listing: hands the single-shot
+// seq_file reader read_guests_details the private data of this proc entry.
+static int guests_full_proc_open(struct inode * inode, struct file * filp)
+{
+    return single_open(filp, read_guests_details, PDE(inode)->data);
}
+// File operations for the detailed guest listing proc entry;
+// everything except open() is the stock single-shot seq_file plumbing.
+static struct file_operations guest_full_proc_ops = {
+    .owner   = THIS_MODULE,
+    .open    = guests_full_proc_open,
+    .release = single_release,
+    .read    = seq_read,
+    .llseek  = seq_lseek,
+};
+
+// File operations for the short guest listing proc entry;
+// everything except open() is the stock single-shot seq_file plumbing.
+static struct file_operations guest_short_proc_ops = {
+    .owner   = THIS_MODULE,
+    .open    = guests_short_proc_open,
+    .release = single_release,
+    .read    = seq_read,
+    .llseek  = seq_lseek,
+};
+
+// seq_file "show" callback behind /proc/v3vee/v3-info.
+// Supplies basic information that the user-space tools need
+// to manipulate Palacios. The current use case here is to
+// convey memory and NUMA topology information. Always returns 0.
+static int read_info(struct seq_file *s, void *v)
+{
+    uint64_t mem_block_size;
+    int i,j;
+    int max_node=-1;
+
+    seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
+    seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
+    seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
+    seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
+
+    // Report the "mem_block_size" option if one was supplied,
+    // otherwise fall back to the compiled-in value
+    if (!v3_lookup_option("mem_block_size")) {
+        mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
+    } else {
+        if (strict_strtoull(v3_lookup_option("mem_block_size"), 0, &mem_block_size)) {
+            // option present but unparsable: report all-ones
+            mem_block_size=-1;
+        }
+    }
+    seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
+
+    // Walk the online CPU mask instead of counting 0..num_online_cpus()-1:
+    // online CPU ids are not guaranteed to be contiguous (e.g. after a
+    // hot-unplug), so counting would report an offline CPU and miss an
+    // online one.
+    seq_printf(s,"\nCPU to node mappings\n");
+    for_each_online_cpu(i) {
+        int node = numa_cpu_to_node(i);
+
+        seq_printf(s,"cpu %d -> node %d\n", i, node);
+        // track the highest node that hosts an online CPU; it bounds
+        // the node distance table below
+        if (node>max_node) {
+            max_node=node;
+        }
+    }
+
+    seq_printf(s,"\nNode to node distances\n");
+    for (j=0;j<=max_node;j++) {
+        seq_printf(s," \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<=max_node;i++) {
+        seq_printf(s,"%2d ",i);
+        for (j=0;j<=max_node;j++) {
+            seq_printf(s,"\t%2d", numa_get_distance(i,j));
+        }
+        seq_printf(s,"\n");
+    }
+
+    // CPU distance is the distance between the nodes the two CPUs sit on
+    seq_printf(s,"\nCPU to CPU distances\n");
+    for_each_online_cpu(j) {
+        seq_printf(s," \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for_each_online_cpu(i) {
+        seq_printf(s,"%2d ",i);
+        for_each_online_cpu(j) {
+            seq_printf(s,"\t%2d", numa_get_distance(numa_cpu_to_node(i),numa_cpu_to_node(j)));
+        }
+        seq_printf(s,"\n");
+    }
+    return 0;
+}
+
+// open() handler for the v3-info proc entry: hands the single-shot
+// seq_file reader read_info the private data of this proc entry.
+static int info_proc_open(struct inode * inode, struct file * filp)
+{
+    return single_open(filp, read_info, PDE(inode)->data);
+}
+
+
+
+// File operations for the v3-info proc entry; everything except
+// open() is the stock single-shot seq_file plumbing.
+static struct file_operations info_proc_ops = {
+    .owner   = THIS_MODULE,
+    .open    = info_proc_open,
+    .release = single_release,
+    .read    = seq_read,
+    .llseek  = seq_lseek,
+};
+
static int __init v3_init(void) {
palacios_allow_devmem();
}
+ // numa is now a required interface and we need it
+ // up before primary initialization
+ palacios_init_numa();
+
// Initialize Palacios
palacios_vmm_init(options);
v3_class = class_create(THIS_MODULE, "vms");
- if (IS_ERR(v3_class)) {
+ if (!v3_class || IS_ERR(v3_class)) {
ERROR("Failed to register V3 VM device class\n");
ret = PTR_ERR(v3_class);
goto failure3;
{
struct proc_dir_entry *entry;
- INFO("palacios_proc_dir=%p before v3-guests\n",palacios_proc_dir);
- entry = create_proc_read_entry("v3-guests", 0444, palacios_proc_dir, read_guests, NULL);
+ entry = create_proc_entry("v3-guests", 0444, palacios_proc_dir);
if (entry) {
+ entry->proc_fops = &guest_short_proc_ops;
INFO("/proc/v3vee/v3-guests successfully created\n");
} else {
ERROR("Could not create proc entry\n");
goto failure6;
}
+ entry = create_proc_entry("v3-guests-details", 0444, palacios_proc_dir);
+ if (entry) {
+ entry->proc_fops = &guest_full_proc_ops;
+ INFO("/proc/v3vee/v3-guests-details successfully created\n");
+ } else {
+ ERROR("Could not create proc entry\n");
+ goto failure7;
+ }
+
+ entry = create_proc_entry("v3-info", 0444, palacios_proc_dir);
+ if (entry) {
+ entry->proc_fops = &info_proc_ops;
+ INFO("/proc/v3vee/v3-info successfully created\n");
+ } else {
+ ERROR("Could not create proc entry\n");
+ goto failure8;
+ }
+
+
}
return 0;
- failure6:
+ failure8:
+ remove_proc_entry("v3-guests-details", palacios_proc_dir);
+ failure7:
remove_proc_entry("v3-guests", palacios_proc_dir);
+ failure6:
+ device_destroy(v3_class, dev);
failure5:
unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
failure4:
/* Stop and free any running VMs */
for (i = 0; i < MAX_VMS; i++) {
- if (guest_map[i] != NULL) {
- guest = (struct v3_guest *)guest_map[i];
+ if (guest_map[i] != NULL) {
+ guest = (struct v3_guest *)(guest_map[i]);
+
+ if (!guest->v3_ctx) {
+ ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name);
+ continue;
+ }
if (v3_stop_vm(guest->v3_ctx) < 0)
ERROR("Couldn't stop VM %d\n", i);
palacios_vmm_exit();
+ palacios_deinit_numa();
+
DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
palacios_deinit_mm();
+ remove_proc_entry("v3-info", palacios_proc_dir);
+ remove_proc_entry("v3-guests-details", palacios_proc_dir);
remove_proc_entry("v3-guests", palacios_proc_dir);
remove_proc_entry("v3vee", NULL);
void * addr = NULL;
mod_allocs++;
- addr = palacios_alloc_extended(size, flags);
+ addr = palacios_alloc_extended(size, flags, -1);
return addr;
}