(c) Jack Lange, 2010
*/
-
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/errno.h>
#include "palacios.h"
#include "mm.h"
#include "vm.h"
+#include "numa.h"
#include "allow_devmem.h"
#include "memcheck.h"
#include "lockcheck.h"
#include "linux-exts.h"
+#include "util-hashtable.h"
MODULE_LICENSE("GPL");
struct class * v3_class = NULL;
static struct cdev ctrl_dev;
+
+// mapping from thread ids to their resource control blocks
+struct hashtable *v3_thread_resource_map=0;
+
static int register_vm(struct v3_guest * guest) {
int i = 0;
struct v3_guest_img user_image;
struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
- if (IS_ERR(guest)) {
+ if (!(guest)) {
ERROR("Palacios: Error allocating Kernel guest_image\n");
return -EFAULT;
}
DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
guest->img = palacios_valloc(guest->img_size);
- if (IS_ERR(guest->img)) {
+ if (!guest->img) {
ERROR("Palacios Error: Could not allocate space for guest image\n");
goto out_err1;
}
goto out_err2;
}
- strncpy(guest->name, user_image.name, 127);
+ strncpy(guest->name, user_image.name, 128);
+ guest->name[127] = 0;
INIT_LIST_HEAD(&(guest->exts));
unsigned long vm_idx = arg;
struct v3_guest * guest;
- if (vm_idx > MAX_VMS) {
+ if (vm_idx >= MAX_VMS) {
ERROR("Invalid VM index: %ld\n", vm_idx);
return -1;
}
}
-#define MAX_VCORES 256
+#define MAX_CORES 1024
#define MAX_REGIONS 1024
-
-
+#define MIN(x,y) ((x)<(y) ? (x) : (y))
static int read_guests_details(struct seq_file *s, void *v)
{
unsigned int i = 0;
unsigned int j = 0;
+ uint64_t num_vcores, num_regions;
+ uint64_t alloc_num_vcores, alloc_num_regions;
struct v3_vm_base_state *base=0;
struct v3_vm_core_state *core=0;
struct v3_vm_mem_state *mem=0;
- base = palacios_alloc(sizeof(struct v3_vm_base_state));
+
+ base = palacios_valloc(sizeof(struct v3_vm_base_state));
+
if (!base) {
ERROR("No space for base state structure\n");
goto out;
}
- core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
-
- if (!core) {
- ERROR("No space for core state structure\n");
- goto out;
- }
+
+ for(i = 0; i < MAX_VMS; i++) {
+
+ if (guest_map[i] != NULL) {
+
+ v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
+ alloc_num_vcores = MIN(num_vcores,MAX_CORES);
+ alloc_num_regions = MIN(num_regions,MAX_REGIONS);
+
+ core = palacios_valloc(sizeof(struct v3_vm_core_state) + alloc_num_vcores*sizeof(struct v3_vm_vcore_state));
+
+ if (!core) {
+ ERROR("No space for core state structure\n");
+ goto out;
+ }
- mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+ mem = palacios_valloc(sizeof(struct v3_vm_mem_state) + alloc_num_regions*sizeof(struct v3_vm_mem_region));
- if (!mem) {
- ERROR("No space for memory state structure\n");
- goto out;
- }
+ if (!mem) {
+ ERROR("No space for memory state structure\n");
+ goto out;
+ }
- for(i = 0; i < MAX_VMS; i++) {
- if (guest_map[i] != NULL) {
seq_printf(s,
"---------------------------------------------------------------------------------------\n");
seq_printf(s,
i,guest_map[i]->name, i);
// Get extended data
- core->num_vcores=MAX_VCORES; // max we can handle
- mem->num_regions=MAX_REGIONS; // max we can handle
+ core->num_vcores=alloc_num_vcores;
+ mem->num_regions=alloc_num_regions;
if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
ERROR("Cannot get VM info\n");
seq_printf(s, "<unable to get data for this VM>\n");
} else {
seq_printf(s,
+ "Type: %s\n"
"State: %s\n"
- "Cores: %lu\n"
- "Regions: %lu\n\n",
+ "Cores: %llu (%llu shown)\n"
+ "Regions: %llu (%llu shown)\n"
+ "Memsize: %llu (%llu ROS)\n\n",
+ base->vm_type==V3_VM_GENERAL ? "general" :
+ base->vm_type==V3_VM_HVM ? "HVM" : "UNKNOWN",
base->state==V3_VM_INVALID ? "INVALID" :
base->state==V3_VM_RUNNING ? "running" :
base->state==V3_VM_STOPPED ? "stopped" :
base->state==V3_VM_PAUSED ? "paused" :
base->state==V3_VM_ERROR ? "ERROR" :
- base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
+ base->state==V3_VM_SIMULATING ? "simulating" :
+ base->state==V3_VM_RESETTING ? "resetting" : "UNKNOWN",
+ num_vcores,
core->num_vcores,
- mem->num_regions);
+ num_regions,
+ mem->num_regions,
+ mem->mem_size,
+ mem->ros_mem_size);
+
seq_printf(s, "Core States\n");
for (j=0;j<core->num_vcores;j++) {
seq_printf(s,
- " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s\n",
+ " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s %s\n",
j,
core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
- core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
+ core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" :
+ core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN",
core->vcore[j].pcore,
core->vcore[j].num_exits,
core->vcore[j].last_rip,
core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
- core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
+ core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN",
+ core->vcore[j].vcore_type==V3_VCORE_GENERAL ? "" :
+ core->vcore[j].vcore_type==V3_VCORE_ROS ? "ros" :
+ core->vcore[j].vcore_type==V3_VCORE_HRT ? "hrt" : "UNKNOWN");
}
+
seq_printf(s, "\nMemory Regions\n");
for (j=0;j<mem->num_regions;j++) {
- seq_printf(s," region %u has HPAs 0x%p-0x%p\n",
- j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size);
+ seq_printf(s," region %u has HPAs 0x%016llx-0x%016llx (node %d) GPA 0x%016llx %s %s\n",
+ j, (uint64_t)mem->region[j].host_paddr, (uint64_t)mem->region[j].host_paddr+mem->region[j].size,
+ numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)),
+ (uint64_t)mem->region[j].guest_paddr,
+ mem->region[j].swapped ? "swapped" : "",
+ mem->region[j].pinned ? "pinned" : "");
}
+
}
seq_printf(s,
"---------------------------------------------------------------------------------------\n");
+
+ palacios_vfree(mem); mem=0;
+ palacios_vfree(core); core=0;
+
}
+
}
out:
- if (mem) { palacios_free(mem); }
- if (core) { palacios_free(core); }
- if (base) { palacios_free(base); }
+ if (mem) { palacios_vfree(mem); }
+ if (core) { palacios_vfree(core); }
+ if (base) { palacios_vfree(base); }
return 0;
}
struct v3_vm_base_state *base=0;
struct v3_vm_core_state *core=0;
struct v3_vm_mem_state *mem=0;
+ uint64_t num_vcores, num_regions;
+
+
+ INFO("READ GUEST\n");
- base = palacios_alloc(sizeof(struct v3_vm_base_state));
+ base = palacios_valloc(sizeof(struct v3_vm_base_state));
if (!base) {
ERROR("No space for base state structure\n");
goto out;
}
- core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
+ core = palacios_valloc(sizeof(struct v3_vm_core_state));
if (!core) {
ERROR("No space for core state structure\n");
goto out;
}
- mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+ mem = palacios_valloc(sizeof(struct v3_vm_mem_state));
if (!mem) {
ERROR("No space for memory state structure\n");
goto out;
}
+
for(i = 0; i < MAX_VMS; i++) {
if (guest_map[i] != NULL) {
+
+ v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
seq_printf(s,"%s\t/dev/v3-vm%d", guest_map[i]->name, i);
- // Get extended data
- core->num_vcores=MAX_VCORES; // max we can handle
- mem->num_regions=MAX_REGIONS; // max we can handle
+
+ // Skip getting per core and per-region
+ core->num_vcores=0;
+ mem->num_regions=0;
if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
ERROR("Cannot get VM info\n");
seq_printf(s, "\t<unable to get data for this VM>\n");
} else {
- seq_printf(s,"\t%s\t%lu vcores\t%lu regions\n",
+ seq_printf(s,"\t%s\t%llu vcores\t%llu regions\t%llu mem\t%s\n",
base->state==V3_VM_INVALID ? "INVALID" :
base->state==V3_VM_RUNNING ? "running" :
base->state==V3_VM_STOPPED ? "stopped" :
base->state==V3_VM_PAUSED ? "paused" :
base->state==V3_VM_ERROR ? "ERROR" :
base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
- core->num_vcores,
- mem->num_regions);
+ num_vcores,
+ num_regions,
+ mem->mem_size,
+ base->vm_type == V3_VM_GENERAL ? "general" :
+ base->vm_type == V3_VM_HVM ? "hvm" : "UNKNOWN");
}
}
}
out:
- if (mem) { palacios_free(mem); }
- if (core) { palacios_free(core); }
- if (base) { palacios_free(base); }
+ if (mem) { palacios_vfree(mem); }
+ if (core) { palacios_vfree(core); }
+ if (base) { palacios_vfree(base); }
return 0;
}
+
static struct file_operations guest_full_proc_ops = {
.owner = THIS_MODULE,
.open = guests_full_proc_open,
.release = single_release,
};
+// Supply basic information that the user-space tools need
+// to manipulate Palacios.  The current use case here is to
+// convey memory information.
+//
+// seq_file "show" callback: writes the kernel MAX_ORDER, node and
+// online-cpu counts, the compiled and run-time mem_block_size, the
+// cpu->node mapping, and the node/node and cpu/cpu distance tables
+// into s.  The v argument is not used.  Always returns 0.
+static int read_info(struct seq_file *s, void *v)
+{
+    uint64_t mem_block_size;
+    const char *opt;
+    int i,j;
+    int max_node=-1;
+
+    seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
+    seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
+    seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
+    seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
+
+    // Look the option up once; fall back to the compiled-in value
+    // when it was not supplied
+    opt = v3_lookup_option("mem_block_size");
+    if (!opt) {
+        mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
+    } else {
+        if (strict_strtoull(opt, 0, &mem_block_size)) {
+            // option present but not parseable - report all ones
+            mem_block_size=-1;
+        }
+    }
+    seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
+
+    // Walk the online cpus by id instead of counting 0..num_online_cpus()-1:
+    // online ids are not guaranteed to be contiguous (e.g. after hot-unplug)
+    seq_printf(s,"\nCPU to node mappings\n");
+    for_each_online_cpu(i) {
+        int node = numa_cpu_to_node(i);
+
+        seq_printf(s,"cpu %d -> node %d\n", i, node);
+        if (node>max_node) {
+            // highest node seen bounds the node distance table below
+            max_node=node;
+        }
+    }
+
+    seq_printf(s,"\nNode to node distances\n");
+    for (j=0;j<=max_node;j++) {
+        seq_printf(s," \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<=max_node;i++) {
+        seq_printf(s,"%2d ",i);
+        for (j=0;j<=max_node;j++) {
+            seq_printf(s,"\t%2d", numa_get_distance(i,j));
+        }
+        seq_printf(s,"\n");
+    }
+
+    // cpu/cpu distance is the distance between the nodes the cpus map to
+    seq_printf(s,"\nCPU to CPU distances\n");
+    for_each_online_cpu(j) {
+        seq_printf(s," \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for_each_online_cpu(i) {
+        int node_i = numa_cpu_to_node(i);
+
+        seq_printf(s,"%2d ",i);
+        for_each_online_cpu(j) {
+            seq_printf(s,"\t%2d", numa_get_distance(node_i,numa_cpu_to_node(j)));
+        }
+        seq_printf(s,"\n");
+    }
+
+    return 0;
+}
+
+// open handler for the v3-info proc file: binds read_info as the
+// single-shot seq_file show routine, handing it the data pointer
+// stored in the inode's proc_dir_entry
+static int info_proc_open(struct inode * inode, struct file * filp)
+{
+    return single_open(filp, read_info, PDE(inode)->data);
+}
+
+
+
+// file operations for the v3-info proc entry
+static struct file_operations info_proc_ops = {
+    .owner   = THIS_MODULE,
+    // set up / tear down via the single_open-style helpers
+    .open    = info_proc_open,
+    .release = single_release,
+    // reads and seeks are delegated to the seq_file helpers
+    .read    = seq_read,
+    .llseek  = seq_lseek,
+};
+
+
+// Hash callback for the thread/resource map: feeds the key, cast to
+// long, to palacios_hash_long with a second argument of 64
+// (presumably the bit width - confirm against palacios_hash_long)
+static inline uint_t thr_hash_func(addr_t key)
+{
+    long hash_input = (long)key;
+
+    return palacios_hash_long(hash_input, 64);
+}
+
+// Key-equality callback for the thread/resource map:
+// returns 1 when the two keys are identical, 0 otherwise
+static inline int thr_hash_comp(addr_t k1, addr_t k2)
+{
+    if (k1 == k2) {
+        return 1;
+    }
+    return 0;
+}
static int __init v3_init(void) {
LOCKCHECK_INIT();
MEMCHECK_INIT();
+
+ if (!(v3_thread_resource_map = palacios_create_htable(MAX_THREADS,thr_hash_func,thr_hash_comp))) {
+ ERROR("Could not create thread/resource map\n");
+ ret = -1;
+ goto failure0;
+ }
+
palacios_proc_dir = proc_mkdir("v3vee", NULL);
if (!palacios_proc_dir) {
ERROR("Could not create proc entry\n");
palacios_allow_devmem();
}
+ // numa is now a required interface and we need it
+ // up before primary initialization
+ palacios_init_numa();
+
// Initialize Palacios
palacios_vmm_init(options);
v3_class = class_create(THIS_MODULE, "vms");
- if (IS_ERR(v3_class)) {
+ if (!v3_class || IS_ERR(v3_class)) {
ERROR("Failed to register V3 VM device class\n");
ret = PTR_ERR(v3_class);
goto failure3;
ERROR("Could not create proc entry\n");
goto failure7;
}
+
+ entry = create_proc_entry("v3-info", 0444, palacios_proc_dir);
+ if (entry) {
+ entry->proc_fops = &info_proc_ops;
+ INFO("/proc/v3vee/v3-info successfully created\n");
+ } else {
+ ERROR("Could not create proc entry\n");
+ goto failure8;
+ }
+
+
}
return 0;
- failure7:
+ failure8:
remove_proc_entry("v3-guests-details", palacios_proc_dir);
- failure6:
+ failure7:
remove_proc_entry("v3-guests", palacios_proc_dir);
+ failure6:
+ device_destroy(v3_class, dev);
failure5:
unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
failure4:
failure2:
remove_proc_entry("v3vee", NULL);
failure1:
+ palacios_free_htable(v3_thread_resource_map,0,0);
+ failure0:
MEMCHECK_DEINIT();
LOCKCHECK_DEINIT();
/* Stop and free any running VMs */
for (i = 0; i < MAX_VMS; i++) {
- if (guest_map[i] != NULL) {
- guest = (struct v3_guest *)guest_map[i];
+ if (guest_map[i] != NULL) {
+ guest = (struct v3_guest *)(guest_map[i]);
+
+ if (!guest->v3_ctx) {
+ ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name);
+ continue;
+ }
if (v3_stop_vm(guest->v3_ctx) < 0)
ERROR("Couldn't stop VM %d\n", i);
palacios_vmm_exit();
+ palacios_deinit_numa();
+
DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
palacios_deinit_mm();
+ remove_proc_entry("v3-info", palacios_proc_dir);
remove_proc_entry("v3-guests-details", palacios_proc_dir);
remove_proc_entry("v3-guests", palacios_proc_dir);
remove_proc_entry("v3vee", NULL);
DEBUG("Palacios Module Mallocs = %d, Frees = %d\n", mod_allocs, mod_frees);
+ palacios_free_htable(v3_thread_resource_map,0,0);
+
MEMCHECK_DEINIT();
LOCKCHECK_DEINIT();
}