(c) Jack Lange, 2010
*/
-
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/errno.h>
#include "palacios.h"
#include "mm.h"
#include "vm.h"
+#include "numa.h"
#include "allow_devmem.h"
#include "memcheck.h"
#include "lockcheck.h"
#include "linux-exts.h"
-
MODULE_LICENSE("GPL");
// Module parameter
struct v3_guest_img user_image;
struct v3_guest * guest = palacios_alloc(sizeof(struct v3_guest));
- if (IS_ERR(guest)) {
+ if (!(guest)) {
ERROR("Palacios: Error allocating Kernel guest_image\n");
return -EFAULT;
}
DEBUG("Palacios: Allocating kernel memory for guest image (%llu bytes)\n", user_image.size);
guest->img = palacios_valloc(guest->img_size);
- if (IS_ERR(guest->img)) {
+ if (!guest->img) {
ERROR("Palacios Error: Could not allocate space for guest image\n");
goto out_err1;
}
{
unsigned int i = 0;
unsigned int j = 0;
+ uint64_t num_vcores, num_regions;
struct v3_vm_base_state *base=0;
struct v3_vm_core_state *core=0;
struct v3_vm_mem_state *mem=0;
goto out;
}
- core = palacios_alloc(sizeof(struct v3_vm_core_state) + MAX_VCORES*sizeof(struct v3_vm_vcore_state));
-
- if (!core) {
- ERROR("No space for core state structure\n");
- goto out;
- }
+ for(i = 0; i < MAX_VMS; i++) {
+
+ if (guest_map[i] != NULL) {
+
+ v3_get_state_sizes_vm(guest_map[i]->v3_ctx,&num_vcores,&num_regions);
+
+ core = palacios_alloc(sizeof(struct v3_vm_core_state) + num_vcores*sizeof(struct v3_vm_vcore_state));
+
+ if (!core) {
+ ERROR("No space for core state structure\n");
+ goto out;
+ }
- mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + MAX_REGIONS*sizeof(struct v3_vm_mem_region));
+ mem = palacios_alloc(sizeof(struct v3_vm_mem_state) + num_regions*sizeof(struct v3_vm_mem_region));
- if (!mem) {
- ERROR("No space for memory state structure\n");
- goto out;
- }
+ if (!mem) {
+ ERROR("No space for memory state structure\n");
+ goto out;
+ }
- for(i = 0; i < MAX_VMS; i++) {
- if (guest_map[i] != NULL) {
seq_printf(s,
"---------------------------------------------------------------------------------------\n");
seq_printf(s,
i,guest_map[i]->name, i);
// Get extended data
- core->num_vcores=MAX_VCORES; // max we can handle
- mem->num_regions=MAX_REGIONS; // max we can handle
+ core->num_vcores=num_vcores;
+ mem->num_regions=num_regions;
if (v3_get_state_vm(guest_map[i]->v3_ctx, base, core, mem)) {
ERROR("Cannot get VM info\n");
seq_printf(s, "<unable to get data for this VM>\n");
} else {
seq_printf(s,
+ "Type: %s\n"
"State: %s\n"
- "Cores: %lu\n"
- "Regions: %lu\n\n",
+ "Cores: %llu\n"
+ "Regions: %llu\n"
+ "Memsize: %llu (%llu ROS)\n\n",
+ base->vm_type==V3_VM_GENERAL ? "general" :
+ base->vm_type==V3_VM_HVM ? "HVM" : "UNKNOWN",
base->state==V3_VM_INVALID ? "INVALID" :
base->state==V3_VM_RUNNING ? "running" :
base->state==V3_VM_STOPPED ? "stopped" :
base->state==V3_VM_PAUSED ? "paused" :
base->state==V3_VM_ERROR ? "ERROR" :
- base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
+ base->state==V3_VM_SIMULATING ? "simulating" :
+ base->state==V3_VM_RESETTING ? "resetting" : "UNKNOWN",
core->num_vcores,
- mem->num_regions);
+ mem->num_regions,
+ mem->mem_size,
+ mem->ros_mem_size);
+
seq_printf(s, "Core States\n");
for (j=0;j<core->num_vcores;j++) {
seq_printf(s,
- " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s\n",
+ " vcore %u %s on pcore %lu %llu exits rip=0x%p %s %s %s %s\n",
j,
core->vcore[j].state==V3_VCORE_INVALID ? "INVALID" :
core->vcore[j].state==V3_VCORE_RUNNING ? "running" :
- core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" : "UNKNOWN",
+ core->vcore[j].state==V3_VCORE_STOPPED ? "stopped" :
+ core->vcore[j].state==V3_VCORE_RESETTING ? "resetting" : "UNKNOWN",
core->vcore[j].pcore,
core->vcore[j].num_exits,
core->vcore[j].last_rip,
core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_PHYSICAL ? "physical" :
core->vcore[j].mem_mode==V3_VCORE_MEM_MODE_VIRTUAL ? "virtual" : "UNKNOWN",
core->vcore[j].mem_state==V3_VCORE_MEM_STATE_SHADOW ? "shadow" :
- core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN");
+ core->vcore[j].mem_state==V3_VCORE_MEM_STATE_NESTED ? "nested" : "UNKNOWN",
+ core->vcore[j].vcore_type==V3_VCORE_GENERAL ? "" :
+ core->vcore[j].vcore_type==V3_VCORE_ROS ? "ros" :
+ core->vcore[j].vcore_type==V3_VCORE_HRT ? "hrt" : "UNKNOWN");
}
seq_printf(s, "\nMemory Regions\n");
for (j=0;j<mem->num_regions;j++) {
- seq_printf(s," region %u has HPAs 0x%p-0x%p\n",
- j, mem->region[j].host_paddr, mem->region[j].host_paddr+mem->region[j].size);
+ seq_printf(s," region %u has HPAs 0x%016llx-0x%016llx (node %d) GPA 0x%016llx %s %s\n",
+ j, (uint64_t)mem->region[j].host_paddr, (uint64_t)mem->region[j].host_paddr+mem->region[j].size,
+ numa_addr_to_node((uintptr_t)(mem->region[j].host_paddr)),
+ (uint64_t)mem->region[j].guest_paddr,
+ mem->region[j].swapped ? "swapped" : "",
+ mem->region[j].pinned ? "pinned" : "");
}
}
seq_printf(s,
"---------------------------------------------------------------------------------------\n");
+
+ palacios_free(mem); mem=0;
+ palacios_free(core); core=0;
+
}
+
}
ERROR("Cannot get VM info\n");
seq_printf(s, "\t<unable to get data for this VM>\n");
} else {
- seq_printf(s,"\t%s\t%lu vcores\t%lu regions\n",
+ seq_printf(s,"\t%s\t%llu vcores\t%llu regions\t%llu mem\t%s\n",
base->state==V3_VM_INVALID ? "INVALID" :
base->state==V3_VM_RUNNING ? "running" :
base->state==V3_VM_STOPPED ? "stopped" :
base->state==V3_VM_ERROR ? "ERROR" :
base->state==V3_VM_SIMULATING ? "simulating" : "UNKNOWN",
core->num_vcores,
- mem->num_regions);
+ mem->num_regions,
+ mem->mem_size,
+ base->vm_type == V3_VM_GENERAL ? "general" :
+ base->vm_type == V3_VM_HVM ? "hvm" : "UNKNOWN");
}
}
}
+
// File operations table that opens through guests_full_proc_open and
// releases through single_release.
// NOTE(review): no .read/.llseek handlers are visible in this initializer,
// whereas info_proc_ops in this file sets seq_read/seq_lseek -- confirm
// whether lines were dropped from this view or the handlers are missing.
static struct file_operations guest_full_proc_ops = {
.owner = THIS_MODULE,
.open = guests_full_proc_open,
.release = single_release,
};
+// Supply basic information that the user-space tools need
+// to manipulate Palacios.  The current use case here is to
+// convey memory information: kernel MAX_ORDER, node and cpu counts,
+// the compiled and run-time mem_block_size, the cpu -> node map,
+// and the node-to-node and cpu-to-cpu distance tables.
+//
+// This is the seq_file show callback handed to single_open()
+// by info_proc_open().
+//   s: seq_file that receives the formatted text
+//   v: not used
+// Always returns 0.
+static int read_info(struct seq_file *s, void *v)
+{
+    uint64_t mem_block_size;
+    const char *opt;
+    int i,j;
+    int node;
+    int max_node=-1;
+
+    seq_printf(s,"kernel MAX_ORDER:\t%d\n",MAX_ORDER);
+    seq_printf(s,"number of nodes:\t%d\n", numa_num_nodes());
+    seq_printf(s,"number of cpus: \t%d\n", num_online_cpus());
+
+    seq_printf(s,"\npalacios compiled mem_block_size:\t%d\n", V3_CONFIG_MEM_BLOCK_SIZE);
+
+    // Look the option up once; fall back to the compiled value when unset
+    opt = v3_lookup_option("mem_block_size");
+    if (!opt) {
+        mem_block_size = V3_CONFIG_MEM_BLOCK_SIZE;
+    } else if (strict_strtoull(opt, 0, &mem_block_size)) {
+        // option present but unparsable: report the all-ones value as a marker
+        mem_block_size=-1;
+    }
+    seq_printf(s,"palacios run-time mem_block_size:\t%llu\n", mem_block_size);
+
+    // Walk the online cpu mask rather than 0..num_online_cpus()-1:
+    // online cpu ids are not guaranteed to be contiguous
+    seq_printf(s,"\nCPU to node mappings\n");
+    for_each_online_cpu(i) {
+        node = numa_cpu_to_node(i);
+        seq_printf(s,"cpu %d -> node %d\n", i, node);
+        if (node>max_node) {
+            max_node=node;
+        }
+    }
+
+    // Only nodes up to the highest one that hosts an online cpu are listed
+    seq_printf(s,"\nNode to node distances\n");
+    for (j=0;j<=max_node;j++) {
+        seq_printf(s," \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for (i=0;i<=max_node;i++) {
+        seq_printf(s,"%2d ",i);
+        for (j=0;j<=max_node;j++) {
+            seq_printf(s,"\t%2d", numa_get_distance(i,j));
+        }
+        seq_printf(s,"\n");
+    }
+
+    // cpu-to-cpu distance is the distance between the cpus' nodes
+    seq_printf(s,"\nCPU to CPU distances\n");
+    for_each_online_cpu(j) {
+        seq_printf(s," \t%2d", j);
+    }
+    seq_printf(s,"\n");
+    for_each_online_cpu(i) {
+        node = numa_cpu_to_node(i);
+        seq_printf(s,"%2d ",i);
+        for_each_online_cpu(j) {
+            seq_printf(s,"\t%2d", numa_get_distance(node,numa_cpu_to_node(j)));
+        }
+        seq_printf(s,"\n");
+    }
+    return 0;
+}
+
+// open() handler used by info_proc_ops: binds the file to the
+// single-shot seq_file producer read_info(), passing along the
+// private data stored on the inode's proc_dir_entry.
+// Returns whatever single_open() returns.
+static int info_proc_open(struct inode * inode, struct file * filp)
+{
+    return single_open(filp, read_info, PDE(inode)->data);
+}
+
+
+
+// File operations for the v3-info proc entry: a single_open()-based
+// seq_file that is opened by info_proc_open() and otherwise served
+// by the stock seq_file read/llseek/release helpers.
+static struct file_operations info_proc_ops = {
+    .owner   = THIS_MODULE,
+    // lifecycle
+    .open    = info_proc_open,
+    .release = single_release,
+    // data access
+    .read    = seq_read,
+    .llseek  = seq_lseek,
+};
+
static int __init v3_init(void) {
palacios_allow_devmem();
}
+    // NUMA is now a required interface, so it must be
+    // up before primary initialization
+ palacios_init_numa();
+
// Initialize Palacios
palacios_vmm_init(options);
v3_class = class_create(THIS_MODULE, "vms");
- if (IS_ERR(v3_class)) {
+ if (!v3_class || IS_ERR(v3_class)) {
ERROR("Failed to register V3 VM device class\n");
ret = PTR_ERR(v3_class);
goto failure3;
ERROR("Could not create proc entry\n");
goto failure7;
}
+
+ entry = create_proc_entry("v3-info", 0444, palacios_proc_dir);
+ if (entry) {
+ entry->proc_fops = &info_proc_ops;
+ INFO("/proc/v3vee/v3-info successfully created\n");
+ } else {
+ ERROR("Could not create proc entry\n");
+ goto failure8;
+ }
+
+
}
return 0;
- failure7:
+ failure8:
remove_proc_entry("v3-guests-details", palacios_proc_dir);
- failure6:
+ failure7:
remove_proc_entry("v3-guests", palacios_proc_dir);
+ failure6:
+ device_destroy(v3_class, dev);
failure5:
unregister_chrdev_region(MKDEV(v3_major_num, 0), MAX_VMS + 1);
failure4:
/* Stop and free any running VMs */
for (i = 0; i < MAX_VMS; i++) {
- if (guest_map[i] != NULL) {
- guest = (struct v3_guest *)guest_map[i];
+ if (guest_map[i] != NULL) {
+ guest = (struct v3_guest *)(guest_map[i]);
+
+ if (!guest->v3_ctx) {
+ ERROR("Orphan VM detected and skipped: index=%d name=%s\n", i, guest->name);
+ continue;
+ }
if (v3_stop_vm(guest->v3_ctx) < 0)
ERROR("Couldn't stop VM %d\n", i);
palacios_vmm_exit();
+ palacios_deinit_numa();
+
DEBUG("Palacios Mallocs = %d, Frees = %d\n", mallocs, frees);
DEBUG("Palacios Vmallocs = %d, Vfrees = %d\n", vmallocs, vfrees);
DEBUG("Palacios Page Allocs = %d, Page Frees = %d\n", pg_allocs, pg_frees);
palacios_deinit_mm();
+ remove_proc_entry("v3-info", palacios_proc_dir);
remove_proc_entry("v3-guests-details", palacios_proc_dir);
remove_proc_entry("v3-guests", palacios_proc_dir);
remove_proc_entry("v3vee", NULL);