2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * This is free software. You are permitted to use,
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20 #include <palacios/vmm_config.h>
21 #include <palacios/vmm.h>
22 #include <palacios/vmm_debug.h>
23 #include <palacios/vmm_msr.h>
24 #include <palacios/vmm_decoder.h>
25 #include <palacios/vmm_telemetry.h>
26 #include <palacios/vmm_mem.h>
27 #include <palacios/vmm_hypercall.h>
28 #include <palacios/vmm_dev_mgr.h>
29 #include <palacios/vmm_cpuid.h>
30 #include <palacios/vmm_xml.h>
31 #include <palacios/vmm_io.h>
32 #include <palacios/vmm_msr.h>
33 #include <palacios/vmm_sprintf.h>
36 #ifdef V3_CONFIG_SWAPPING
37 #include <palacios/vmm_swapping.h>
40 #ifdef V3_CONFIG_MULTIBOOT
41 #include <palacios/vmm_multiboot.h>
45 #include <palacios/vmm_hvm.h>
48 #include <palacios/vmm_host_events.h>
49 #include <palacios/vmm_perftune.h>
51 #include "vmm_config_class.h"
54 /* The Palacios cookie encodes "v3vee" followed by a
55 3 byte version code. There are currently two versions:
57 \0\0\0 => original (no checksum)
/* Version 0 cookie: the characters "v3vee" followed by three zero bytes. */
61 #define COOKIE_V0 "v3vee\0\0\0"
/* Version 1 cookie: the characters "v3vee" followed by the bytes 0, 0, 1. */
62 #define COOKIE_V1 "v3vee\0\0\1"
64 // This is used to access the configuration file index table
/*
 * Index table layout for version 0 images. The table ends in a zero-length
 * array of version 0 file headers; parse_config() also reads a num_files
 * member from it and indexes hdrs[] with the per-file "index" value.
 */
79 struct file_idx_table_v0 {
81 struct file_hdr_v0 hdrs[0];
/*
 * Index table layout for version 1 images. Same trailing-array shape as the
 * version 0 table, but using version 1 file headers (parse_config() reads a
 * hash member from those headers in addition to size and offset).
 */
84 struct file_idx_table_v1 {
86 struct file_hdr_v1 hdrs[0];
/* Forward declarations of the configuration helpers defined later in this file. */
92 static int setup_memory_map(struct v3_vm_info * vm, v3_cfg_tree_t * cfg);
93 static int setup_extensions(struct v3_vm_info * vm, v3_cfg_tree_t * cfg);
94 static int setup_devices(struct v3_vm_info * vm, v3_cfg_tree_t * cfg);
/*
 * Look up the configuration value named `tag` on the node `tree`.
 * The tag is queried both as an XML attribute of the node and as a child
 * element of the node.
 */
98 char * v3_cfg_val(v3_cfg_tree_t * tree, char * tag) {
99 char * attrib = (char *)v3_xml_attr(tree, tag);
100 v3_cfg_tree_t * child_entry = v3_xml_child(tree, tag);
// Supplying the same parameter as both an attribute and a child element is reported as an error
103 if ((child_entry != NULL) && (attrib != NULL)) {
104 PrintError(VM_NONE, VCORE_NONE, "Duplicate Configuration parameters present for %s\n", tag);
// No attribute present: take the value from the child element's text instead
108 if (attrib == NULL) {
109 val = v3_xml_txt(child_entry);
120 v3_cfg_tree_t * v3_cfg_subtree(v3_cfg_tree_t * tree, char * tag) {
121 return v3_xml_child(tree, tag);
124 v3_cfg_tree_t * v3_cfg_next_branch(v3_cfg_tree_t * tree) {
125 return v3_xml_next(tree);
130 struct v3_cfg_file * v3_cfg_get_file(struct v3_vm_info * vm, char * tag) {
131 struct v3_cfg_file * file = NULL;
133 file = (struct v3_cfg_file *)v3_htable_search(vm->cfg_data->file_table, (addr_t)tag);
139 static uint_t file_hash_fn(addr_t key) {
140 char * name = (char *)key;
141 return v3_hash_buffer((uchar_t *)name, strlen(name));
144 static int file_eq_fn(addr_t key1, addr_t key2) {
145 char * name1 = (char *)key1;
146 char * name2 = (char *)key2;
148 return (strcmp(name1, name2) == 0);
/*
 * Parse a configuration blob into a newly allocated struct v3_config.
 *
 * The blob starts with a cookie (COOKIE_V0 or COOKIE_V1) that selects the
 * image format version, followed by an XML length field, the XML text, and a
 * file index table. Every <file> entry under <files> in the XML is turned
 * into a struct v3_cfg_file that is added to cfg->file_list and to
 * cfg->file_table, keyed by its tag.
 */
151 static struct v3_config * parse_config(void * cfg_blob) {
152 struct v3_config * cfg = NULL;
155 struct file_idx_table_v0 * files_v0 = NULL;
156 struct file_idx_table_v1 * files_v1 = NULL;
157 v3_cfg_tree_t * file_tree = NULL;
160 V3_Print(VM_NONE, VCORE_NONE, "cfg data at %p\n", cfg_blob);
// Identify the image format from the leading cookie
162 if (memcmp(cfg_blob, COOKIE_V0, COOKIE_LEN) == 0) {
164 } else if (memcmp(cfg_blob, COOKIE_V1, COOKIE_LEN) == 0) {
167 PrintError(VM_NONE, VCORE_NONE, "Invalid Configuration Header Or Unknown Version\n");
171 V3_Print(VM_NONE, VCORE_NONE, "Handling Palacios Image Format, Version 0x%x\n",version);
// Step past the cookie
173 offset += COOKIE_LEN;
175 cfg = (struct v3_config *)V3_Malloc(sizeof(struct v3_config));
178 PrintError(VM_NONE, VCORE_NONE, "Unable to allocate while parsing\n");
182 memset(cfg, 0, sizeof(struct v3_config));
// The config keeps a pointer to the caller's blob; file data below points into it as well
184 cfg->blob = cfg_blob;
185 INIT_LIST_HEAD(&(cfg->file_list));
186 cfg->file_table = v3_create_htable(0, file_hash_fn, file_eq_fn);
188 if (!(cfg->file_table)) {
189 PrintError(VM_NONE, VCORE_NONE, "Unable to allocate hash table while parsing\n");
// The XML length is stored as a 32-bit value directly in the blob
194 xml_len = *(uint32_t *)(cfg_blob + offset);
197 cfg->cfg = (v3_cfg_tree_t *)v3_xml_parse((uint8_t *)(cfg_blob + offset));
202 // This is hideous, but the file formats are still very close
204 files_v0 = (struct file_idx_table_v0 *)(cfg_blob + offset);
205 V3_Print(VM_NONE, VCORE_NONE, "Number of files in cfg: %d\n", (uint32_t)(files_v0->num_files));
207 files_v1 = (struct file_idx_table_v1 *)(cfg_blob + offset);
208 V3_Print(VM_NONE, VCORE_NONE, "Number of files in cfg: %d\n", (uint32_t)(files_v1->num_files));
// Start at the first <file> entry under <files>
212 file_tree = v3_cfg_subtree(v3_cfg_subtree(cfg->cfg, "files"), "file");
215 char * id = v3_cfg_val(file_tree, "id");
216 char * index = v3_cfg_val(file_tree, "index");
// NOTE(review): `index` is handed to atoi() and `id` to strncpy() below; no NULL check on
// either v3_cfg_val() result is visible in this view -- confirm one exists.
217 int idx = atoi(index);
218 struct v3_cfg_file * file = NULL;
220 file = (struct v3_cfg_file *)V3_Malloc(sizeof(struct v3_cfg_file));
223 PrintError(VM_NONE, VCORE_NONE, "Could not allocate file structure\n");
// NOTE(review): only the hash table release is visible on this failure path; whether cfg
// and previously allocated file structures are also released cannot be seen here.
224 v3_free_htable(cfg->file_table,0,0);
229 V3_Print(VM_NONE, VCORE_NONE, "File index=%d id=%s\n", idx, id);
// Copy the tag and force termination, since strncpy() does not terminate on truncation
231 strncpy(file->tag, id, V3_MAX_TAG_LEN);
232 file->tag[V3_MAX_TAG_LEN-1] = 0 ;
// NOTE(review): idx selects a header without a visible bounds check against num_files,
// and hdr->offset/hdr->size are used without visible validation -- confirm.
235 struct file_hdr_v0 * hdr = &(files_v0->hdrs[idx]);
237 file->size = hdr->size;
238 file->data = cfg_blob + hdr->offset;
241 V3_Print(VM_NONE, VCORE_NONE, "Storing file data offset = %d, size=%d\n", (uint32_t)hdr->offset, hdr->size);
242 V3_Print(VM_NONE, VCORE_NONE, "file data at %p\n", file->data);
244 } else if (version==1) {
245 struct file_hdr_v1 * hdr = &(files_v1->hdrs[idx]);
248 file->size = hdr->size;
249 file->data = cfg_blob + hdr->offset;
250 file->hash = hdr->hash;
252 V3_Print(VM_NONE, VCORE_NONE, "Storing file data offset = %d, size=%d\n", (uint32_t)hdr->offset, hdr->size);
253 V3_Print(VM_NONE, VCORE_NONE, "file data at %p\n", file->data);
254 V3_Print(VM_NONE, VCORE_NONE, "Checking file data integrity...\n");
// Version 1 only: recompute the hash over the file data and compare with the stored one
255 if ((hash = v3_hash_buffer(file->data, file->size)) != file->hash) {
256 PrintError(VM_NONE, VCORE_NONE, "File data corrupted! (orig hash=0x%lx, new=0x%lx\n",
260 V3_Print(VM_NONE, VCORE_NONE, "File data OK\n");
// Register the file in both the list and the tag-keyed hash table
265 list_add( &(file->file_node), &(cfg->file_list));
267 V3_Print(VM_NONE, VCORE_NONE, "Keying file to name\n");
// The key is the tag stored inside the file structure itself
268 v3_htable_insert(cfg->file_table, (addr_t)(file->tag), (addr_t)(file));
270 V3_Print(VM_NONE, VCORE_NONE, "Iterating to next file\n");
272 file_tree = v3_cfg_next_branch(file_tree);
275 V3_Print(VM_NONE, VCORE_NONE, "Configuration parsed successfully\n");
281 static inline uint32_t get_alignment(char * align_str) {
282 // default is 4KB alignment
283 uint32_t alignment = PAGE_SIZE_4KB;
285 if (align_str != NULL) {
286 if (strcasecmp(align_str, "2MB") == 0) {
287 alignment = PAGE_SIZE_2MB;
288 } else if (strcasecmp(align_str, "4MB") == 0) {
289 alignment = PAGE_SIZE_4MB;
293 #ifndef V3_CONFIG_ALIGNED_PG_ALLOC
294 if (alignment != PAGE_SIZE_4KB) {
295 PrintError(VM_NONE, VCORE_NONE, "Aligned page allocations are not supported in this host (requested alignment=%d)\n", alignment);
296 PrintError(VM_NONE, VCORE_NONE, "Ignoring alignment request\n");
297 alignment = PAGE_SIZE_4KB;
/*
 * VM-wide configuration that runs before any core is configured.
 *
 * Reads the memory size, memory alignment, VM class, and scheduling frequency
 * from the configuration tree, fills in the matching fields of `vm`, and
 * calls v3_init_vm() plus the optional swapping, multiboot and HVM
 * initializers.
 */
306 static int pre_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * vm_cfg) {
307 char * memory_str = v3_cfg_val(vm_cfg, "memory");
308 char * schedule_hz_str = v3_cfg_val(vm_cfg, "schedule_hz");
309 char * vm_class = v3_cfg_val(vm_cfg, "class");
// The alignment is looked up on the <memory> child node
310 char * align_str = v3_cfg_val(v3_cfg_subtree(vm_cfg, "memory"), "alignment");
311 uint32_t sched_hz = 100; // set the schedule frequency to 100 HZ
315 PrintError(VM_NONE, VCORE_NONE, "Memory is a required configuration parameter\n");
319 PrintDebug(VM_NONE, VCORE_NONE, "Memory=%s\n", memory_str);
321 PrintDebug(VM_NONE, VCORE_NONE, "Alignment=%s\n", align_str);
323 PrintDebug(VM_NONE, VCORE_NONE, "Alignment defaulted to 4KB.\n");
326 // Amount of ram the Guest will have, always in MB
// The atoi() result is widened to addr_t before scaling from MB to bytes
327 vm->mem_size = (addr_t)atoi(memory_str) * 1024 * 1024;
328 vm->mem_align = get_alignment(align_str);
330 // set up defaults for memory management for threads associated
332 vm->resource_control.pg_alignment=vm->mem_align;
333 vm->resource_control.pg_node_id=-1;
335 #ifdef V3_CONFIG_SWAPPING
336 if (v3_init_swapping_vm(vm,vm_cfg)) {
337 PrintError(vm,VCORE_NONE,"Unable to initialize swapping correctly\n");
340 if (vm->swap_state.enable_swapping) {
341 PrintDebug(vm,VCORE_NONE,"Swapping enabled\n");
343 PrintDebug(vm,VCORE_NONE,"Swapping disabled\n");
347 PrintDebug(VM_NONE, VCORE_NONE, "Alignment for %lu bytes of memory computed as 0x%x\n", vm->mem_size, vm->mem_align);
// NOTE(review): vm_class comes from v3_cfg_val(); no NULL check on it is visible in this
// view before the strcasecmp() below -- confirm a missing "class" entry is handled.
// "PC" is the only class string compared against here
349 if (strcasecmp(vm_class, "PC") == 0) {
350 vm->vm_class = V3_PC_VM;
352 PrintError(VM_NONE, VCORE_NONE, "Invalid VM class\n");
356 #ifdef V3_CONFIG_TELEMETRY
358 char * telemetry = v3_cfg_val(vm_cfg, "telemetry");
360 // This should go first, because other subsystems will depend on the guest_info flag
361 if ((telemetry) && (strcasecmp(telemetry, "enable") == 0)) {
362 vm->enable_telemetry = 1;
364 vm->enable_telemetry = 0;
369 if (v3_init_vm(vm) == -1) {
370 PrintError(VM_NONE, VCORE_NONE, "Failed to initialize VM\n");
374 #ifdef V3_CONFIG_MULTIBOOT
375 if (v3_init_multiboot_vm(vm,vm_cfg)) {
376 PrintError(vm,VCORE_NONE,"Cannot initialize Multiboot for VM\n");
381 if (v3_init_hvm_vm(vm,vm_cfg)) {
382 PrintError(vm,VCORE_NONE,"Cannot initialize HVM for VM\n");
// An explicit schedule_hz entry overrides the 100 HZ default
387 if (schedule_hz_str) {
388 sched_hz = atoi(schedule_hz_str);
391 PrintDebug(VM_NONE, VCORE_NONE, "CPU_KHZ = %d, schedule_freq=%p\n", V3_CPU_KHZ(),
392 (void *)(addr_t)sched_hz);
// NOTE(review): sched_hz is the divisor below; a configured value that atoi() turns into 0
// would divide by zero, and no guard is visible in this view -- confirm.
394 vm->yield_cycle_period = (V3_CPU_KHZ() * 1000) / sched_hz;
/*
 * Choose the paging mode for a core from the VM's <paging> configuration.
 *
 * "nested" is honored only on the machine types checked below; "shadow",
 * an unrecognized mode, a missing mode, or unsupported hardware all end in
 * SHADOW_PAGING. The "large_pages" option is also read here.
 * core_cfg is not referenced in the visible lines of this function.
 */
400 static int determine_paging_mode(struct guest_info * info, v3_cfg_tree_t * core_cfg) {
401 extern v3_cpu_arch_t v3_mach_type;
// The paging settings live on the VM-level tree, not on the per-core node
403 v3_cfg_tree_t * vm_tree = info->vm_info->cfg_data->cfg;
404 v3_cfg_tree_t * pg_tree = v3_cfg_subtree(vm_tree, "paging");
405 char * pg_mode = v3_cfg_val(pg_tree, "mode");
// NOTE(review): pg_mode is printed with %s before any NULL check visible in this view, yet
// the "No paging type specified" message below implies it can be absent -- confirm.
407 PrintDebug(info->vm_info, info, "Paging mode specified as %s\n", pg_mode);
410 if ((strcasecmp(pg_mode, "nested") == 0)) {
411 // we assume symmetric cores, so if core 0 has nested paging they all do
412 if ((v3_mach_type == V3_SVM_REV3_CPU) ||
413 (v3_mach_type == V3_VMX_EPT_CPU) ||
414 (v3_mach_type == V3_VMX_EPT_UG_CPU)) {
416 V3_Print(info->vm_info, info, "Setting paging mode to NESTED\n");
417 info->shdw_pg_mode = NESTED_PAGING;
419 PrintError(info->vm_info, info, "Nested paging not supported on this hardware. Defaulting to shadow paging\n");
420 info->shdw_pg_mode = SHADOW_PAGING;
422 } else if ((strcasecmp(pg_mode, "shadow") == 0)) {
423 V3_Print(info->vm_info, info, "Setting paging mode to SHADOW\n");
424 info->shdw_pg_mode = SHADOW_PAGING;
426 PrintError(info->vm_info, info, "Invalid paging mode (%s) specified in configuration. Defaulting to shadow paging\n", pg_mode);
427 info->shdw_pg_mode = SHADOW_PAGING;
430 V3_Print(info->vm_info, info, "No paging type specified in configuration. Defaulting to shadow paging\n");
431 info->shdw_pg_mode = SHADOW_PAGING;
// The "large_pages" value is looked up twice: once for presence, once for comparison
435 if (v3_cfg_val(pg_tree, "large_pages") != NULL) {
436 if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) {
437 info->use_large_pages = 1;
438 PrintDebug(info->vm_info, info, "Use of large pages in memory virtualization enabled.\n");
/*
 * Per-core configuration that runs before the VM-wide post configuration.
 *
 * Selects the paging mode, initializes the core, runs the multiboot and HVM
 * core initializers, and then hands off to the PC-class core setup. Any VM
 * class other than V3_PC_VM is reported as invalid.
 */
444 static int pre_config_core(struct guest_info * info, v3_cfg_tree_t * core_cfg) {
445 if (determine_paging_mode(info, core_cfg) != 0) {
449 if (v3_init_core(info) == -1) {
450 PrintError(info->vm_info, info, "Error Initializing Core\n");
454 #ifdef V3_CONFIG_MULTIBOOT
455 if (v3_init_multiboot_core(info)) {
456 PrintError(info->vm_info, info, "Error Initializing Multiboot Core\n");
461 if (v3_init_hvm_core(info)) {
462 PrintError(info->vm_info, info, "Error Initializing HVM Core\n");
// Class-specific core setup; V3_PC_VM is the only class handled here
467 if (info->vm_info->vm_class == V3_PC_VM) {
468 if (pre_config_pc_core(info, core_cfg) == -1) {
469 PrintError(info->vm_info, info, "PC Post configuration failure\n");
473 PrintError(info->vm_info, info, "Invalid VM Class\n");
/*
 * VM-wide configuration that runs after every core has been pre-configured.
 *
 * In order: builds the guest memory map, runs the PC-class post
 * configuration, initializes fw_cfg state, creates the configured devices,
 * sets up extensions, applies performance tuning, and finally marks the VM
 * as VM_STOPPED.
 */
482 static int post_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
486 // Configure the memory map for the guest
487 if (setup_memory_map(vm, cfg) == -1) {
488 PrintError(vm, VCORE_NONE,"Setting up guest memory map failed...\n");
// Class-specific setup; V3_PC_VM is the only class handled here
493 if (vm->vm_class == V3_PC_VM) {
494 if (post_config_pc(vm, cfg) == -1) {
495 PrintError(vm, VCORE_NONE,"PC Post configuration failure\n");
499 PrintError(vm, VCORE_NONE,"Invalid VM Class\n");
505 // Initialize fw_cfg state for VMM<->VM SEABIOS communication
506 if (v3_fw_cfg_init(vm) == -1) {
507 PrintError(vm, VCORE_NONE, "Error initializing Firmware Config (fw_cfg) state\n");
512 * Initialize configured devices
514 if (setup_devices(vm, cfg) == -1) {
515 PrintError(vm, VCORE_NONE,"Failed to setup devices\n");
520 // v3_print_io_map(info);
// Print the MSR map once the devices have been created
521 v3_print_msr_map(vm);
527 * Initialize configured extensions
529 if (setup_extensions(vm, cfg) == -1) {
530 PrintError(vm, VCORE_NONE,"Failed to setup extensions\n");
534 if (v3_setup_performance_tuning(vm, cfg) == -1) {
535 PrintError(vm, VCORE_NONE,"Failed to configure performance tuning parameters\n");
// All configuration steps are done at this point; the VM is left in the stopped state
540 vm->run_state = VM_STOPPED;
547 static int post_config_core(struct guest_info * info, v3_cfg_tree_t * cfg) {
550 if (v3_init_core_extensions(info) == -1) {
551 PrintError(info->vm_info, info, "Error intializing extension core states\n");
555 if (info->vm_info->vm_class == V3_PC_VM) {
556 if (post_config_pc_core(info, cfg) == -1) {
557 PrintError(info->vm_info, info, "PC Post configuration failure\n");
561 PrintError(info->vm_info, info, "Invalid VM Class\n");
571 static struct v3_vm_info * allocate_guest(int num_cores) {
572 int guest_state_size = sizeof(struct v3_vm_info) + (sizeof(struct guest_info) * num_cores);
573 struct v3_vm_info * vm = V3_Malloc(guest_state_size);
576 PrintError(VM_NONE, VCORE_NONE, "Unable to allocate space for guest data structures\n");
582 memset(vm, 0, guest_state_size);
584 vm->num_cores = num_cores;
586 for (i = 0; i < num_cores; i++) {
587 vm->cores[i].core_run_state = CORE_INVALID;
590 vm->run_state = VM_INVALID;
/*
 * Build a VM from a configuration blob.
 *
 * Steps, in order: parse the blob, read the core count from <cores>,
 * allocate the VM, run the VM-wide pre configuration, pre-configure each
 * core against successive <core> entries, run the VM-wide post
 * configuration, and post-configure each core.
 * priv_data is stored on the VM as host_priv_data.
 *
 * NOTE(review): the statements that follow each error message (cleanup and
 * return) are not visible in this view.
 */
601 struct v3_vm_info * v3_config_guest(void * cfg_blob, void * priv_data) {
602 extern v3_cpu_arch_t v3_mach_type;
603 struct v3_config * cfg_data = NULL;
604 struct v3_vm_info * vm = NULL;
607 v3_cfg_tree_t * cores_cfg = NULL;
608 v3_cfg_tree_t * per_core_cfg = NULL;
611 if (v3_mach_type == V3_INVALID_CPU) {
612 PrintError(VM_NONE, VCORE_NONE, "Configuring guest on invalid CPU\n");
616 cfg_data = parse_config(cfg_blob);
620 PrintError(VM_NONE, VCORE_NONE, "Could not parse configuration\n");
625 cores_cfg = v3_cfg_subtree(cfg_data->cfg, "cores");
628 PrintError(VM_NONE, VCORE_NONE, "Could not find core configuration (new config format required)\n");
// NOTE(review): the "count" value goes straight into atoi(); no NULL check on the
// v3_cfg_val() result is visible here, and only a count of exactly 0 is rejected below.
632 num_cores = atoi(v3_cfg_val(cores_cfg, "count"));
633 if (num_cores == 0) {
634 PrintError(VM_NONE, VCORE_NONE, "No cores specified in configuration\n");
638 V3_Print(VM_NONE, VCORE_NONE, "Configuring %d cores\n", num_cores);
640 vm = allocate_guest(num_cores);
643 PrintError(VM_NONE, VCORE_NONE, "Could not allocate %d core guest\n", num_cores);
647 #ifdef V3_CONFIG_CACHEPART
648 // Need to initialize cache management and resource control
649 // as early as possible so that allocations are done accordingly
650 if (v3_init_cachepart_vm(vm,cfg_data->cfg)) {
651 PrintError(VM_NONE, VCORE_NONE, "Could not initialize cache partioning\n");
657 vm->host_priv_data = priv_data;
// From here on the parsed configuration is reachable through the VM
659 vm->cfg_data = cfg_data;
661 V3_Print(vm, VCORE_NONE, "Preconfiguration\n");
663 if (pre_config_vm(vm, vm->cfg_data->cfg) == -1) {
664 PrintError(vm, VCORE_NONE, "Error in preconfiguration, attempting to free\n");
665 vm->run_state=VM_ERROR;
670 V3_Print(vm, VCORE_NONE, "Per core configuration\n");
// Start at the first <core> entry; each loop iteration advances to the next one
671 per_core_cfg = v3_cfg_subtree(cores_cfg, "core");
673 // per core configuration
674 for (i = 0; i < vm->num_cores; i++) {
675 struct guest_info * info = &(vm->cores[i]);
// NOTE(review): per_core_cfg is stored and used without a visible NULL check; confirm what
// happens when there are fewer <core> entries than the configured count.
679 info->core_cfg_data = per_core_cfg;
681 if (pre_config_core(info, per_core_cfg) == -1) {
682 PrintError(vm, VCORE_NONE, "Error in core %d preconfiguration, attempting to free guest\n", i);
683 vm->run_state=VM_ERROR;
689 per_core_cfg = v3_cfg_next_branch(per_core_cfg);
693 V3_Print(vm, VCORE_NONE, "Post Configuration\n");
695 if (post_config_vm(vm, vm->cfg_data->cfg) == -1) {
696 PrintError(vm, VCORE_NONE, "Error in postconfiguration, attempting to free guest\n");
697 vm->run_state=VM_ERROR;
// Second pass over the <core> entries, again starting from the first
703 per_core_cfg = v3_cfg_subtree(cores_cfg, "core");
705 // per core configuration
706 for (i = 0; i < vm->num_cores; i++) {
707 struct guest_info * info = &(vm->cores[i]);
// NOTE(review): the return value of post_config_core() is discarded here, unlike the
// pre_config_core() call above, so a failure in it is not acted on at this call site.
709 post_config_core(info, per_core_cfg);
711 per_core_cfg = v3_cfg_next_branch(per_core_cfg);
714 V3_Print(vm, VCORE_NONE, "Configuration successfull\n");
721 int v3_free_config(struct v3_vm_info * vm) {
723 v3_free_htable(vm->cfg_data->file_table, 1, 0);
725 v3_xml_free(vm->cfg_data->cfg);
727 V3_Free(vm->cfg_data);
734 static int setup_memory_map(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
735 v3_cfg_tree_t * mem_region = v3_cfg_subtree(v3_cfg_subtree(cfg, "memmap"), "region");
738 addr_t start_addr = atox(v3_cfg_val(mem_region, "start"));
739 addr_t end_addr = atox(v3_cfg_val(mem_region, "end"));
740 addr_t host_addr = atox(v3_cfg_val(mem_region, "host_addr"));
743 if (v3_add_shadow_mem(vm, V3_MEM_CORE_ANY, start_addr, end_addr, host_addr) == -1) {
744 PrintError(vm, VCORE_NONE,"Could not map memory region: %p-%p => %p\n",
745 (void *)start_addr, (void *)end_addr, (void *)host_addr);
749 mem_region = v3_cfg_next_branch(mem_region);
756 static int setup_extensions(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
757 v3_cfg_tree_t * extension = v3_cfg_subtree(v3_cfg_subtree(cfg, "extensions"), "extension");
760 char * ext_name = v3_cfg_val(extension, "name");
763 PrintError(vm, VCORE_NONE, "Extension has no name\n");
767 V3_Print(vm, VCORE_NONE, "Configuring extension %s\n", ext_name);
769 if (v3_add_extension(vm, ext_name, extension) == -1) {
770 PrintError(vm, VCORE_NONE, "Error adding extension %s\n", ext_name);
774 extension = v3_cfg_next_branch(extension);
/*
 * Create every <device> listed under <devices> for the VM.
 * Each entry's "class" value selects the device to create, and the entry's
 * own node is passed along as the device configuration.
 */
781 static int setup_devices(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
782 v3_cfg_tree_t * device = v3_cfg_subtree(v3_cfg_subtree(cfg, "devices"), "device");
// NOTE(review): dev_class is printed and handed to v3_create_device() without a NULL check
// visible in this view (setup_extensions() does check its "name") -- confirm.
786 char * dev_class = v3_cfg_val(device, "class");
788 V3_Print(vm, VCORE_NONE, "configuring device %s\n", dev_class);
790 if (v3_create_device(vm, dev_class, device) == -1) {
791 PrintError(vm, VCORE_NONE, "Error creating device %s\n", dev_class);
795 device = v3_cfg_next_branch(device);
// Print the device manager state after the device list has been walked
798 v3_print_dev_mgr(vm);