From: Kyle Hale Date: Fri, 29 Apr 2011 22:16:39 +0000 (-0500) Subject: Merge branch 'devel' of /home-remote/palacios/palacios into devel X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=f08319bfe39e47f1d2e003b48087affa7190c997;hp=603e4c1a451138080ded3d4e3cd3b8716741db89;p=palacios-OLD.git Merge branch 'devel' of /home-remote/palacios/palacios into devel Conflicts: Kconfig merged --- diff --git a/Kconfig b/Kconfig index d312c0b..4241627 100644 --- a/Kconfig +++ b/Kconfig @@ -128,7 +128,7 @@ config MAX_CPUS endmenu source "palacios/src/interfaces/Kconfig" - +source "palacios/src/extensions/Kconfig" config TELEMETRY bool "Enable VMM telemetry support" diff --git a/Makefile b/Makefile index 46227ae..ed13298 100644 --- a/Makefile +++ b/Makefile @@ -435,6 +435,7 @@ core-y := palacios/src/palacios/ libs-y := palacios/lib/$(ARCH)/ devices-y := palacios/src/devices/ interfaces-y := palacios/src/interfaces/ +extensions-y := palacios/src/extensions/ modules-y := modules/ @@ -529,7 +530,7 @@ export INSTALL_PATH ?= /build palacios-dirs := $(patsubst %/,%,$(filter %/, \ - $(core-y) $(devices-y) $(interfaces-y) $(libs-y)) $(modules-y)) + $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y)) $(modules-y)) @@ -540,13 +541,14 @@ palacios-dirs := $(patsubst %/,%,$(filter %/, \ palacios-cleandirs := $(sort $(palacios-dirs) $(patsubst %/,%,$(filter %/, \ $(core-n) $(core-) $(devices-n) $(devices-) \ - $(interfaces-n) $(interfaces-) $(modules-n) $(modules-)))) + $(interfaces-n) $(interfaces-) $(extensions-n) $(extensions-) $(modules-n) $(modules-)))) core-y := $(patsubst %/, %/built-in.o, $(core-y)) devices-y := $(patsubst %/, %/built-in.o, $(devices-y)) interfaces-y := $(patsubst %/, %/built-in.o, $(interfaces-y)) +extensions-y := $(patsubst %/, %/built-in.o, $(extensions-y)) libs-y := $(patsubst %/, %/built-in.o, $(libs-y)) modules-y := $(patsubst %/, %/built-in.o, $(modules-y)) #lnxmod-y := $(patsubst %/, %/built-in.o, $(lnxmod-y)) @@ -573,7 
+575,7 @@ modules-y := $(patsubst %/, %/built-in.o, $(modules-y)) -palacios := $(core-y) $(devices-y) $(interfaces-y) $(libs-y) $(modules-y) +palacios := $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y) $(modules-y) # Rule to link palacios - also used during CONFIG_CONFIGKALLSYMS diff --git a/linux_module/palacios-debugfs.c b/linux_module/palacios-debugfs.c new file mode 100644 index 0000000..b35120e --- /dev/null +++ b/linux_module/palacios-debugfs.c @@ -0,0 +1,79 @@ +/* + * DebugFS interface + * (c) Jack Lange, 2011 + */ + +#include +#include +#include +#include + +#include + +#include "palacios.h" + +struct dentry * v3_dir = NULL; + + +int palacios_init_debugfs( void ) { + + v3_dir = debugfs_create_dir("v3vee", NULL); + + if (IS_ERR(v3_dir)) { + printk("Error creating v3vee debugfs directory\n"); + return -1; + } + + return 0; +} + + +int palacios_deinit_debugfs( void ) { + debugfs_remove(v3_dir); + return 0; +} + + + +static int dfs_register_tree(struct dentry * dir, v3_inspect_node_t * root) { + v3_inspect_node_t * tmp_node = v3_inspection_first_child(root); + struct v3_inspection_value tmp_value; + + while (tmp_node) { + tmp_value = v3_inspection_value(tmp_node); + + if (tmp_value.size == 0) { + struct dentry * new_dir = debugfs_create_dir(tmp_value.name, dir); + dfs_register_tree(new_dir, tmp_node); + } else if (tmp_value.size == 1) { + debugfs_create_u8(tmp_value.name, 0644, dir, (u8 *)tmp_value.value); + } else if (tmp_value.size == 2) { + debugfs_create_u16(tmp_value.name, 0644, dir, (u16 *)tmp_value.value); + } else if (tmp_value.size == 4) { + debugfs_create_u32(tmp_value.name, 0644, dir, (u32 *)tmp_value.value); + } else if (tmp_value.size == 8) { + debugfs_create_u64(tmp_value.name, 0644, dir, (u64 *)tmp_value.value); + } else { + + // buffer + } + + tmp_node = v3_inspection_node_next(tmp_node); + + } + + return 0; +} + + +int dfs_register_vm(struct v3_guest * guest) { + v3_inspect_node_t * root = v3_get_inspection_root(guest->v3_ctx); 
+ + if (root == NULL) { + printk("No inspection root found\n"); + return -1; + } + + dfs_register_tree(v3_dir, root); + return 0; +} diff --git a/linux_module/palacios-debugfs.h b/linux_module/palacios-debugfs.h new file mode 100644 index 0000000..1caad52 --- /dev/null +++ b/linux_module/palacios-debugfs.h @@ -0,0 +1,14 @@ +/* + * DebugFS interface + * (c) Jack Lange, 2011 + */ + +#include "palacios.h" + +int palacios_init_debugfs( void ); +int palacios_deinit_debugfs( void ); + + + +int dfs_register_vm(struct v3_guest * guest); + diff --git a/palacios/include/palacios/vmm_inspector.h b/palacios/include/interfaces/inspector.h similarity index 97% rename from palacios/include/palacios/vmm_inspector.h rename to palacios/include/interfaces/inspector.h index ee0f70d..396e490 100644 --- a/palacios/include/palacios/vmm_inspector.h +++ b/palacios/include/interfaces/inspector.h @@ -36,12 +36,6 @@ typedef void v3_inspect_node_t; #define READ_ONLY 2 #define HOOKED 4 -struct v3_inspector_state { - struct v3_mtree state_tree; - -}; - - int v3_init_inspector(struct v3_vm_info * vm); int v3_init_inspector_core(struct guest_info * core); diff --git a/palacios/include/interfaces/vmm_host_dev.h b/palacios/include/interfaces/vmm_host_dev.h index 138839f..2b893b5 100644 --- a/palacios/include/interfaces/vmm_host_dev.h +++ b/palacios/include/interfaces/vmm_host_dev.h @@ -23,7 +23,6 @@ #include - /* The purpose of this interface is to make it possible to implement @@ -78,9 +77,12 @@ typedef enum { V3_BUS_CLASS_DIRECT, V3_BUS_CLASS_PCI } v3_bus_class_t; #ifdef __V3VEE__ +struct v3_vm_info; + v3_host_dev_t v3_host_dev_open(char *impl, v3_bus_class_t bus, - v3_guest_dev_t gdev); + v3_guest_dev_t gdev, + struct v3_vm_info *vm); int v3_host_dev_close(v3_host_dev_t hdev); @@ -106,13 +108,13 @@ uint64_t v3_host_dev_write_mem(v3_host_dev_t hostdev, int v3_host_dev_ack_irq(v3_host_dev_t hostdev, uint8_t irq); -uint64_t v3_host_dev_config_read(v3_host_dev_t hostdev, +uint64_t 
v3_host_dev_read_config(v3_host_dev_t hostdev, uint64_t offset, void *dest, uint64_t len); -uint64_t v3_host_dev_config_write(v3_host_dev_t hostdev, - uint64_t offset, +uint64_t v3_host_dev_write_config(v3_host_dev_t hostdev, + uint64_t offset, void *src, uint64_t len); @@ -124,10 +126,12 @@ struct v3_host_dev_hooks { // this device is attached to and an opaque pointer back to the // guest device. It returns an opaque representation of // the host device it has attached to, with zero indicating - // failure + // failure. The host_priv_data arguement supplies to the + // host the pointer that the VM was originally registered with v3_host_dev_t (*open)(char *impl, v3_bus_class_t bus, - v3_guest_dev_t gdev); + v3_guest_dev_t gdev, + void *host_priv_data); int (*close)(v3_host_dev_t hdev); @@ -150,12 +154,12 @@ struct v3_host_dev_hooks { // fail, returning != len // Callee gets the host dev id, and the guest physical address uint64_t (*read_mem)(v3_host_dev_t hostdev, - addr_t gpa, + void * gpa, void *dest, uint64_t len); uint64_t (*write_mem)(v3_host_dev_t hostdev, - addr_t gpa, + void * gpa, void *src, uint64_t len); @@ -202,19 +206,16 @@ int v3_host_dev_raise_irq(v3_host_dev_t hostdev, /* These functions allow the host to read and write the guest memory by physical address, for example to implement DMA - - These functions are incremental - that is, they can return - a smaller amount than requested */ uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t hostdev, v3_guest_dev_t guest_dev, - addr_t gpa, + void * gpa, void *dest, uint64_t len); uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t hostdev, v3_guest_dev_t guest_dev, - addr_t gpa, + void * gpa, void *src, uint64_t len); diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 4b2728f..5d4527f 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -50,9 +50,6 @@ struct v3_sym_core_state; #endif -#ifdef CONFIG_INSPECTOR -#include 
-#endif #include @@ -187,9 +184,6 @@ struct v3_vm_info { struct v3_telemetry_state telemetry; #endif -#ifdef CONFIG_INSPECTOR - struct v3_inspector_state inspector; -#endif uint64_t yield_cycle_period; diff --git a/palacios/include/palacios/vmcs.h b/palacios/include/palacios/vmcs.h index 9129d1b..c80a23c 100644 --- a/palacios/include/palacios/vmcs.h +++ b/palacios/include/palacios/vmcs.h @@ -39,6 +39,15 @@ +struct vmcs_field_encoding { + uint8_t access_type : 1; /* 0 = full, 1 = high, (for accessing 64 bit fields on 32bit CPU) */ + uint16_t index : 9; + uint8_t type : 2; /* 0=ctrl, 1=read-only, 2 = guest state, 3 = host state */ + uint8_t rsvd1 : 1; /* MBZ */ + uint8_t width : 2; /* 0 = 16bit, 1 = 64bit, 2 = 32bit, 3 = natural width */ + uint32_t rsvd2 : 17; +} __attribute__((packed)); + typedef enum { VMCS_GUEST_ES_SELECTOR = 0x00000800, diff --git a/palacios/include/palacios/vmm.h b/palacios/include/palacios/vmm.h index ae4421c..5cb1db1 100644 --- a/palacios/include/palacios/vmm.h +++ b/palacios/include/palacios/vmm.h @@ -184,13 +184,33 @@ struct guest_info; #ifdef CONFIG_MULTITHREAD_OS -#define V3_CREATE_THREAD(fn, arg, name) \ - do { \ +#define V3_CREATE_THREAD(fn, arg, name) ({ \ + void * thread = NULL; \ extern struct v3_os_hooks * os_hooks; \ if ((os_hooks) && (os_hooks)->start_kernel_thread) { \ - (os_hooks)->start_kernel_thread(fn, arg, name); \ + thread = (os_hooks)->start_kernel_thread(fn, arg, name); \ } \ - } while (0) + thread; \ + }) + + +#define V3_THREAD_SLEEP() \ + do{ \ + extern struct v3_os_hooks * os_hooks; \ + if ((os_hooks) && (os_hooks)->kernel_thread_sleep) { \ + (os_hooks)->kernel_thread_sleep(); \ + } \ + }while(0) + + +#define V3_THREAD_WAKEUP(thread) \ + do{ \ + extern struct v3_os_hooks * os_hooks; \ + if ((os_hooks) && (os_hooks)->kernel_thread_wakeup) { \ + (os_hooks)->kernel_thread_wakeup(thread); \ + } \ + }while(0) + #define V3_Call_On_CPU(cpu, fn, arg) \ @@ -298,7 +318,9 @@ struct v3_os_hooks { - void (*start_kernel_thread)(int 
(*fn)(void * arg), void * arg, char * thread_name); + void * (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name); + void (*kernel_thread_sleep)(void); + void (*kernel_thread_wakeup)(void * thread); void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector); void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg); void * (*start_thread_on_cpu)(int cpu_id, int (*fn)(void * arg), void * arg, char * thread_name); diff --git a/palacios/include/palacios/vmm_muxer.h b/palacios/include/palacios/vmm_barrier.h similarity index 55% copy from palacios/include/palacios/vmm_muxer.h copy to palacios/include/palacios/vmm_barrier.h index 1c50789..4513c09 100644 --- a/palacios/include/palacios/vmm_muxer.h +++ b/palacios/include/palacios/vmm_barrier.h @@ -1,4 +1,4 @@ -/* +/* * This file is part of the Palacios Virtual Machine Monitor developed * by the V3VEE Project with funding from the United States National * Science Foundation and the Department of Energy. @@ -7,31 +7,34 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * - * Copyright (c) 2008, Jack Lange - * Copyright (c) 2008, The V3VEE Project + * Copyright (c) 2011, Jack Lange + * Copyright (c) 2011, The V3VEE Project * All rights reserved. * - * Author: Jack Lange + * Author: Jack Lange * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
*/ -#ifndef __VMM_MUXER_H__ -#define __VMM_MUXER_H__ +#ifndef __VMM_BARRIER_H__ +#define __VMM_BARRIER_H__ #ifdef __V3VEE__ -struct v3_vm_info; +#include +struct v3_barrier { + + int active; // If 1, barrier is active, everyone must wait + // If 0, barrier is clear, can proceed -struct v3_vm_info * v3_get_foreground_vm(); -void v3_set_foreground_vm(struct v3_vm_info * vm); + v3_lock_t lock; +}; -int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm)); #endif diff --git a/palacios/include/palacios/vmm_cpuid.h b/palacios/include/palacios/vmm_cpuid.h index 30467fd..88d48bd 100644 --- a/palacios/include/palacios/vmm_cpuid.h +++ b/palacios/include/palacios/vmm_cpuid.h @@ -54,6 +54,12 @@ struct v3_cpuid_map { void v3_print_cpuid_map(struct v3_vm_info * vm); +int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid, + uint32_t rax_mask, uint32_t rax, + uint32_t rbx_mask, uint32_t rbx, + uint32_t rcx_mask, uint32_t rcx, + uint32_t rdx_mask, uint32_t rdx); + int v3_hook_cpuid(struct v3_vm_info * vm, uint32_t cpuid, int (*hook_fn)(struct guest_info * info, uint32_t cpuid, \ uint32_t * eax, uint32_t * ebx, \ diff --git a/palacios/include/palacios/vmm_dev_mgr.h b/palacios/include/palacios/vmm_dev_mgr.h index e789207..c9999bd 100644 --- a/palacios/include/palacios/vmm_dev_mgr.h +++ b/palacios/include/palacios/vmm_dev_mgr.h @@ -179,11 +179,10 @@ struct v3_dev_blk_ops { struct v3_dev_net_ops { /* Backend implemented functions */ - int (*send)(uint8_t * buf, uint32_t count, void * private_data); + int (*send)(uint8_t * buf, uint32_t len, int synchronize, void * private_data); /* Frontend implemented functions */ - int (*recv)(uint8_t * buf, uint32_t count, void * frnt_data); - void (*poll)(struct v3_vm_info * vm, int budget, void * frnt_data); + int (*recv)(uint8_t * buf, uint32_t len, void * frnt_data); /* This is ugly... 
*/ void * frontend_data; diff --git a/palacios/include/palacios/vmm_ethernet.h b/palacios/include/palacios/vmm_ethernet.h index 3794d77..2b9319b 100644 --- a/palacios/include/palacios/vmm_ethernet.h +++ b/palacios/include/palacios/vmm_ethernet.h @@ -25,21 +25,40 @@ #define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_MTU) #define ETH_ALEN 6 +#define MIN_MTU 68 +//#define MAX_MTU 65535 +#define MAX_MTU 9000 + +#define MAX_PACKET_LEN (ETHERNET_HEADER_LEN + MAX_MTU) + + +extern int v3_net_debug; #ifdef __V3VEE__ #include +#define V3_Net_Print(level, fmt, args...) \ + do { \ + if(level <= v3_net_debug) { \ + extern struct v3_os_hooks * os_hooks; \ + if ((os_hooks) && (os_hooks)->print) { \ + (os_hooks)->print((fmt), ##args); \ + } \ + } \ + } while (0) + struct nic_statistics { - uint32_t tx_pkts; + uint64_t tx_pkts; uint64_t tx_bytes; - uint32_t tx_dropped; + uint64_t tx_dropped; - uint32_t rx_pkts; + uint64_t rx_pkts; uint64_t rx_bytes; - uint32_t rx_dropped; + uint64_t rx_dropped; - uint32_t interrupts; + uint32_t tx_interrupts; + uint32_t rx_interrupts; }; static inline int is_multicast_ethaddr(const uint8_t * addr) diff --git a/palacios/include/palacios/vmm_extensions.h b/palacios/include/palacios/vmm_extensions.h index 0135f88..fdddb69 100644 --- a/palacios/include/palacios/vmm_extensions.h +++ b/palacios/include/palacios/vmm_extensions.h @@ -23,8 +23,8 @@ #ifdef __V3VEE__ #include -#include #include +#include struct v3_vm_info; @@ -41,10 +41,10 @@ struct v3_extension_impl { char * name; int (*init)(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data); int (*deinit)(struct v3_vm_info * vm, void * priv_data); - int (*core_init)(struct guest_info * core); - int (*core_deinit)(struct guest_info * core); - int (*on_entry)(struct guest_info * core); - int (*on_exit)(struct guest_info * core); + int (*core_init)(struct guest_info * core, void * priv_data); + int (*core_deinit)(struct guest_info * core, void * priv_data); + int (*on_entry)(struct 
guest_info * core, void * priv_data); + int (*on_exit)(struct guest_info * core, void * priv_data); }; struct v3_extension { @@ -64,6 +64,9 @@ int V3_deinit_extensions(); int v3_init_ext_manager(struct v3_vm_info * vm); int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg); +int v3_init_core_extensions(struct guest_info * core); + +void * v3_get_extension_state(struct v3_vm_info * vm, const char * name); #define register_extension(ext) \ diff --git a/palacios/include/palacios/vmm_instr_emulator.h b/palacios/include/palacios/vmm_instr_emulator.h index 84b07a4..7559f05 100644 --- a/palacios/include/palacios/vmm_instr_emulator.h +++ b/palacios/include/palacios/vmm_instr_emulator.h @@ -23,7 +23,7 @@ #define MAKE_1OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -42,7 +42,7 @@ #define MAKE_1OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -61,7 +61,7 @@ #define MAKE_1OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -80,7 +80,7 @@ #define MAKE_1OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ 
"pushfq; " \ @@ -134,7 +134,7 @@ #define MAKE_2OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst, addr_t * src, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushfq\r\n" \ @@ -156,7 +156,7 @@ #define MAKE_2OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst, addr_t * src, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -175,7 +175,7 @@ #define MAKE_2OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst, addr_t * src, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -193,7 +193,7 @@ #define MAKE_2OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst, addr_t * src, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -217,7 +217,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushfq; " \ @@ -239,7 +239,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -260,7 +260,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some 
of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -283,7 +283,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -307,7 +307,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushfq; " \ @@ -330,7 +330,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -351,7 +351,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ @@ -374,7 +374,7 @@ addr_t * src, \ addr_t * ecx, addr_t * flags) { \ /* Some of the flags values are not copied out in a pushf, we save them here */ \ - addr_t flags_rsvd = *flags & ~0xfffe7fff; \ + addr_t flags_rsvd = *flags & ~0xfffc7fff; \ \ asm volatile ( \ "pushf; " \ diff --git a/palacios/include/palacios/vmm_queue.h b/palacios/include/palacios/vmm_queue.h index e88329f..811f19d 100644 --- a/palacios/include/palacios/vmm_queue.h +++ b/palacios/include/palacios/vmm_queue.h @@ -28,30 +28,26 @@ #include -/* IMPORTANT: - * This implementation currently does no locking, and as such is not - * SMP/thread/interrupt safe - */ -struct queue_entry { +struct v3_queue_entry { addr_t entry; struct 
list_head entry_list; }; -struct gen_queue { +struct v3_queue { uint_t num_entries; struct list_head entries; v3_lock_t lock; }; -struct gen_queue * v3_create_queue(); -void v3_init_queue(struct gen_queue * queue); +struct v3_queue * v3_create_queue(); +void v3_init_queue(struct v3_queue * queue); -void v3_enqueue(struct gen_queue * queue, addr_t entry); -addr_t v3_dequeue(struct gen_queue * queue); +void v3_enqueue(struct v3_queue * queue, addr_t entry); +addr_t v3_dequeue(struct v3_queue * queue); diff --git a/palacios/include/palacios/vmm_vnet.h b/palacios/include/palacios/vmm_vnet.h index 1750fff..0f8c793 100644 --- a/palacios/include/palacios/vmm_vnet.h +++ b/palacios/include/palacios/vmm_vnet.h @@ -19,8 +19,8 @@ * redistribute, and modify it as specified in the file "V3VEE_LICENSE". */ -#ifndef __VNET_H__ -#define __VNET_H__ +#ifndef __VNET_CORE_H__ +#define __VNET_CORE_H__ #include #include @@ -38,7 +38,8 @@ #define VNET_HASH_SIZE 17 -//routing table entry +extern int v3_vnet_debug; + struct v3_vnet_route { uint8_t src_mac[ETH_ALEN]; uint8_t dst_mac[ETH_ALEN]; @@ -100,7 +101,7 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, uint8_t type, void * priv_data); int v3_vnet_add_route(struct v3_vnet_route route); -int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data); +int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize); int v3_vnet_find_dev(uint8_t * mac); int v3_vnet_stat(struct vnet_stat * stats); @@ -110,19 +111,17 @@ struct v3_vnet_dev_ops { int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * dev_data); - void (*poll) (struct v3_vm_info * vm, int budget, void * dev_data); }; int v3_init_vnet(void); void v3_deinit_vnet(void); -void v3_vnet_do_poll(struct v3_vm_info * vm); - int v3_vnet_add_dev(struct v3_vm_info * info, uint8_t * mac, struct v3_vnet_dev_ops * ops, void * priv_data); int v3_vnet_del_dev(int dev_id); + #endif #endif diff --git a/palacios/include/palacios/vmx.h 
b/palacios/include/palacios/vmx.h index fd5e6ce..7a0a039 100644 --- a/palacios/include/palacios/vmx.h +++ b/palacios/include/palacios/vmx.h @@ -30,31 +30,14 @@ #include #include -// Intel VMX Specific MSRs -#define VMX_FEATURE_CONTROL_MSR 0x0000003a -#define VMX_BASIC_MSR 0x00000480 -#define VMX_PINBASED_CTLS_MSR 0x00000481 -#define VMX_PROCBASED_CTLS_MSR 0x00000482 -#define VMX_EXIT_CTLS_MSR 0x00000483 -#define VMX_ENTRY_CTLS_MSR 0x00000484 -#define VMX_MISC_MSR 0x00000485 -#define VMX_CR0_FIXED0_MSR 0x00000486 -#define VMX_CR0_FIXED1_MSR 0x00000487 -#define VMX_CR4_FIXED0_MSR 0x00000488 -#define VMX_CR4_FIXED1_MSR 0x00000489 -#define VMX_VMCS_ENUM_MSR 0x0000048A #define VMX_SUCCESS 0 #define VMX_FAIL_INVALID 1 #define VMX_FAIL_VALID 2 #define VMM_ERROR 3 -#define FEATURE_CONTROL_LOCK 0x00000001 -#define FEATURE_CONTROL_VMXON 0x00000004 -#define FEATURE_CONTROL_VALID ( FEATURE_CONTROL_LOCK | FEATURE_CONTROL_VMXON ) -#define CPUID_1_ECX_VTXFLAG 0x00000020 struct vmx_pin_ctrls { @@ -168,15 +151,6 @@ struct vmx_entry_ctrls { } __attribute__((packed)); } __attribute__((packed)); -struct vmx_basic_msr { - uint32_t revision; - uint_t regionSize : 13; - uint_t rsvd1 : 4; // Always 0 - uint_t physWidth : 1; - uint_t smm : 1; // Always 1 - uint_t memType : 4; - uint_t rsvd2 : 10; // Always 0 -} __attribute__((packed)); typedef enum { VMXASSIST_DISABLED, diff --git a/palacios/include/palacios/vmx_ept.h b/palacios/include/palacios/vmx_ept.h new file mode 100644 index 0000000..55cb363 --- /dev/null +++ b/palacios/include/palacios/vmx_ept.h @@ -0,0 +1,124 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. 
You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Jack Lange + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +#ifndef __VMX_EPT_H__ +#define __VMX_EPT_H__ + + +#ifdef __V3VEE__ + +/* The actual format of these data structures is specified as being machine + dependent. Thus the lengths of the base address fields are defined as variable. + To be safe we assume the maximum(?) size fields +*/ + + +typedef struct vmx_eptp { + uint8_t psmt : 3; + uint8_t pwl1 : 3; + uint8_t rsvd1 : 6; + uint64_t pml_base_addr : 39; + uint16_t rsvd2 : 13; +} __attribute__((packed)) vmx_eptp_t; + + +typedef struct vmx_pml4 { + uint8_t read : 1; + uint8_t write : 1; + uint8_t exec : 1; + uint8_t rsvd1 : 5; + uint8_t ignore1 : 4; + uint64_t pdp_base_addr : 39; + uint8_t rsvd2 : 1; + uint32_t ignore2 : 12; +} __attribute__((packed)) vmx_pml4_t; + + +typedef struct vmx_pdp_1GB { + uint8_t read : 1; + uint8_t write : 1; + uint8_t exec : 1; + uint8_t mt : 3; + uint8_t ipat : 1; + uint8_t large_page : 1; + uint8_t ignore1 : 4; + uint32_t rsvd1 : 18; + uint32_t page_base_addr : 21; + uint8_t rsvd2 : 1; + uint32_t ignore2 : 12; +} __attribute__((packed)) vmx_pdp_1GB_t; + +typedef struct vmx_pdp { + uint8_t read : 1; + uint8_t write : 1; + uint8_t exec : 1; + uint8_t rsvd1 : 4; + uint8_t large_page : 1; + uint8_t ignore1 : 4; + uint32_t page_base_addr : 39; + uint8_t rsvd2 : 1; + uint32_t ignore2 : 12; +} __attribute__((packed)) vmx_pdp_t; + + +typedef struct vmx_pde_2MB { + uint8_t read : 1; + uint8_t write : 1; + uint8_t exec : 1; + uint8_t mt : 3; + uint8_t ipat : 1; + uint8_t large_page : 1; + uint8_t ignore1 : 4; + uint32_t rsvd1 : 9; + uint32_t page_base_addr : 30; + uint8_t rsvd2 : 1; + uint32_t ignore2 : 12; +} __attribute__((packed)) vmx_pde_2MB_t; + + +typedef struct vmx_pde { + uint8_t read : 1; + uint8_t write : 1; + 
uint8_t exec : 1; + uint8_t rsvd1 : 4; + uint8_t large_page : 1; + uint8_t ignore1 : 4; + uint32_t page_base_addr : 39; + uint8_t rsvd2 : 1; + uint32_t ignore2 : 12; +} __attribute__((packed)) vmx_pde_t; + + + +typedef struct vmx_pte { + uint8_t read : 1; + uint8_t write : 1; + uint8_t exec : 1; + uint8_t mt : 3; + uint8_t ipat : 1; + uint8_t ignore1 : 5; + uint32_t page_base_addr : 39; + uint8_t rsvd2 : 1; + uint32_t ignore2 : 12; +} __attribute__((packed)) vmx_pte_t; + +#endif + +#endif + diff --git a/palacios/include/palacios/vmx_hw_info.h b/palacios/include/palacios/vmx_hw_info.h new file mode 100644 index 0000000..e130545 --- /dev/null +++ b/palacios/include/palacios/vmx_hw_info.h @@ -0,0 +1,169 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Jack Lange + * Copyright (c) 2011, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + + +#ifndef __VMX_HW_INFO_H__ +#define __VMX_HW_INFO_H__ + +#ifdef __V3VEE__ + + + +#define VMX_BASIC_MSR 0x00000480 +#define VMX_PINBASED_CTLS_MSR 0x00000481 +#define VMX_PROCBASED_CTLS_MSR 0x00000482 +#define VMX_EXIT_CTLS_MSR 0x00000483 +#define VMX_ENTRY_CTLS_MSR 0x00000484 +#define VMX_MISC_MSR 0x00000485 +#define VMX_CR0_FIXED0_MSR 0x00000486 +#define VMX_CR0_FIXED1_MSR 0x00000487 +#define VMX_CR4_FIXED0_MSR 0x00000488 +#define VMX_CR4_FIXED1_MSR 0x00000489 +#define VMX_VMCS_ENUM_MSR 0x0000048A +#define VMX_PROCBASED_CTLS2_MSR 0x0000048B +#define VMX_EPT_VPID_CAP_MSR 0x0000048C +#define VMX_TRUE_PINBASED_CTLS_MSR 0x0000048D +#define VMX_TRUE_PROCBASED_CTLS_MSR 0x0000048E +#define VMX_TRUE_EXIT_CTLS_MSR 0x0000048F +#define VMX_TRUE_ENTRY_CTLS_MSR 0x00000490 + + + +struct vmx_basic_msr { + union { + struct { + uint32_t lo; + uint32_t hi; + } __attribute__((packed)); + + struct { uint32_t revision; + uint32_t regionSize : 13; + uint8_t rsvd1 : 3; /* Always 0 */ + uint8_t physWidth : 1; /* VMCS address field widths + (1=32bits, 0=natural width) */ + uint8_t smm : 1; + uint8_t memType : 4; /* 0 = UC, 6 = WriteBack */ + uint8_t io_str_info : 1; + uint8_t def1_maybe_0 : 1; /* 1="Any VMX ctrls that default to 1 may be cleared to 0" */ + uint32_t rsvd2 : 8; /* Always 0 */ + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); + + +struct vmx_misc_msr { + union { + struct { + uint32_t lo; + uint32_t hi; + } __attribute__((packed)); + + struct { + uint8_t tsc_multiple : 5; /* Bit position in TSC field that drives vmx timer step */ + uint8_t exits_store_LMA : 1; + uint8_t can_halt : 1; + uint8_t can_shtdown : 1; + uint8_t can_wait_for_sipi : 1; + uint8_t rsvd1 : 7; + uint16_t num_cr3_targets : 9; + uint8_t max_msr_cache_size : 3; /* (512 * (max_msr_cache_size + 1)) == max msr load/store list size */ + uint8_t SMM_ctrl_avail : 1; + uint8_t rsvd2 : 3; + uint32_t MSEG_rev_id; + } __attribute__((packed)); + } __attribute__((packed)); +} 
__attribute__((packed)); + + +struct vmx_ept_msr { + union { + struct { + uint32_t lo; + uint32_t hi; + } __attribute__((packed)); + + struct { + uint8_t exec_only_ok : 1; + uint8_t rsvd1 : 5; + uint8_t pg_walk_len4 : 1; /* support for a page walk of length 4 */ + uint8_t rsvd2 : 1; + uint8_t ept_uc_ok : 1; /* EPT page tables can be uncacheable */ + uint8_t rsvd3 : 5; + uint8_t ept_wb_ok : 1; /* EPT page tables can be writeback */ + uint8_t rsvd4 : 1; + uint8_t ept_2MB_ok : 1; /* 2MB EPT pages supported */ + uint8_t ept_1GB_ok : 1; /* 1GB EPT pages supported */ + uint8_t rsvd5 : 2; + uint8_t INVEPT_avail : 1; /* INVEPT instruction is available */ + uint8_t rsvd6 : 4; + uint8_t INVEPT_single_ctx_avail : 1; + uint8_t INVEPT_all_ctx_avail : 1; + uint8_t rsvd7 : 5; + uint8_t INVVPID_avail : 1; + uint8_t rsvd8 : 7; + uint8_t INVVPID_1addr_avail : 1; + uint8_t INVVPID_single_ctx_avail : 1; + uint8_t INVVPID_all_ctx_avail : 1; + uint8_t INVVPID_single_ctx_w_glbls_avail : 1; + uint32_t rsvd9 : 20; + } __attribute__((packed)); + } __attribute__((packed)); +}__attribute__((packed)); + + +struct vmx_ctrl_field { + uint32_t def_val; + uint32_t req_val; /* Required values: field_val & req_mask == req_val */ + uint32_t req_mask; /* If a mask bit is set it's value is restricted (i.e. the VMM cannot change it) */ +}; + + +struct vmx_cr_field { + uint64_t def_val; + uint64_t req_val; /* Required values: field_val & req_mask == req_val */ + uint64_t req_mask; /* If a mask bit is set it's value is restricted (i.e. 
the VMM cannot change it) */ +}; + + + + +struct vmx_hw_info { + struct vmx_basic_msr basic_info; + struct vmx_misc_msr misc_info; + struct vmx_ept_msr ept_info; + + struct vmx_ctrl_field pin_ctrls; + struct vmx_ctrl_field proc_ctrls; + struct vmx_ctrl_field exit_ctrls; + struct vmx_ctrl_field entry_ctrls; + struct vmx_ctrl_field proc_ctrls_2; + + struct vmx_cr_field cr0; + struct vmx_cr_field cr4; +}; + + +int v3_init_vmx_hw(struct vmx_hw_info * hw_info); + + + + +#endif + +#endif diff --git a/palacios/include/palacios/vmx_lowlevel.h b/palacios/include/palacios/vmx_lowlevel.h index 6db9f17..ce6a440 100644 --- a/palacios/include/palacios/vmx_lowlevel.h +++ b/palacios/include/palacios/vmx_lowlevel.h @@ -57,24 +57,7 @@ -static inline int v3_enable_vmx(addr_t vmxon_ptr) { - uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr; - uint8_t ret_invalid = 0; - __asm__ __volatile__ ( - VMXON_OPCODE - EAX_06_MODRM - "setnaeb %0;" // fail invalid (CF=1) - : "=q"(ret_invalid) - : "a"(&vmxon_ptr_64),"0"(ret_invalid) - : "memory"); - - if (ret_invalid) { - return VMX_FAIL_INVALID; - } else { - return VMX_SUCCESS; - } -} static inline int vmcs_clear(addr_t vmcs_ptr) { uint64_t vmcs_ptr_64 __attribute__ ((aligned(8))) = (uint64_t)vmcs_ptr; @@ -181,6 +164,26 @@ static inline int vmcs_write(vmcs_field_t vmcs_field, addr_t value) { return VMX_SUCCESS; } + +static inline int vmx_on(addr_t vmxon_ptr) { + uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr; + uint8_t ret_invalid = 0; + + __asm__ __volatile__ ( + VMXON_OPCODE + EAX_06_MODRM + "setnaeb %0;" // fail invalid (CF=1) + : "=q"(ret_invalid) + : "a"(&vmxon_ptr_64),"0"(ret_invalid) + : "memory"); + + if (ret_invalid) { + return VMX_FAIL_INVALID; + } else { + return VMX_SUCCESS; + } +} + static inline int vmx_off() { uint8_t ret_valid = 0; uint8_t ret_invalid = 0; @@ -198,6 +201,57 @@ static inline int vmx_off() { return VMX_SUCCESS; } + +static inline int enable_vmx() { +#ifdef __V3_64BIT__ 
+ __asm__ __volatile__ ( + "movq %%cr4, %%rbx;" + "orq $0x00002000, %%rbx;" + "movq %%rbx, %%cr4;" + : + : + : "%rbx" + ); + + + __asm__ __volatile__ ( + "movq %%cr0, %%rbx; " + "orq $0x00000020,%%rbx; " + "movq %%rbx, %%cr0;" + : + : + : "%rbx" + ); +#elif __V3_32BIT__ + __asm__ __volatile__ ( + "movl %%cr4, %%ecx;" + "orl $0x00002000, %%ecx;" + "movl %%ecx, %%cr4;" + : + : + : "%ecx" + ); + + + + __asm__ __volatile__ ( + "movl %%cr0, %%ecx; " + "orl $0x00000020,%%ecx; " + "movl %%ecx, %%cr0;" + : + : + : "%ecx" + ); + +#endif + + return 0; +} + + + + + #endif #endif diff --git a/palacios/src/devices/Kconfig b/palacios/src/devices/Kconfig index eb15aca..35b8523 100644 --- a/palacios/src/devices/Kconfig +++ b/palacios/src/devices/Kconfig @@ -43,7 +43,16 @@ config GENERIC bool "Generic Device" default y help - Includes the Virtual Generic device + Includes the virtual generic device. This device allows you + to see guest I/O port and memory region interaction with a physical + device on the underlying hardware, as well as to ignore such + interaction. The generic device also serves as a front-end + device for non-PCI host-based virtual device implementations. If + you want to handle either host-based virtual or physical devices + that are not PCI devices, this is what you want. If you want + to handle a host-based virtual device that is a PCI device, you + want to use the PCI front-end device. If you want to handle + a physical PCI device, you want the passthrough PCI device. 
config DEBUG_GENERIC bool "Generic device Debugging" @@ -156,7 +165,7 @@ config LINUX_VIRTIO_VNET default n depends on PCI && EXPERIMENTAL && VNET help - Enable the Virtio VNET interface + Enable the Virtio VNET interface for Control VM config DEBUG_LINUX_VIRTIO_VNET bool "Virtio VNET Interface Debugging" @@ -167,11 +176,11 @@ config DEBUG_LINUX_VIRTIO_VNET config VNET_NIC - bool "Enable VNET VIrtio NIC Device" + bool "Enable VNET Backend Device" default n depends on PCI && EXPERIMENTAL && VNET help - Enable the VNET Virtio backend device + Enable the VNET backend device config DEBUG_VNET_NIC bool "VNET NIC Device Debugging" @@ -267,6 +276,7 @@ config PASSTHROUGH_PCI help Enables hardware devices to be passed through to the VM + config DEBUG_PCI bool "PCI debugging" depends on PCI && DEBUG_ON @@ -274,6 +284,26 @@ config DEBUG_PCI Enable debugging for the PCI +config PCI_FRONT + bool "PCI front-end device" + default y + depends on PCI && HOST_DEVICE + help + PCI front-end device for a host-based PCI device implementation + This device allows you to project a host-based *virtual* device + into the guest as a PCI device. If you want to project a + physical PCI device, use Passthrough PCI instead. If you want + to project a non-PCI virtual or physical device, + use the generic device. 
+ + +config DEBUG_PCI_FRONT + bool "PCI front-end debugging" + depends on PCI_FRONT && DEBUG_ON + help + Enable debugging for the PCI front-end device + + config PIC bool "8259A PIC" diff --git a/palacios/src/devices/Makefile b/palacios/src/devices/Makefile index f5b40be..51b43e9 100644 --- a/palacios/src/devices/Makefile +++ b/palacios/src/devices/Makefile @@ -45,3 +45,5 @@ obj-$(CONFIG_MCHECK) += mcheck.o obj-$(CONFIG_VGA) += vga.o +obj-$(CONFIG_PCI_FRONT) += pci_front.o + diff --git a/palacios/src/devices/generic.c b/palacios/src/devices/generic.c index b7d1a18..73b778a 100644 --- a/palacios/src/devices/generic.c +++ b/palacios/src/devices/generic.c @@ -24,12 +24,19 @@ #include #include #include +#include + +#ifdef CONFIG_HOST_DEVICE +#include +#endif #ifndef CONFIG_DEBUG_GENERIC #undef PrintDebug #define PrintDebug(fmt, args...) #endif +#define MAX_NAME 32 +#define MAX_MEM_HOOKS 16 typedef enum {GENERIC_IGNORE, GENERIC_PASSTHROUGH, @@ -37,32 +44,64 @@ typedef enum {GENERIC_IGNORE, GENERIC_PRINT_AND_IGNORE} generic_mode_t; struct generic_internal { + enum {GENERIC_PHYSICAL, GENERIC_HOST} forward_type; +#ifdef CONFIG_HOST_DEVICE + v3_host_dev_t host_dev; +#endif + struct vm_device *dev; // me + + char name[MAX_NAME]; + + uint32_t num_mem_hooks; + addr_t mem_hook[MAX_MEM_HOOKS]; }; -static int generic_write_port_passthrough(struct guest_info * core, uint16_t port, void * src, - uint_t length, void * priv_data) { +static int generic_write_port_passthrough(struct guest_info * core, + uint16_t port, + void * src, + uint_t length, + void * priv_data) +{ + struct generic_internal *state = (struct generic_internal *) priv_data; uint_t i; - switch (length) { - case 1: - v3_outb(port, ((uint8_t *)src)[0]); - break; - case 2: - v3_outw(port, ((uint16_t *)src)[0]); + switch (state->forward_type) { + case GENERIC_PHYSICAL: + switch (length) { + case 1: + v3_outb(port, ((uint8_t *)src)[0]); + break; + case 2: + v3_outw(port, ((uint16_t *)src)[0]); + break; + case 4: + 
v3_outdw(port, ((uint32_t *)src)[0]); + break; + default: + for (i = 0; i < length; i++) { + v3_outb(port, ((uint8_t *)src)[i]); + } + break; + } + return length; break; - case 4: - v3_outdw(port, ((uint32_t *)src)[0]); +#ifdef CONFIG_HOST_DEVICE + case GENERIC_HOST: + if (state->host_dev) { + return v3_host_dev_write_io(state->host_dev,port,src,length); + } else { + return -1; + } break; +#endif default: - for (i = 0; i < length; i++) { - v3_outb(port, ((uint8_t *)src)[i]); - } + PrintError("generic (%s): unknown forwarding type\n", state->name); + return -1; + break; } - - return length; } static int generic_write_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src, @@ -70,7 +109,16 @@ static int generic_write_port_print_and_passthrough(struct guest_info * core, ui uint_t i; int rc; - PrintDebug("generic: writing 0x"); +#ifdef CONFIG_DEBUG_GENERIC + struct generic_internal *state = (struct generic_internal *) priv_data; +#endif + + PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ...", state->name, + length, port, + state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? 
"host" : "UNKNOWN"); + + PrintDebug("generic (%s): writing 0x", state->name); for (i = 0; i < length; i++) { PrintDebug("%x", ((uint8_t *)src)[i]); @@ -85,35 +133,63 @@ static int generic_write_port_print_and_passthrough(struct guest_info * core, ui return rc; } -static int generic_read_port_passthrough(struct guest_info * core, uint16_t port, void * src, - uint_t length, void * priv_data) { +static int generic_read_port_passthrough(struct guest_info * core, + uint16_t port, + void * dst, + uint_t length, + void * priv_data) +{ + struct generic_internal *state = (struct generic_internal *) priv_data; + uint_t i; - switch (length) { - case 1: - ((uint8_t *)src)[0] = v3_inb(port); - break; - case 2: - ((uint16_t *)src)[0] = v3_inw(port); + switch (state->forward_type) { + case GENERIC_PHYSICAL: + switch (length) { + case 1: + ((uint8_t *)dst)[0] = v3_inb(port); + break; + case 2: + ((uint16_t *)dst)[0] = v3_inw(port); + break; + case 4: + ((uint32_t *)dst)[0] = v3_indw(port); + break; + default: + for (i = 0; i < length; i++) { + ((uint8_t *)dst)[i] = v3_inb(port); + } + } + return length; break; - case 4: - ((uint32_t *)src)[0] = v3_indw(port); +#ifdef CONFIG_HOST_DEVICE + case GENERIC_HOST: + if (state->host_dev) { + return v3_host_dev_read_io(state->host_dev,port,dst,length); + } break; +#endif default: - for (i = 0; i < length; i++) { - ((uint8_t *)src)[i] = v3_inb(port); - } + PrintError("generic (%s): unknown forwarding type\n", state->name); + return -1; + break; } - return length; + return -1; } static int generic_read_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { uint_t i; int rc; - - PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port); + +#ifdef CONFIG_DEBUG_GENERIC + struct generic_internal *state = (struct generic_internal *) priv_data; +#endif + + PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port, + 
state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN"); rc=generic_read_port_passthrough(core,port,src,length,priv_data); @@ -141,7 +217,14 @@ static int generic_read_port_ignore(struct guest_info * core, uint16_t port, voi static int generic_read_port_print_and_ignore(struct guest_info * core, uint16_t port, void * src, uint_t length, void * priv_data) { - PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port); +#ifdef CONFIG_DEBUG_GENERIC + struct generic_internal *state = (struct generic_internal *) priv_data; +#endif + + PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port, + state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN"); + memset((uint8_t *)src, 0, length); PrintDebug(" ignored (return zeroed buffer)\n"); @@ -159,8 +242,14 @@ static int generic_write_port_print_and_ignore(struct guest_info * core, uint16_ uint_t length, void * priv_data) { int i; - PrintDebug("generic: writing 0x%x bytes to port 0x%x ", length, port); +#ifdef CONFIG_DEBUG_GENERIC + struct generic_internal *state = (struct generic_internal *) priv_data; +#endif + PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ", state->name, length, port, + state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? 
"host" : "UNKNOWN"); + memset((uint8_t *)src, 0, length); PrintDebug(" ignored - data was: 0x"); @@ -175,10 +264,197 @@ static int generic_write_port_print_and_ignore(struct guest_info * core, uint16_ +static int generic_write_mem_passthrough(struct guest_info * core, + addr_t gpa, + void * src, + uint_t len, + void * priv) +{ + struct vm_device *dev = (struct vm_device *) priv; + struct generic_internal *state = (struct generic_internal *) dev->private_data; + + switch (state->forward_type) { + case GENERIC_PHYSICAL: + memcpy(V3_VAddr((void*)gpa),src,len); + return len; + break; +#ifdef CONFIG_HOST_DEVICE + case GENERIC_HOST: + if (state->host_dev) { + return v3_host_dev_write_mem(state->host_dev,gpa,src,len); + } else { + return -1; + } + break; +#endif + default: + PrintError("generic (%s): unknown forwarding type\n", state->name); + return -1; + break; + } +} -static int generic_free(struct generic_internal * state) { - PrintDebug("generic: deinit_device\n"); +static int generic_write_mem_print_and_passthrough(struct guest_info * core, + addr_t gpa, + void * src, + uint_t len, + void * priv) +{ +#ifdef CONFIG_DEBUG_GENERIC + struct vm_device *dev = (struct vm_device *) priv; + struct generic_internal *state = (struct generic_internal *) dev->private_data; +#endif + + PrintDebug("generic (%s): writing %u bytes to GPA 0x%p via %s ... ", state->name, + len,(void*)gpa, + state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? 
"host" : "UNKNOWN"); + + int rc = generic_write_mem_passthrough(core,gpa,src,len,priv); + + PrintDebug("done\n"); + + return rc; +} +static int generic_write_mem_ignore(struct guest_info * core, + addr_t gpa, + void * src, + uint_t len, + void * priv) +{ + return len; +} + +static int generic_write_mem_print_and_ignore(struct guest_info * core, + addr_t gpa, + void * src, + uint_t len, + void * priv) +{ +#ifdef CONFIG_DEBUG_GENERIC + struct vm_device *dev = (struct vm_device *) priv; + struct generic_internal *state = (struct generic_internal *) dev->private_data; +#endif + + PrintDebug("generic (%s): ignoring write of %u bytes to GPA 0x%p via %s", state->name, + len,(void*)gpa, + state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN"); + + return len; +} + +static int generic_read_mem_passthrough(struct guest_info * core, + addr_t gpa, + void * dst, + uint_t len, + void * priv) +{ + struct vm_device *dev = (struct vm_device *) priv; + struct generic_internal *state = (struct generic_internal *) dev->private_data; + + switch (state->forward_type) { + case GENERIC_PHYSICAL: + memcpy(dst,V3_VAddr((void*)gpa),len); + return len; + break; +#ifdef CONFIG_HOST_DEVICE + case GENERIC_HOST: + if (state->host_dev) { + return v3_host_dev_read_mem(state->host_dev,gpa,dst,len); + } else { + return -1; + } + break; +#endif + default: + PrintError("generic (%s): unknown forwarding type\n", state->name); + break; + } + + return -1; +} + +static int generic_read_mem_print_and_passthrough(struct guest_info * core, + addr_t gpa, + void * dst, + uint_t len, + void * priv) +{ +#ifdef CONFIG_DEBUG_GENERIC + struct vm_device *dev = (struct vm_device *) priv; + struct generic_internal *state = (struct generic_internal *) dev->private_data; +#endif + + PrintDebug("generic (%s): attempting to read %u bytes from GPA 0x%p via %s ... ", state->name, + len,(void*)gpa, + state->forward_type == GENERIC_PHYSICAL ? 
"physical" : + state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN"); + + int rc = generic_read_mem_passthrough(core,gpa,dst,len,priv); + + PrintDebug("done - read %d bytes\n", rc); + + return rc; +} + +static int generic_read_mem_print_and_ignore(struct guest_info * core, + addr_t gpa, + void * dst, + uint_t len, + void * priv) /* logging variant: reports the ignored read, then zero-fills dst */ +{ +#ifdef CONFIG_DEBUG_GENERIC + struct vm_device *dev = (struct vm_device *) priv; + struct generic_internal *state = (struct generic_internal *) dev->private_data; +#endif + + PrintDebug("generic (%s): ignoring attempt to read %u bytes from GPA 0x%p via %s ... ", state->name, + len,(void*)gpa, + state->forward_type == GENERIC_PHYSICAL ? "physical" : + state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN"); + + memset((uint8_t *)dst, 0, len); + + PrintDebug("returning zeros\n"); + + return len; +} + + +static int generic_read_mem_ignore(struct guest_info * core, + addr_t gpa, + void * dst, + uint_t len, + void * priv) /* silent variant: zero-fills dst without logging */ +{ + memset((uint8_t *)dst, 0, len); + return len; +} + + +static int generic_free(struct generic_internal * state) { + int i; + + PrintDebug("generic (%s): deinit_device\n", state->name); + +#ifdef CONFIG_HOST_DEVICE + if (state->host_dev) { + v3_host_dev_close(state->host_dev); + state->host_dev=0; + } +#endif + + // Note that the device manager handles unhooking the I/O ports + // We need to handle unhooking memory regions + for (i=0;i<state->num_mem_hooks;i++) { /* mem_hook[i] holds the start address recorded for each hooked region */ + if (v3_unhook_mem(state->dev->vm,V3_MEM_CORE_ANY,state->mem_hook[i])<0) { + PrintError("generic (%s): unable to unhook memory starting at 0x%p\n", state->name,(void*)(state->mem_hook[i])); + return -1; + } + } + V3_Free(state); return 0; } @@ -197,73 +473,223 @@ static struct v3_device_ops dev_ops = { static int add_port_range(struct vm_device * dev, uint_t start, uint_t end, generic_mode_t mode) { uint_t i = 0; - PrintDebug("generic: Adding Port Range: 0x%x to 0x%x as %s\n", + struct generic_internal *state = (struct generic_internal *) dev->private_data; + +
PrintDebug("generic (%s): adding port range 0x%x to 0x%x as %s\n", state->name, start, end, - (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : "print-and-ignore"); - + (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : + (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" : + (mode == GENERIC_PASSTHROUGH) ? "passthrough" : + (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN"); + for (i = start; i <= end; i++) { - if (mode == GENERIC_PRINT_AND_PASSTHROUGH) { - if (v3_dev_hook_io(dev, i, - &generic_read_port_print_and_passthrough, - &generic_write_port_print_and_passthrough) == -1) { - PrintError("generic: can't hook port 0x%x (already hooked?)\n", i); + switch (mode) { + case GENERIC_PRINT_AND_PASSTHROUGH: + if (v3_dev_hook_io(dev, i, + &generic_read_port_print_and_passthrough, + &generic_write_port_print_and_passthrough) == -1) { + PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i); + return -1; + } + break; + + case GENERIC_PRINT_AND_IGNORE: + if (v3_dev_hook_io(dev, i, + &generic_read_port_print_and_ignore, + &generic_write_port_print_and_ignore) == -1) { + PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i); + return -1; + } + break; + case GENERIC_PASSTHROUGH: + if (v3_dev_hook_io(dev, i, + &generic_read_port_passthrough, + &generic_write_port_passthrough) == -1) { + PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i); + return -1; + } + break; + case GENERIC_IGNORE: + if (v3_dev_hook_io(dev, i, + &generic_read_port_ignore, + &generic_write_port_ignore) == -1) { + PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i); + return -1; + } + break; + default: + PrintError("generic (%s): huh?\n", state->name); + break; + } + } + + return 0; +} + + +static int add_mem_range(struct vm_device * dev, addr_t start, addr_t end, generic_mode_t mode) { + + struct generic_internal *state = (struct 
generic_internal *) dev->private_data; + + PrintDebug("generic (%s): adding memory range 0x%p to 0x%p as %s\n", state->name, + (void*)start, (void*)end, + (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : + (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" : + (mode == GENERIC_PASSTHROUGH) ? "passthrough" : + (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN"); + + switch (mode) { + case GENERIC_PRINT_AND_PASSTHROUGH: + if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, + &generic_read_mem_print_and_passthrough, + &generic_write_mem_print_and_passthrough, dev) == -1) { + PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end); return -1; } - } else if (mode == GENERIC_PRINT_AND_IGNORE) { - if (v3_dev_hook_io(dev, i, - &generic_read_port_print_and_ignore, - &generic_write_port_print_and_ignore) == -1) { - PrintError("generic: can't hook port 0x%x (already hooked?)\n", i); + break; + + case GENERIC_PRINT_AND_IGNORE: + if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, + &generic_read_mem_print_and_ignore, + &generic_write_mem_print_and_ignore, dev) == -1) { + PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end); return -1; } - } else if (mode == GENERIC_PASSTHROUGH) { - if (v3_dev_hook_io(dev, i, - &generic_read_port_passthrough, - &generic_write_port_passthrough) == -1) { - PrintError("generic: can't hook port 0x%x (already hooked?)\n", i); + break; + + case GENERIC_PASSTHROUGH: + if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, + &generic_read_mem_passthrough, + &generic_write_mem_passthrough, dev) == -1) { + PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end); return -1; } - } else if (mode == GENERIC_IGNORE) { - if (v3_dev_hook_io(dev, i, - &generic_read_port_ignore, - &generic_write_port_ignore) == -1) { - PrintError("generic: can't hook port 0x%x (already 
hooked?)\n", i); + break; + + case GENERIC_IGNORE: + if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, + &generic_read_mem_ignore, + &generic_write_mem_ignore, dev) == -1) { + PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end); return -1; } - } + break; + default: + PrintError("generic (%s): huh?\n",state->name); + break; } - + return 0; } +/* + The device can be used to forward to the underlying physical device + or to a host device that has a given url. Both memory and ports can be forwarded as + + GENERIC_PASSTHROUGH => send writes and reads to physical device or host + GENERIC_PRINT_AND_PASSTHROUGH => also print what it's doing + + GENERIC_IGNORE => ignore writes and reads + GENERIC_PRINT_AND_IGNORE => also print what it's doing + + + The purpose of the "PRINT" variants is to make it easy to spy on + device interactions (although you will not see DMA or interrupts) + + + + (empty implies physical_dev) + + + portno1 + portno2 => portno1 through portno2 (inclusive) + PRINT_AND_PASSTHROUGH (as above) + + + + gpa1 + gpa2 => memory addresses gpa1 through gpa2 (inclusive); page granularity + ...
as above + + +*/ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { struct generic_internal * state = NULL; char * dev_id = v3_cfg_val(cfg, "ID"); + char * forward = v3_cfg_val(cfg, "forward"); +#ifdef CONFIG_HOST_DEVICE + char * host_dev = v3_cfg_val(cfg, "hostdev"); +#endif v3_cfg_tree_t * port_cfg = v3_cfg_subtree(cfg, "ports"); + v3_cfg_tree_t * mem_cfg = v3_cfg_subtree(cfg, "memory"); state = (struct generic_internal *)V3_Malloc(sizeof(struct generic_internal)); if (state == NULL) { - PrintError("Could not allocate generic state\n"); + PrintError("generic (%s): could not allocate generic state\n",dev_id); return -1; } memset(state, 0, sizeof(struct generic_internal)); + strncpy(state->name,dev_id,MAX_NAME-1); /* state was zeroed above, so the untouched last byte keeps name NUL-terminated */ + + if (!forward) { + state->forward_type=GENERIC_PHYSICAL; + } else { + if (!strcasecmp(forward,"physical_device")) { + state->forward_type=GENERIC_PHYSICAL; + } else if (!strcasecmp(forward,"host_device")) { +#ifdef CONFIG_HOST_DEVICE + state->forward_type=GENERIC_HOST; +#else + PrintError("generic (%s): cannot configure host device since host device support is not built in\n", state->name); + V3_Free(state); + return -1; +#endif + } else { + PrintError("generic (%s): unknown forwarding type \"%s\"\n", state->name, forward); + V3_Free(state); + return -1; + } + } struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, state); if (dev == NULL) { - PrintError("Could not attach device %s\n", dev_id); + PrintError("generic: could not attach device %s\n", state->name); V3_Free(state); return -1; } - PrintDebug("generic: init_device\n"); + state->dev=dev; + + +#ifdef CONFIG_HOST_DEVICE + if (state->forward_type==GENERIC_HOST) { + if (!host_dev) { + PrintError("generic (%s): host forwarding requested, but no host device given\n", state->name); + v3_remove_device(dev); + return -1; + } else { + state->host_dev = v3_host_dev_open(host_dev,V3_BUS_CLASS_DIRECT,dev,vm); + if (!(state->host_dev)) { + PrintError("generic (%s): unable to open
host device \"%s\"\n", state->name,host_dev); + v3_remove_device(dev); + return -1; + } else { + PrintDebug("generic (%s): successfully attached host device \"%s\"\n", state->name,host_dev); + } + } + } +#endif + + PrintDebug("generic (%s): init_device\n", state->name); // scan port list.... while (port_cfg) { @@ -271,7 +697,6 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { uint16_t end = atox(v3_cfg_val(port_cfg, "end")); char * mode_str = v3_cfg_val(port_cfg, "mode"); generic_mode_t mode = GENERIC_IGNORE; - if (strcasecmp(mode_str, "print_and_ignore") == 0) { mode = GENERIC_PRINT_AND_IGNORE; } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) { @@ -281,13 +706,14 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { } else if (strcasecmp(mode_str, "ignore") == 0) { mode = GENERIC_IGNORE; } else { - PrintError("Invalid Mode %s\n", mode_str); + PrintError("generic (%s): invalid mode %s in adding ports\n", state->name, mode_str); v3_remove_device(dev); return -1; } + if (add_port_range(dev, start, end, mode) == -1) { - PrintError("Could not add port range %d-%d\n", start, end); + PrintError("generic (%s): could not add port range 0x%x to 0x%x\n", state->name, start, end); v3_remove_device(dev); return -1; } @@ -295,6 +721,46 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { port_cfg = v3_cfg_next_branch(port_cfg); } + // scan memory list.... 
+ while (mem_cfg) { + addr_t start = atox(v3_cfg_val(mem_cfg, "start")); + addr_t end = atox(v3_cfg_val(mem_cfg, "end")); + char * mode_str = v3_cfg_val(mem_cfg, "mode"); + generic_mode_t mode = GENERIC_IGNORE; + + if (strcasecmp(mode_str, "print_and_ignore") == 0) { + mode = GENERIC_PRINT_AND_IGNORE; + } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) { + mode = GENERIC_PRINT_AND_PASSTHROUGH; + } else if (strcasecmp(mode_str, "passthrough") == 0) { + mode = GENERIC_PASSTHROUGH; + } else if (strcasecmp(mode_str, "ignore") == 0) { + mode = GENERIC_IGNORE; + } else { + PrintError("generic (%s): invalid mode %s for adding memory\n", state->name, mode_str); + v3_remove_device(dev); + return -1; + } + + if (state->num_mem_hooks>=MAX_MEM_HOOKS) { + PrintError("generic (%s): cannot add another memory hook (increase MAX_MEM_HOOKS)\n", state->name); + v3_remove_device(dev); + return -1; + } + + if (add_mem_range(dev, start, end, mode) == -1) { + PrintError("generic (%s): could not add memory range 0x%p to 0x%p\n", state->name, (void*)start, (void*)end); + v3_remove_device(dev); + return -1; + } + + state->mem_hook[state->num_mem_hooks] = start; + state->num_mem_hooks++; + + mem_cfg = v3_cfg_next_branch(mem_cfg); /* step to the next "memory" config entry */ + } + + PrintDebug("generic (%s): initialization complete\n", state->name); return 0; } diff --git a/palacios/src/devices/lnx_virtio_nic.c b/palacios/src/devices/lnx_virtio_nic.c index bb13a69..07c7d7b 100644 --- a/palacios/src/devices/lnx_virtio_nic.c +++ b/palacios/src/devices/lnx_virtio_nic.c @@ -38,38 +38,62 @@ #define PrintDebug(fmt, args...) #endif +#define TX_QUEUE_SIZE 4096 +#define RX_QUEUE_SIZE 4096 +#define CTRL_QUEUE_SIZE 64 + +/* The feature bitmap for virtio nic + * from Linux */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address.
*/ +#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ +#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ +#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */ + +/* Port to get virtio config */ +#define VIRTIO_NET_CONFIG 20 + #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10)) +/* for gso_type in virtio_net_hdr */ +#define VIRTIO_NET_HDR_GSO_NONE 0 +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ -struct virtio_net_hdr { - uint8_t flags; - - uint8_t gso_type; - uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ - uint16_t gso_size; /* Bytes to append to hdr_len per frame */ - uint16_t csum_start; /* Position to start checksumming from */ - uint16_t csum_offset; /* Offset after that to place checksum */ -}__attribute__((packed)); +/* for flags in virtio_net_hdr */ +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ -struct virtio_net_hdr_mrg_rxbuf { - struct virtio_net_hdr hdr; - uint16_t num_buffers; /* Number of merged rx buffers */ -}; - -#define TX_QUEUE_SIZE 256 -#define RX_QUEUE_SIZE 4096 -#define CTRL_QUEUE_SIZE 64 +/* First element of the scatter-gather list, used with GSO or CSUM features */ +struct virtio_net_hdr 
+{ + uint8_t flags; + uint8_t gso_type; + uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + uint16_t gso_size; /* Bytes to append to hdr_len per frame */ + uint16_t csum_start; /* Position to start checksumming from */ + uint16_t csum_offset; /* Offset after that to place checksum */ +}__attribute__((packed)); -#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ -#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ -#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ -#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ -#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ -/* Port to get virtio config */ -#define VIRTIO_NET_CONFIG 20 +/* The header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + uint16_t num_buffers; /* Number of merged rx buffers */ +}; struct virtio_net_config { @@ -89,6 +113,7 @@ struct virtio_net_state { struct virtio_net_config net_cfg; struct virtio_config virtio_cfg; + struct v3_vm_info * vm; struct vm_device * dev; struct pci_device * pci_dev; int io_range_size; @@ -98,21 +123,23 @@ struct virtio_net_state { struct virtio_queue ctrl_vq; /* idx 2*/ struct v3_timer * timer; + void * poll_thread; - struct nic_statistics statistics; + struct nic_statistics stats; struct v3_dev_net_ops * net_ops; v3_lock_t rx_lock, tx_lock; uint8_t tx_notify, rx_notify; uint32_t tx_pkts, rx_pkts; - uint64_t past_ms; + uint64_t past_us; void * backend_data; struct virtio_dev_state * virtio_dev; struct list_head dev_link; }; + static int virtio_init_state(struct virtio_net_state * virtio) { virtio->rx_vq.queue_size = RX_QUEUE_SIZE; @@ -139,9 +166,10 @@ static int virtio_init_state(struct virtio_net_state * virtio) virtio->virtio_cfg.pci_isr = 0; - virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC) | - (1 << VIRTIO_NET_F_HOST_UFO) | - (1 << VIRTIO_NET_F_HOST_TSO4); + virtio->virtio_cfg.host_features = 0 
| (1 << VIRTIO_NET_F_MAC); + // (1 << VIRTIO_NET_F_GSO) | + // (1 << VIRTIO_NET_F_HOST_UFO) | + // (1 << VIRTIO_NET_F_HOST_TSO4); if ((v3_lock_init(&(virtio->rx_lock)) == -1) || (v3_lock_init(&(virtio->tx_lock)) == -1)){ @@ -152,32 +180,36 @@ static int virtio_init_state(struct virtio_net_state * virtio) } static int tx_one_pkt(struct guest_info * core, - struct virtio_net_state * virtio, - struct vring_desc * buf_desc) + struct virtio_net_state * virtio, + struct vring_desc * buf_desc) { uint8_t * buf = NULL; uint32_t len = buf_desc->length; + int synchronize = 1; // (virtio->tx_notify == 1)?1:0; if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) { PrintDebug("Could not translate buffer address\n"); return -1; } - if(virtio->net_ops->send(buf, len, virtio->backend_data) >= 0){ - virtio->statistics.tx_pkts ++; - virtio->statistics.tx_bytes += len; + V3_Net_Print(2, "Virtio-NIC: virtio_tx: size: %d\n", len); + if(v3_net_debug >= 4){ + v3_hexdump(buf, len, NULL, 0); + } - return 0; + if(virtio->net_ops->send(buf, len, synchronize, virtio->backend_data) < 0){ + virtio->stats.tx_dropped ++; + return -1; } - virtio->statistics.tx_dropped ++; + virtio->stats.tx_pkts ++; + virtio->stats.tx_bytes += len; - return -1; + return 0; } -static int -copy_data_to_desc(struct guest_info * core, +static inline int copy_data_to_desc(struct guest_info * core, struct virtio_net_state * virtio_state, struct vring_desc * desc, uchar_t * buf, @@ -188,7 +220,7 @@ copy_data_to_desc(struct guest_info * core, uint8_t * desc_buf = NULL; if (v3_gpa_to_hva(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) { - PrintError("Could not translate buffer address\n"); + PrintDebug("Could not translate buffer address\n"); return -1; } len = (desc->length < buf_len)?(desc->length - offset):buf_len; @@ -198,7 +230,7 @@ copy_data_to_desc(struct guest_info * core, } -static int get_desc_count(struct virtio_queue * q, int index) { +static inline int get_desc_count(struct virtio_queue * q, 
int index) { struct vring_desc * tmp_desc = &(q->desc[index]); int cnt = 1; @@ -218,32 +250,10 @@ static inline void disable_cb(struct virtio_queue *queue) { queue->used->flags |= VRING_NO_NOTIFY_FLAG; } - -/* interrupt the guest, so the guest core get EXIT to Palacios */ -static inline void notify_guest(struct virtio_net_state * virtio){ - v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); -} - - -/* guest free some pkts for rx queue */ -static int handle_rx_queue_kick(struct guest_info * core, - struct virtio_net_state * virtio) -{ - return 0; -} - - -static int handle_ctrl(struct guest_info * core, - struct virtio_net_state * virtio) { - - return 0; -} - static int handle_pkt_tx(struct guest_info * core, struct virtio_net_state * virtio_state) { struct virtio_queue *q = &(virtio_state->tx_vq); - struct virtio_net_hdr *hdr = NULL; int txed = 0; unsigned long flags; @@ -253,12 +263,16 @@ static int handle_pkt_tx(struct guest_info * core, flags = v3_lock_irqsave(virtio_state->tx_lock); while (q->cur_avail_idx != q->avail->index) { + struct virtio_net_hdr *hdr = NULL; struct vring_desc * hdr_desc = NULL; addr_t hdr_addr = 0; uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; int desc_cnt = get_desc_count(q, desc_idx); - uint32_t req_len = 0; - int i = 0; + + if(desc_cnt > 2){ + PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt); + goto exit_error; + } hdr_desc = &(q->desc[desc_idx]); if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) { @@ -269,25 +283,15 @@ static int handle_pkt_tx(struct guest_info * core, hdr = (struct virtio_net_hdr *)hdr_addr; desc_idx = hdr_desc->next; - if(desc_cnt > 2){ - PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt); + /* here we assumed that one ethernet pkt is not splitted into multiple buffer */ + struct vring_desc * buf_desc = &(q->desc[desc_idx]); + if (tx_one_pkt(core, virtio_state, buf_desc) == -1) { + 
PrintError("Virtio NIC: Error handling nic operation\n"); goto exit_error; } - - /* here we assumed that one ethernet pkt is not splitted into multiple virtio buffer */ - for (i = 0; i < desc_cnt - 1; i++) { - struct vring_desc * buf_desc = &(q->desc[desc_idx]); - if (tx_one_pkt(core, virtio_state, buf_desc) == -1) { - PrintError("Error handling nic operation\n"); - goto exit_error; - } - - req_len += buf_desc->length; - desc_idx = buf_desc->next; - } - + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; - q->used->ring[q->used->index % q->queue_size].length = req_len; /* What do we set this to???? */ + q->used->ring[q->used->index % q->queue_size].length = buf_desc->length; /* What do we set this to???? */ q->used->index ++; q->cur_avail_idx ++; @@ -296,12 +300,17 @@ static int handle_pkt_tx(struct guest_info * core, } v3_unlock_irqrestore(virtio_state->tx_lock, flags); - + + //virtio_state->virtio_cfg.pci_isr == 0 && if (txed && !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { v3_pci_raise_irq(virtio_state->virtio_dev->pci_bus, 0, virtio_state->pci_dev); virtio_state->virtio_cfg.pci_isr = 0x1; - virtio_state->statistics.interrupts ++; + virtio_state->stats.rx_interrupts ++; + } + + if(txed > 0) { + V3_Net_Print(2, "Virtio Handle TX: txed pkts: %d\n", txed); } return 0; @@ -386,6 +395,10 @@ static int virtio_io_write(struct guest_info *core, break; case 1: virtio_setup_queue(core, virtio, &virtio->tx_vq, pfn, page_addr); + if(virtio->tx_notify == 0){ + disable_cb(&virtio->tx_vq); + V3_THREAD_WAKEUP(virtio->poll_thread); + } break; case 2: virtio_setup_queue(core, virtio, &virtio->ctrl_vq, pfn, page_addr); @@ -408,20 +421,16 @@ static int virtio_io_write(struct guest_info *core, { uint16_t queue_idx = *(uint16_t *)src; if (queue_idx == 0){ - if(handle_rx_queue_kick(core, virtio) == -1){ - PrintError("Could not handle Virtio NIC rx kick\n"); - return -1; - } + /* receive queue refill */ + virtio->stats.tx_interrupts ++; } 
else if (queue_idx == 1){ if (handle_pkt_tx(core, virtio) == -1) { PrintError("Could not handle Virtio NIC tx kick\n"); return -1; } + virtio->stats.tx_interrupts ++; } else if (queue_idx == 2){ - if (handle_ctrl(core, virtio) == -1) { - PrintError("Could not handle Virtio NIC ctrl kick\n"); - return -1; - } + /* ctrl */ } else { PrintError("Wrong queue index %d\n", queue_idx); } @@ -462,7 +471,7 @@ static int virtio_io_read(struct guest_info *core, case HOST_FEATURES_PORT: if (length != 4) { PrintError("Illegal read length for host features\n"); - return -1; + //return -1; } *(uint32_t *)dst = virtio->virtio_cfg.host_features; break; @@ -541,13 +550,13 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { struct virtio_net_hdr_mrg_rxbuf hdr; uint32_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); uint32_t data_len; - uint32_t offset = 0; + //uint32_t offset = 0; unsigned long flags; -#ifdef CONFIG_DEBUG_VIRTIO_NET - PrintDebug("Virtio-NIC: virtio_rx: size: %d\n", size); - v3_hexdump(buf, size, NULL, 0); -#endif + V3_Net_Print(2, "Virtio-NIC: virtio_rx: size: %d\n", size); + if(v3_net_debug >= 4){ + v3_hexdump(buf, size, NULL, 0); + } flags = v3_lock_irqsave(virtio->rx_lock); @@ -555,29 +564,28 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); if (q->ring_avail_addr == 0) { - PrintDebug("Queue is not set\n"); + V3_Net_Print(2, "Virtio NIC: RX Queue not set\n"); + virtio->stats.rx_dropped ++; goto err_exit; } if (q->cur_avail_idx != q->avail->index){ addr_t hdr_addr = 0; - uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; uint16_t buf_idx = 0; + uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; struct vring_desc * hdr_desc = NULL; + struct vring_desc * buf_desc = NULL; + uint32_t len; hdr_desc = &(q->desc[hdr_idx]); if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) { - 
PrintDebug("Could not translate receive buffer address\n"); + V3_Net_Print(2, "Virtio NIC: Could not translate receive buffer address\n"); + virtio->stats.rx_dropped ++; goto err_exit; } - hdr.num_buffers = 1; - memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); - if (offset >= data_len) { - hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; - } - struct vring_desc * buf_desc = NULL; - for (buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) { +#if 0 /* merged buffer */ + for(buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) { uint32_t len = 0; buf_desc = &(q->desc[buf_idx]); @@ -587,33 +595,65 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { buf_desc->flags = VIRTIO_NEXT_FLAG; } buf_desc->length = len; + hdr.num_buffers ++; } buf_desc->flags &= ~VIRTIO_NEXT_FLAG; - + memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); +#endif + + hdr.num_buffers = 1; + memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + if (data_len == 0) { + hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; + } + + buf_idx = hdr_desc->next; + buf_desc = &(q->desc[buf_idx]); + len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf, data_len, 0); + if (len < data_len) { + V3_Net_Print(2, "Virtio NIC: ring buffer len less than pkt size, merged buffer not supported\n"); + virtio->stats.rx_dropped ++; + + goto err_exit; + } + buf_desc->flags &= ~VIRTIO_NEXT_FLAG; + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */ q->used->index++; q->cur_avail_idx++; - virtio->statistics.rx_pkts ++; - virtio->statistics.rx_bytes += size; + virtio->stats.rx_pkts ++; + virtio->stats.rx_bytes += size; } else { - virtio->statistics.rx_dropped ++; + V3_Net_Print(2, "Virtio 
NIC: Guest RX queue is full\n"); + virtio->stats.rx_dropped ++; + + /* kick guest to refill the queue */ + virtio->virtio_cfg.pci_isr = 0x1; + v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev); + v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); + virtio->stats.rx_interrupts ++; goto err_exit; } + V3_Net_Print(2, "pci_isr %d, virtio flags %d\n", virtio->virtio_cfg.pci_isr, q->avail->flags); + //virtio->virtio_cfg.pci_isr == 0 && + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { - PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line); - + V3_Net_Print(2, "Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line); + + virtio->virtio_cfg.pci_isr = 0x1; v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev); - virtio->virtio_cfg.pci_isr = 0x1; - virtio->statistics.interrupts ++; + + virtio->stats.rx_interrupts ++; } v3_unlock_irqrestore(virtio->rx_lock, flags); - /* notify guest if guest is running */ + /* notify guest if it is in guest mode */ + /* ISSUE: What is gonna happen if guest thread is running on the same core as this thread? 
*/ if(virtio->rx_notify == 1){ v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); } @@ -650,12 +690,21 @@ static struct v3_device_ops dev_ops = { }; -static void virtio_nic_poll(struct v3_vm_info * vm, int budget, void * data){ - struct virtio_net_state * virtio = (struct virtio_net_state *)data; +static int virtio_tx_flush(void * args){ + struct virtio_net_state *virtio = (struct virtio_net_state *)args; + + V3_Print("Virtio TX Poll Thread Starting for %s\n", virtio->vm->name); - if(virtio->tx_notify == 0){ - handle_pkt_tx(&(vm->cores[0]), virtio); + while(1){ + if(virtio->tx_notify == 0){ + handle_pkt_tx(&(virtio->vm->cores[0]), virtio); + v3_yield(NULL); + }else { + V3_THREAD_SLEEP(); + } } + + return 0; } static int register_dev(struct virtio_dev_state * virtio, @@ -732,31 +781,33 @@ static int register_dev(struct virtio_dev_state * virtio, #define RATE_UPPER_THRESHOLD 10 /* 10000 pkts per second, around 100Mbits */ #define RATE_LOWER_THRESHOLD 1 -#define PROFILE_PERIOD 50 /*50ms*/ +#define PROFILE_PERIOD 10000 /*us*/ -/* Timer Functions */ static void virtio_nic_timer(struct guest_info * core, uint64_t cpu_cycles, uint64_t cpu_freq, void * priv_data) { struct virtio_net_state * net_state = (struct virtio_net_state *)priv_data; - uint64_t period_ms; + uint64_t period_us; + static int profile_ms = 0; - period_ms = cpu_cycles/cpu_freq; - net_state->past_ms += period_ms; + period_us = (1000*cpu_cycles)/cpu_freq; + net_state->past_us += period_us; - if(net_state->past_ms > PROFILE_PERIOD){ +#if 0 + if(net_state->past_us > PROFILE_PERIOD){ uint32_t tx_rate, rx_rate; - tx_rate = (net_state->statistics.tx_pkts - net_state->tx_pkts)/net_state->past_ms; /* pkts/per ms */ - rx_rate = (net_state->statistics.rx_pkts - net_state->rx_pkts)/net_state->past_ms; + tx_rate = (net_state->stats.tx_pkts - net_state->tx_pkts)/(net_state->past_us/1000); /* pkts/per ms */ + rx_rate = (net_state->stats.rx_pkts - 
net_state->rx_pkts)/(net_state->past_us/1000); - net_state->tx_pkts = net_state->statistics.tx_pkts; - net_state->rx_pkts = net_state->statistics.rx_pkts; + net_state->tx_pkts = net_state->stats.tx_pkts; + net_state->rx_pkts = net_state->stats.rx_pkts; if(tx_rate > RATE_UPPER_THRESHOLD && net_state->tx_notify == 1){ V3_Print("Virtio NIC: Switch TX to VMM driven mode\n"); disable_cb(&(net_state->tx_vq)); net_state->tx_notify = 0; + V3_THREAD_WAKEUP(net_state->poll_thread); } if(tx_rate < RATE_LOWER_THRESHOLD && net_state->tx_notify == 0){ @@ -766,19 +817,30 @@ static void virtio_nic_timer(struct guest_info * core, } if(rx_rate > RATE_UPPER_THRESHOLD && net_state->rx_notify == 1){ - PrintDebug("Virtio NIC: Switch RX to VMM None notify mode\n"); + V3_Print("Virtio NIC: Switch RX to VMM None notify mode\n"); net_state->rx_notify = 0; } if(rx_rate < RATE_LOWER_THRESHOLD && net_state->rx_notify == 0){ - PrintDebug("Virtio NIC: Switch RX to VMM notify mode\n"); + V3_Print("Virtio NIC: Switch RX to VMM notify mode\n"); net_state->rx_notify = 1; } - net_state->past_ms = 0; + net_state->past_us = 0; } -} +#endif + profile_ms += period_us/1000; + if(profile_ms > 20000){ + V3_Net_Print(1, "Virtio NIC: TX: Pkt: %lld, Bytes: %lld\n\t\tRX Pkt: %lld. 
Bytes: %lld\n\t\tDropped: tx %lld, rx %lld\nInterrupts: tx %d, rx %d\nTotal Exit: %lld\n", + net_state->stats.tx_pkts, net_state->stats.tx_bytes, + net_state->stats.rx_pkts, net_state->stats.rx_bytes, + net_state->stats.tx_dropped, net_state->stats.rx_dropped, + net_state->stats.tx_interrupts, net_state->stats.rx_interrupts, + net_state->vm->cores[0].num_exits); + profile_ms = 0; + } +} static struct v3_timer_ops timer_ops = { .update_timer = virtio_nic_timer, @@ -796,19 +858,21 @@ static int connect_fn(struct v3_vm_info * info, memset(net_state, 0, sizeof(struct virtio_net_state)); register_dev(virtio, net_state); + net_state->vm = info; net_state->net_ops = ops; net_state->backend_data = private_data; net_state->virtio_dev = virtio; - net_state->tx_notify = 1; - net_state->rx_notify = 1; - + net_state->tx_notify = 0; + net_state->rx_notify = 0; + net_state->timer = v3_add_timer(&(info->cores[0]),&timer_ops,net_state); ops->recv = virtio_rx; - ops->poll = virtio_nic_poll; ops->frontend_data = net_state; memcpy(ops->fnt_mac, virtio->mac, ETH_ALEN); + net_state->poll_thread = V3_CREATE_THREAD(virtio_tx_flush, (void *)net_state, "Virtio_Poll"); + return 0; } @@ -834,14 +898,7 @@ static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { if (macstr != NULL && !str2mac(macstr, virtio_state->mac)) { PrintDebug("Virtio NIC: Mac specified %s\n", macstr); - PrintDebug("MAC: %x:%x:%x:%x:%x:%x\n", virtio_state->mac[0], - virtio_state->mac[1], - virtio_state->mac[2], - virtio_state->mac[3], - virtio_state->mac[4], - virtio_state->mac[5]); }else { - PrintDebug("Virtio NIC: MAC not specified\n"); random_ethaddr(virtio_state->mac); } diff --git a/palacios/src/devices/lnx_virtio_vnet.c b/palacios/src/devices/lnx_virtio_vnet.c index b4b7342..87f158d 100644 --- a/palacios/src/devices/lnx_virtio_vnet.c +++ b/palacios/src/devices/lnx_virtio_vnet.c @@ -303,7 +303,7 @@ static int do_tx_pkts(struct guest_info * core, memcpy(pkt.header, virtio_pkt->pkt, ETHERNET_HEADER_LEN); 
pkt.data = virtio_pkt->pkt; - v3_vnet_send_pkt(&pkt, NULL); + v3_vnet_send_pkt(&pkt, NULL, 1); q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; q->used->ring[q->used->index % q->queue_size].length = pkt_desc->length; // What do we set this to???? diff --git a/palacios/src/devices/nic_bridge.c b/palacios/src/devices/nic_bridge.c index 9bc28d2..5eec5e6 100644 --- a/palacios/src/devices/nic_bridge.c +++ b/palacios/src/devices/nic_bridge.c @@ -36,6 +36,7 @@ struct nic_bridge_state { }; static int bridge_send(uint8_t * buf, uint32_t len, + int synchronize, void * private_data) { #ifdef CONFIG_DEBUG_NIC_BRIDGE diff --git a/palacios/src/devices/nvram.c b/palacios/src/devices/nvram.c index f2c45a3..785d38b 100644 --- a/palacios/src/devices/nvram.c +++ b/palacios/src/devices/nvram.c @@ -472,7 +472,8 @@ static void nvram_update_timer(struct guest_info *vm, uint64_t period_us; - period_us = (1000000*cpu_cycles/cpu_freq); + // cpu freq in khz + period_us = (1000*cpu_cycles/cpu_freq); update_time(nvram_state,period_us); diff --git a/palacios/src/devices/pci_front.c b/palacios/src/devices/pci_front.c new file mode 100644 index 0000000..487ab5c --- /dev/null +++ b/palacios/src/devices/pci_front.c @@ -0,0 +1,824 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Peter Dinda + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Authors: + * Peter Dinda (PCI front device forwarding to host dev interface) + * Jack Lange (original PCI passthrough to physical hardware) + * + * This is free software. 
You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +/* + This is front-end PCI device intended to be used together with the + host device interface and a *virtual* PCI device implementation in + the host OS. It makes it possible to project such a virtual device + into the guest as a PCI device. It's based on the PCI passthrough + device, which projects *physical* PCI devices into the guest. + + If you need to project a non-PCI host-based virtual or physical + device into the guest, you should use the generic device. + +*/ + +/* + * The basic idea is that we do not change the hardware PCI configuration + * Instead we modify the guest environment to map onto the physical configuration + * + * The pci subsystem handles most of the configuration space, except for the bar registers. + * We handle them here, by either letting them go directly to hardware or remapping through virtual hooks + * + * Memory Bars are always remapped via the shadow map, + * IO Bars are selectively remapped through hooks if the guest changes them + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +#ifndef CONFIG_DEBUG_PCI_FRONT +#undef PrintDebug +#define PrintDebug(fmt, args...) 
+#endif + + +// Our own address in PCI-land +union pci_addr_reg { + uint32_t value; + struct { + uint_t rsvd1 : 2; + uint_t reg : 6; + uint_t func : 3; + uint_t dev : 5; + uint_t bus : 8; + uint_t rsvd2 : 7; + uint_t enable : 1; + } __attribute__((packed)); +} __attribute__((packed)); + + +// identical to PCI passthrough device +typedef enum { PT_BAR_NONE, + PT_BAR_IO, + PT_BAR_MEM32, + PT_BAR_MEM24, + PT_BAR_MEM64_LO, + PT_BAR_MEM64_HI, + PT_EXP_ROM } pt_bar_type_t; + +// identical to PCI passthrough device +struct pt_bar { + uint32_t size; + pt_bar_type_t type; + + /* We store 64 bit memory bar addresses in the high BAR + * because they are the last to be updated + * This means that the addr field must be 64 bits + */ + uint64_t addr; + + uint32_t val; +}; + + + + +struct pci_front_internal { + // this is our local cache of what the host device has + union { + uint8_t config_space[256]; + struct pci_config_header real_hdr; + } __attribute__((packed)); + + // We do need a representation of the bars + // since we need to be made aware when they are written + // so that we can change the hooks. + // + // We assume here that the PCI subsystem, on a bar write + // will first send us a config_update, which we forward to + // the host dev. 
Then it will send us a bar update + // which we will use to rehook the device + // + struct pt_bar bars[6]; // our bars (for update purposes) + // + // Currently unsupported + // + //struct pt_bar exp_rom; // and exp ram areas of the config space, above + + struct vm_device *pci_bus; // what bus we are attached to + struct pci_device *pci_dev; // our representation as a registered PCI device + + union pci_addr_reg pci_addr; // our pci address + + char name[32]; + + v3_host_dev_t host_dev; // the actual implementation +}; + + + +/* +static int push_config(struct pci_front_internal *state, uint8_t *config) +{ + if (v3_host_dev_config_write(state->host_dev, 0, config, 256) != 256) { + return -1; + } else { + return 0; + } +} +*/ + +static int pull_config(struct pci_front_internal *state, uint8_t *config) +{ + if (v3_host_dev_read_config(state->host_dev, 0, config, 256) != 256) { + return -1; + } else { + return 0; + } +} + + +static int pci_front_read_mem(struct guest_info * core, + addr_t gpa, + void * dst, + uint_t len, + void * priv) +{ + int i; + int rc; + struct vm_device *dev = (struct vm_device *) priv; + struct pci_front_internal *state = (struct pci_front_internal *) dev->private_data; + + PrintDebug("pci_front (%s): reading 0x%x bytes from gpa 0x%p from host dev 0x%p ...", + state->name, len, (void*)gpa, state->host_dev); + + rc = v3_host_dev_read_mem(state->host_dev, gpa, dst, len); + + PrintDebug(" done ... 
read %d bytes: 0x", rc); + + for (i = 0; i < rc; i++) { + PrintDebug("%x", ((uint8_t *)dst)[i]); + } + + PrintDebug("\n"); + + return rc; +} + +static int pci_front_write_mem(struct guest_info * core, + addr_t gpa, + void * src, + uint_t len, + void * priv) +{ + int i; + int rc; + struct vm_device *dev = (struct vm_device *) priv; + struct pci_front_internal *state = (struct pci_front_internal *) dev->private_data; + + PrintDebug("pci_front (%s): writing 0x%x bytes to gpa 0x%p to host dev 0x%p bytes=0x", + state->name, len, (void*)gpa, state->host_dev); + + for (i = 0; i < len; i++) { + PrintDebug("%x", ((uint8_t *)src)[i]); + } + + rc = v3_host_dev_write_mem(state->host_dev, gpa, src, len); + + PrintDebug(" %d bytes written\n",rc); + + return rc; +} + + +static int pci_front_read_port(struct guest_info * core, + uint16_t port, + void * dst, + uint_t len, + void * priv_data) +{ + int i; + struct pci_front_internal *state = (struct pci_front_internal *) priv_data; + + PrintDebug("pci_front (%s): reading 0x%x bytes from port 0x%x from host dev 0x%p ...", + state->name, len, port, state->host_dev); + + int rc = v3_host_dev_read_io(state->host_dev, port, dst, len); + + PrintDebug(" done ... 
read %d bytes: 0x", rc); + + for (i = 0; i < rc; i++) { + PrintDebug("%x", ((uint8_t *)dst)[i]); + } + + PrintDebug("\n"); + + return rc; + +} + +static int pci_front_write_port(struct guest_info * core, + uint16_t port, + void * src, + uint_t len, + void * priv_data) +{ + int i; + struct pci_front_internal *state = (struct pci_front_internal *) priv_data; + + PrintDebug("pci_front (%s): writing 0x%x bytes to port 0x%x to host dev 0x%p bytes=0x", + state->name, len, port, state->host_dev); + + for (i = 0; i < len; i++) { + PrintDebug("%x", ((uint8_t *)src)[i]); + } + + int rc = v3_host_dev_write_io(state->host_dev, port, src, len); + + PrintDebug(" %d bytes written\n",rc); + + return rc; +} + + + +// +// This is called at registration time for the device +// +// We assume that someone has called pull_config to get a local +// copy of the config data from the host device by this point +// +static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct pci_front_internal * state = (struct pci_front_internal *)(dev->private_data); + + + const uint32_t bar_base_reg = 4; // offset in 32bit words to skip to the first bar + + union pci_addr_reg pci_addr = {state->pci_addr.value}; // my address + + uint32_t bar_val = 0; + uint32_t max_val = 0; + + struct pt_bar * pbar = &(state->bars[bar_num]); + + pci_addr.reg = bar_base_reg + bar_num; + + PrintDebug("pci_front (%s): pci_bar_init: PCI Address = 0x%x\n", state->name, pci_addr.value); + + // This assumees that pull_config() has been previously called and + // we have a local copy of the host device's configuration space + bar_val = *((uint32_t*)(&(state->config_space[(bar_base_reg+bar_num)*4]))); + + // Now let's set our copy of the relevant bar accordingly + pbar->val = bar_val; + + // Now we will configure the hooks relevant to this bar + + // We preset this type when we encounter a MEM64 Low BAR + // This is a 64 bit memory region that we 
turn into a memory hook + if (pbar->type == PT_BAR_MEM64_HI) { + struct pt_bar * lo_pbar = &(state->bars[bar_num - 1]); + + max_val = PCI_MEM64_MASK_HI; + + pbar->size += lo_pbar->size; + + PrintDebug("pci_front (%s): pci_bar_init: Adding 64 bit PCI mem region: start=0x%p, end=0x%p as a full hook\n", + state->name, + (void *)(addr_t)pbar->addr, + (void *)(addr_t)(pbar->addr + pbar->size)); + + if (v3_hook_full_mem(dev->vm, + V3_MEM_CORE_ANY, + pbar->addr, + pbar->addr+pbar->size-1, + pci_front_read_mem, + pci_front_write_mem, + dev)<0) { + + PrintError("pci_front (%s): pci_bar_init: failed to hook 64 bit region (0x%p, 0x%p)\n", + state->name, + (void *)(addr_t)pbar->addr, + (void *)(addr_t)(pbar->addr + pbar->size - 1)); + return -1; + } + + } else if ((bar_val & 0x3) == 0x1) { + // This an I/O port region which we will turn into a range of hooks + + int i = 0; + + pbar->type = PT_BAR_IO; + pbar->addr = PCI_IO_BASE(bar_val); + + max_val = bar_val | PCI_IO_MASK; + + pbar->size = (uint16_t)~PCI_IO_BASE(max_val) + 1; + + + PrintDebug("pci_front (%s): pci_bar_init: hooking ports 0x%x through 0x%x\n", + state->name, (uint32_t)pbar->addr, (uint32_t)pbar->addr + pbar->size - 1); + + for (i = 0; i < pbar->size; i++) { + if (v3_dev_hook_io(dev, + pbar->addr + i, + pci_front_read_port, + pci_front_write_port)<0) { + PrintError("pci_front (%s): pci_bar_init: unabled to hook I/O port 0x%x\n",state->name, (unsigned)(pbar->addr+i)); + return -1; + } + } + + } else { + + // might be a 32 bit memory region or an empty bar + + max_val = bar_val | PCI_MEM_MASK; + + if (max_val == 0) { + // nothing, so just ignore it + pbar->type = PT_BAR_NONE; + } else { + + // memory region - hook it + + if ((bar_val & 0x6) == 0x0) { + // 32 bit memory region + + pbar->type = PT_BAR_MEM32; + pbar->addr = PCI_MEM32_BASE(bar_val); + pbar->size = ~PCI_MEM32_BASE(max_val) + 1; + + PrintDebug("pci_front (%s): pci_init_bar: adding 32 bit PCI mem region: start=0x%p, end=0x%p\n", + state->name, + (void 
*)(addr_t)pbar->addr, + (void *)(addr_t)(pbar->addr + pbar->size)); + + if (v3_hook_full_mem(dev->vm, + V3_MEM_CORE_ANY, + pbar->addr, + pbar->addr+pbar->size-1, + pci_front_read_mem, + pci_front_write_mem, + dev) < 0 ) { + PrintError("pci_front (%s): pci_init_bar: unable to hook 32 bit memory region 0x%p to 0x%p\n", + state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1)); + return -1; + } + + } else if ((bar_val & 0x6) == 0x2) { + + // 24 bit memory region + + pbar->type = PT_BAR_MEM24; + pbar->addr = PCI_MEM24_BASE(bar_val); + pbar->size = ~PCI_MEM24_BASE(max_val) + 1; + + + if (v3_hook_full_mem(dev->vm, + V3_MEM_CORE_ANY, + pbar->addr, + pbar->addr+pbar->size-1, + pci_front_read_mem, + pci_front_write_mem, + dev) < 0 ) { + PrintError("pci_front (%s): pci_init_bar: unable to hook 24 bit memory region 0x%p to 0x%p\n", + state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1)); + return -1; + } + + } else if ((bar_val & 0x6) == 0x4) { + + // partial update of a 64 bit region, no hook done yet + + struct pt_bar * hi_pbar = &(state->bars[bar_num + 1]); + + pbar->type = PT_BAR_MEM64_LO; + hi_pbar->type = PT_BAR_MEM64_HI; + + // Set the low bits, only for temporary storage until we calculate the high BAR + pbar->addr = PCI_MEM64_BASE_LO(bar_val); + pbar->size = ~PCI_MEM64_BASE_LO(max_val) + 1; + + PrintDebug("pci_front (%s): pci_bar_init: partial 64 bit update\n",state->name); + + } else { + PrintError("pci_front (%s): pci_bar_init: invalid memory bar type\n",state->name); + return -1; + } + + } + } + + + + // Update the pci subsystem versions + *dst = bar_val; + + return 0; +} + + +// +// If the guest modifies a BAR, we expect that pci.c will do the following, +// in this order +// +// 1. notify us via the config_update callback, which we will feed back +// to the host device +// 2. 
notify us of the bar change via the following callback +// +// This callback will unhook as needed for the old bar value and rehook +// as needed for the new bar value +// +static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data; + + struct pt_bar * pbar = &(state->bars[bar_num]); + + PrintDebug("pci_front (%s): bar update: bar_num=%d, src=0x%x\n", state->name, bar_num, *src); + PrintDebug("pci_front (%s): the current bar has size=%u, type=%d, addr=%p, val=0x%x\n", + state->name, pbar->size, pbar->type, (void *)(addr_t)pbar->addr, pbar->val); + + + + if (pbar->type == PT_BAR_NONE) { + PrintDebug("pci_front (%s): bar update is to empty bar - ignored\n",state->name); + return 0; + } else if (pbar->type == PT_BAR_IO) { + int i = 0; + + // unhook old ports + PrintDebug("pci_front (%s): unhooking I/O ports 0x%x through 0x%x\n", + state->name, + (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1)); + for (i = 0; i < pbar->size; i++) { + if (v3_dev_unhook_io(dev, pbar->addr + i) == -1) { + PrintError("pci_front (%s): could not unhook previously hooked port.... 0x%x\n", + state->name, + (uint32_t)pbar->addr + i); + return -1; + } + } + + PrintDebug("pci_front (%s): setting I/O Port range size=%d\n", state->name, pbar->size); + + // + // Not clear if this cooking is needed... why not trust + // the write? Who cares if it wants to suddenly hook more ports? 
+ // + + // clear the low bits to match the size + *src &= ~(pbar->size - 1); + + // Set reserved bits + *src |= (pbar->val & ~PCI_IO_MASK); + + pbar->addr = PCI_IO_BASE(*src); + + PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src); + + PrintDebug("pci_front (%s): rehooking I/O ports 0x%x through 0x%x\n", + state->name, (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1)); + + for (i = 0; i < pbar->size; i++) { + if (v3_dev_hook_io(dev, + pbar->addr + i, + pci_front_read_port, + pci_front_write_port)<0) { + PrintError("pci_front (%s): unable to rehook port 0x%x\n",state->name, (unsigned)(pbar->addr+i)); + return -1; + } + } + + } else if (pbar->type == PT_BAR_MEM32) { + + if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { + PrintError("pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n", + state->name, (void*)(pbar->addr)); + return -1; + } + + // Again, not sure I need to do this cooking... + + // clear the low bits to match the size + *src &= ~(pbar->size - 1); + + // Set reserved bits + *src |= (pbar->val & ~PCI_MEM_MASK); + + PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src); + + pbar->addr = PCI_MEM32_BASE(*src); + + PrintDebug("pci_front (%s): rehooking 32 bit memory region 0x%p through 0x%p\n", + state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1)); + + if (v3_hook_full_mem(dev->vm, + V3_MEM_CORE_ANY, + pbar->addr, + pbar->addr+pbar->size-1, + pci_front_read_mem, + pci_front_write_mem, + dev)<0) { + PrintError("pci_front (%s): unable to rehook 32 bit memory region 0x%p through 0x%p\n", + state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1)); + return -1; + } + + } else if (pbar->type == PT_BAR_MEM64_LO) { + // We only store the written values here, the actual reconfig comes when the high BAR is updated + + // clear the low bits to match the size + *src &= ~(pbar->size - 1); + + // Set reserved bits + *src |= (pbar->val & ~PCI_MEM_MASK); + + // Temp 
storage, used when hi bar is written + pbar->addr = PCI_MEM64_BASE_LO(*src); + + PrintDebug("pci_front (%s): handled partial update for 64 bit memory region\n",state->name); + + } else if (pbar->type == PT_BAR_MEM64_HI) { + struct pt_bar * lo_vbar = &(state->bars[bar_num - 1]); + + if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { + PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n", + state->name, (void*)(pbar->addr)); + return -1; + } + + + // We don't set size, because we assume region is less than 4GB + + // Set reserved bits + *src |= (pbar->val & ~PCI_MEM64_MASK_HI); + + pbar->addr = PCI_MEM64_BASE_HI(*src); + pbar->addr <<= 32; + pbar->addr += lo_vbar->addr; + + PrintDebug("pci_front (%s): rehooking 64 bit memory region 0x%p through 0x%p\n", + state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1)); + + if (v3_hook_full_mem(dev->vm, + V3_MEM_CORE_ANY, + pbar->addr, + pbar->addr+pbar->size-1, + pci_front_read_mem, + pci_front_write_mem, + dev)<0) { + PrintError("pci_front (%s): unable to rehook 64 bit memory region 0x%p through 0x%p\n", + state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1)); + return -1; + } + + } else { + PrintError("pci_front (%s): unhandled PCI bar type %d\n", state->name, pbar->type); + return -1; + } + + pbar->val = *src; + + return 0; +} + + +static int pci_front_config_update(uint_t reg_num, void * src, uint_t length, void * private_data) +{ + int i; + struct vm_device * dev = (struct vm_device *)private_data; + struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data; + union pci_addr_reg pci_addr = {state->pci_addr.value}; + + pci_addr.reg = reg_num >> 2; + + PrintDebug("pci_front (%s): configuration update: writing 0x%x bytes at offset 0x%x to host device 0x%p, bytes=0x", + state->name, length, pci_addr.value, state->host_dev); + + for (i = 0; i < length; i++) { + PrintDebug("%x", ((uint8_t *)src)[i]); + } + + PrintDebug("\n"); + 
+ if (v3_host_dev_write_config(state->host_dev, + pci_addr.value, + src, + length) != length) { + PrintError("pci_front (%s): configuration update: unable to write all bytes\n",state->name); + return -1; + } + + + return 0; +} + + +static int unhook_all_mem(struct pci_front_internal *state) +{ + int bar_num; + struct vm_device *bus = state->pci_bus; + + + for (bar_num=0;bar_num<6;bar_num++) { + struct pt_bar * pbar = &(state->bars[bar_num]); + + PrintDebug("pci_front (%s): unhooking for bar %d\n", state->name, bar_num); + + if (pbar->type == PT_BAR_MEM32) { + if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { + PrintError("pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n", + state->name, (void*)(pbar->addr)); + return -1; + } + } else if (pbar->type == PT_BAR_MEM64_HI) { + + if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { + PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n", + state->name, (void*)(pbar->addr)); + return -1; + } + } + } + + return 0; +} + + + +static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev) +{ + struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data; + struct pci_device * pci_dev = NULL; + struct v3_pci_bar bars[6]; + int bus_num = 0; + int i; + + for (i = 0; i < 6; i++) { + bars[i].type = PCI_BAR_PASSTHROUGH; + bars[i].private_data = dev; + bars[i].bar_init = pci_bar_init; + bars[i].bar_write = pci_bar_write; + } + + pci_dev = v3_pci_register_device(state->pci_bus, + PCI_STD_DEVICE, + bus_num, -1, 0, + state->name, bars, + pci_front_config_update, + NULL, // no support for command updates + NULL, // no support for expansion roms + dev); + + + state->pci_dev = pci_dev; + + + // EXPANSION ROMS CURRENTLY UNSUPPORTED + + // COMMANDS CURRENTLY UNSUPPORTED + + return 0; +} + + + +// +// Note: potential bug: not clear what pointer I get here +// +static int pci_front_free(struct pci_front_internal *state) +{ + + if 
(unhook_all_mem(state)<0) { + return -1; + } + + // the device manager will unhook the i/o ports for us + + if (state->host_dev) { + v3_host_dev_close(state->host_dev); + state->host_dev=0; + } + + + V3_Free(state); + + PrintDebug("pci_front (%s): freed\n",state->name); + + return 0; +} + + +static struct v3_device_ops dev_ops = { +// +// Note: potential bug: not clear what pointer I get here +// + .free = (int (*)(void*))pci_front_free, +}; + + + + + + + +static int pci_front_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) +{ + struct vm_device * dev; + struct vm_device * bus; + struct pci_front_internal *state; + char *dev_id; + char *bus_id; + char *url; + + + if (!(dev_id = v3_cfg_val(cfg, "ID"))) { + PrintError("pci_front: no id given!\n"); + return -1; + } + + if (!(bus_id = v3_cfg_val(cfg, "bus"))) { + PrintError("pci_front (%s): no bus given!\n",dev_id); + return -1; + } + + if (!(url = v3_cfg_val(cfg, "hostdev"))) { + PrintError("pci_front (%s): no host device url given!\n",dev_id); + return -1; + } + + if (!(bus = v3_find_dev(vm,bus_id))) { + PrintError("pci_front (%s): cannot attach to bus %s\n",dev_id,bus_id); + return -1; + } + + if (!(state = V3_Malloc(sizeof(struct pci_front_internal)))) { + PrintError("pci_front (%s): cannot allocate state for device\n",dev_id); + return -1; + } + + memset(state, 0, sizeof(struct pci_front_internal)); + + state->pci_bus = bus; + strncpy(state->name, dev_id, 32); + + if (!(dev = v3_add_device(vm, dev_id, &dev_ops, state))) { + PrintError("pci_front (%s): unable to add device\n",state->name); + return -1; + } + + if (!(state->host_dev=v3_host_dev_open(url,V3_BUS_CLASS_PCI,dev,vm))) { + PrintError("pci_front (%s): unable to attach to host device %s\n",state->name, url); + v3_remove_device(dev); + return -1; + } + + // fetch config space from the host + if (pull_config(state,state->config_space)) { + PrintError("pci_front (%s): cannot initially configure device\n",state->name); + v3_remove_device(dev); + return -1; + 
} + + // setup virtual device for now + if (setup_virt_pci_dev(vm,dev)<0) { + PrintError("pci_front (%s): cannot set up virtual pci device\n", state->name); + v3_remove_device(dev); + return -1; + } + + // We do not need to hook anything here since pci will call + // us back via the bar_init functions + + PrintDebug("pci_front (%s): inited and ready to be Potemkinized\n",state->name); + + return 0; + +} + + +device_register("PCI_FRONT", pci_front_init) diff --git a/palacios/src/devices/vga.c b/palacios/src/devices/vga.c index 834f780..ce28940 100644 --- a/palacios/src/devices/vga.c +++ b/palacios/src/devices/vga.c @@ -26,6 +26,16 @@ #include "vga_regs.h" +#ifndef CONFIG_DEBUG_VGA +#undef PrintDebug +#define PrintDebug(fmt, args...) +#endif + +#define DEBUG_MEM_DATA 0 +#define DEBUG_DEEP_MEM 0 +#define DEBUG_DEEP_RENDER 0 + + #define MEM_REGION_START 0xa0000 #define MEM_REGION_END 0xc0000 #define MEM_REGION_NUM_PAGES (((MEM_REGION_END)-(MEM_REGION_START))/4096) @@ -272,7 +282,7 @@ struct vga_dac_regs { struct vga_internal { - struct vm_device *dev; + struct vm_device *dev; bool passthrough; bool skip_next_passthrough_out; // for word access @@ -320,6 +330,8 @@ struct vga_internal { }; +typedef enum {PLANAR_SHIFT, PACKED_SHIFT, C256_SHIFT} shift_mode_t; + static void find_text_char_dim(struct vga_internal *vga, uint32_t *w, uint32_t *h) { @@ -363,6 +375,7 @@ static void find_text_data_start(struct vga_internal *vga, void **data) } + static void find_text_attr_start(struct vga_internal *vga, void **data) { uint32_t offset; @@ -444,6 +457,29 @@ static int blinking(struct vga_internal *vga) } +static void find_graphics_data_starting_offset(struct vga_internal *vga, uint32_t *offset) +{ + + *offset = vga->vga_crt_controller.vga_start_address_high; + *offset <<= 8; + *offset += vga->vga_crt_controller.vga_start_address_low; +} + + +static void find_shift_mode(struct vga_internal *vga, shift_mode_t *mode) +{ + if (vga->vga_graphics_controller.vga_graphics_mode.c256) { + 
*mode=C256_SHIFT; + } else { + if (vga->vga_graphics_controller.vga_graphics_mode.shift_reg_mode) { + *mode=PACKED_SHIFT; + } else { + *mode=PLANAR_SHIFT; + } + } +} + + static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_t *height) { uint32_t vert_lsb, vert_msb; @@ -458,34 +494,42 @@ static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_ + (vga->vga_crt_controller.vga_overflow.vertical_disp_enable_end8); *height = ( (vert_msb << 8) + vert_lsb + 1) ; // pixels high (scanlines) - -} - - -static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *width, uint32_t *height) -{ - -} -static void render_graphics(struct vga_internal *vga, void *fb) -{ + // At this point we have the resolution in dot clocks across and scanlines top-to-bottom + // This is usually the resolution in pixels, but it can be monkeyed with + // at least in the following ways - PrintDebug("vga: render_graphics is unimplemented\n"); - // Multiuplane 16 - // Packed pixel mono - // packed pixel 4 color - // packed pixel 256 color + // vga sequencer dot clock divide by two + if (vga->vga_sequencer.vga_clocking_mode.dot_clock) { + *width/=2; + *height/=2; + } - find_graphics_cursor_pos(0,0,0); + // crt_controller.max_row_scan.double_scan => each row twice for 200=>400 + if (vga->vga_crt_controller.vga_max_row_scan.double_scan) { + *height/=2; + } + + // crt_controller.crt_mode_control.count_by_two => pixels twice as wide as normal + if (vga->vga_crt_controller.vga_crt_mode_control.count_by_two) { + *width /= 2; + } + // crt_controller.crt_mode_control.horizontal_retrace_select => pixels twice as tall as normal + if (vga->vga_crt_controller.vga_crt_mode_control.horizontal_retrace_select) { + *height /= 2; + } + } -static void render_text_cursor(struct vga_internal *vga, void *fb) + +static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *x, uint32_t *y) { + // todo + *x=*y=0; } - static void dac_lookup_24bit_color(struct 
vga_internal *vga, uint8_t entry, uint8_t *red, @@ -503,6 +547,209 @@ static void dac_lookup_24bit_color(struct vga_internal *vga, } + +/* + Colors work like this: + + 4 bit modes: index is to the internal palette on the attribute controller + that supplies 6 bits, but we need 8 to index the dac + 2 more (the msbs) are supplied from the color select register + we can optionally overwrite bits 5 and 4 from the color + select register as well, depending on a selection bit + in the mode control register. The result of all this is + 8 bit index for the dac + + 8 bit modes: the attribute controller passes the index straight through + to the DAC. + + + The DAC translates from the 8 bit index into 6 bits per color channel + (18 bit color). We mulitply by 4 to get 24 bit color. +*/ + +static void find_24bit_color(struct vga_internal *vga, + uint8_t val, + uint8_t *red, + uint8_t *green, + uint8_t *blue) +{ + uint8_t di; // ultimate dac index + + if (vga->vga_attribute_controller.vga_attribute_mode_control.pixel_width) { + // 8 bit mode does right to the DAC + di=val; + } else { + struct vga_internal_palette_reg pr = vga->vga_attribute_controller.vga_internal_palette[val%16]; + di = pr.palette_data; + + // Fix bits 5-4 if needed + if (vga->vga_attribute_controller.vga_attribute_mode_control.p54_select) { + di &= ~0x30; // clear 5-4 + di |= vga->vga_attribute_controller.vga_color_select.sc4 << 4; + di |= vga->vga_attribute_controller.vga_color_select.sc5 << 5; + } + + // We must always produce bits 6 and 7 + di &= ~0xc0; // clear 7-6 + di |= vga->vga_attribute_controller.vga_color_select.sc6 << 6; + di |= vga->vga_attribute_controller.vga_color_select.sc7 << 7; + } + + dac_lookup_24bit_color(vga,di,red,green,blue); +} + +static void render_graphics(struct vga_internal *vga, void *fb) +{ + + struct v3_frame_buffer_spec *spec = &(vga->target_spec); + + uint32_t gw, gh; // graphics w/h + uint32_t fw, fh; // fb w/h + uint32_t rgw, rgh; // region we can actually show on the frame 
buffer + + + uint32_t fx, fy; // pixel position within the frame buffer + + uint32_t offset; // offset into the maps + uint8_t m; // map + uint8_t p; // pixel in the current map byte (0..7) + + uint8_t r,g,b; // looked up colors for entry + + void *pixel; // current pixel in the fb + uint8_t *red; // and the channels in the pixel + uint8_t *green; // + uint8_t *blue; // + + uint8_t db[4]; // 4 bytes read at a time + uint8_t pb[8]; // 8 pixels assembled at a time + + shift_mode_t sm; // shift mode + + uint32_t cur_x, cur_y; + + + find_graphics_res(vga,&gw,&gh); + + find_shift_mode(vga,&sm); + + find_graphics_cursor_pos(vga,&cur_x,&cur_y); + + find_graphics_data_starting_offset(vga,&offset); + + fw = spec->width; + fh = spec->height; + + + PrintDebug("vga: attempting graphics render (%s): graphics_res=(%u,%u), fb_res=(%u,%u), " + "fb=0x%p offset=0x%x\n", + sm == PLANAR_SHIFT ? "planar shift" : + sm == PACKED_SHIFT ? "packed shift" : + sm == C256_SHIFT ? "color256 shift" : "UNKNOWN", + gw,gh,fw,fh,fb,offset); + + // First we need to clip to what we can actually show + rgw = gw < fw ? gw : fw; + rgh = gh < fh ? 
gh : fh; + + if (gw%8) { + PrintError("vga: warning: graphics width is not a multiple of 8\n"); + } + + + + // Now we scan across by row + for (fy=0;fymap[m]+offset)); + } + + // assemble + switch (sm) { + case PLANAR_SHIFT: + for (p=0;p<8;p++) { + pb[p]= + (( db[0] >> 7) & 0x1) | + (( db[1] >> 6) & 0x2) | + (( db[2] >> 5) & 0x4) | + (( db[3] >> 4) & 0x8) ; + + for (m=0;m<4;m++) { + db[m] <<= 1; + } + } + break; + + case PACKED_SHIFT: + // first 4 pixels use planes 0 and 2 + for (p=0;p<4;p++) { + pb[p] = + ((db[2] >> 4) & 0xc) | + ((db[0] >> 6) & 0x3) ; + db[2] <<= 2; + db[0] <<= 2; + } + break; + + // next 4 pixels use planes 1 and 3 + for (p=4;p<8;p++) { + pb[p] = + ((db[3] >> 4) & 0xc) | + ((db[1] >> 6) & 0x3) ; + db[3] <<= 2; + db[1] <<= 2; + } + break; + + case C256_SHIFT: + // this one is either very bizarre or as simple as this + for (p=0;p<4;p++) { + pb[p] = db[p]; + } + break; + } + + // draw each pixel + for (p=0;p< (sm==C256_SHIFT ? 4 : 8);p++) { + + // find its color + find_24bit_color(vga,pb[p],&r,&g,&b); + + // find its position in the framebuffer; + pixel = fb + (((fx + p) + (fy*spec->width)) * spec->bytes_per_pixel); + red = pixel + spec->red_offset; + green = pixel + spec->green_offset; + blue = pixel + spec->blue_offset; + + // draw it + *red=r; + *green=g; + *blue=b; + } + } + } + } + + PrintDebug("vga: render done\n"); +} + + +static void render_text_cursor(struct vga_internal *vga, void *fb) +{ +} + + + + // // A variant of this function could render to // a text console interface as well @@ -593,22 +840,22 @@ static void render_text(struct vga_internal *vga, void *fb) // foreground if (!extended_fontset(vga)) { - fg_entry = ((uint8_t)(a.foreground_intensity_or_font_select)) << 3; + fg_entry = a.foreground_intensity_or_font_select << 3; } else { fg_entry = 0; } fg_entry |= a.fore; - dac_lookup_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb); + find_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb); if (!blinking(vga)) { - bg_entry = 
((uint8_t)(a.blinking_or_bg_intensity)) << 3; + bg_entry = a.blinking_or_bg_intensity << 3; } else { bg_entry = 0; } bg_entry |= a.back; - dac_lookup_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb); + find_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb); // Draw the character for (l=0; ltarget_spec); + + memset(fb,0,s->height*s->width*s->bytes_per_pixel); +} + static void render_maps(struct vga_internal *vga, void *fb) { @@ -757,20 +1013,21 @@ static int render(struct vga_internal *vga) fb = v3_graphics_console_get_frame_buffer_data_rw(vga->host_cons,&(vga->target_spec)); - // Draw some crap for testing for now - if (0) { render_test(vga,fb);} - // Draw the maps for debugging - if (0) { render_maps(vga,fb);} - - if (vga->vga_graphics_controller.vga_misc.graphics_mode) { - render_graphics(vga,fb); + if (!(vga->vga_sequencer.vga_clocking_mode.screen_off)) { + if (vga->vga_attribute_controller.vga_attribute_mode_control.graphics) { + render_graphics(vga,fb); + } else { + render_text(vga,fb); + render_text_cursor(vga,fb); + } } else { - render_text(vga,fb); - render_text_cursor(vga,fb); + render_black(vga,fb); } - render_maps(vga,fb); + if (0) { render_test(vga,fb); } + // always render maps for now + render_maps(vga,fb); v3_graphics_console_release_frame_buffer_data_rw(vga->host_cons); } @@ -870,7 +1127,7 @@ static int vga_write(struct guest_info * core, memcpy(V3_VAddr((void*)guest_addr),src,length); } -#if 0 +#if DEBUG_MEM_DATA int i; PrintDebug("vga: data written was 0x"); for (i=0;ivga_sequencer.vga_mem_mode.odd_even); - switch (vga->vga_graphics_controller.vga_graphics_mode.write_mode) { case 0: { @@ -916,7 +1169,9 @@ static int vga_write(struct guest_info * core, offset = find_offset_write(vga, guest_addr); +#if DEBUG_DEEP_MEM PrintDebug("vga: mode 0 write, offset=0x%llx, ror=%u, func=%u\n", offset,ror,func); +#endif for (i=0;ivga_graphics_controller.vga_bit_mask; uint8_t mm = find_map_write(vga,guest_addr+i); - PrintDebug("vga: write i=%u, mm=0x%x, offset=0x%x\n",i,(unsigned 
int)mm,(unsigned int)offset); +#if DEBUG_DEEP_MEM + PrintDebug("vga: write i=%u, mm=0x%x, bm=0x%x sr=0x%x esr=0x%x offset=0x%x\n",i,(unsigned int)mm,(unsigned int)bm, (unsigned int)sr, (unsigned int)esr,(unsigned int)offset); +#endif - for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, bm>>=1, mm>>=1) { + for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, mm>>=1) { vga_map map = vga->map[mapnum]; uint8_t data = ((uint8_t *)src)[i]; uint8_t latchval = vga->latch[mapnum]; - + +#if DEBUG_DEEP_MEM + PrintDebug("vga: raw data=0x%x\n",data); +#endif // rotate data right - data = (data>>ror) | data<<(8-ror); - + if (ror) { + data = (data>>ror) | data<<(8-ror); + } + +#if DEBUG_DEEP_MEM + PrintDebug("vga: data after ror=0x%x\n",data); +#endif // use SR bit if ESR is on for this map - if (esr & 0x1) { - data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7); // expand sr bit + if (esr & 0x1) { + data = (sr&0x1) * -1; + } +#if DEBUG_DEEP_MEM + PrintDebug("vga: data after esrr=0x%x\n",data); +#endif + // Apply function switch (func) { case 0: // NOP @@ -954,19 +1224,26 @@ static int vga_write(struct guest_info * core, data ^= latchval; break; } - - // mux between latch and alu output - if (bm & 0x1) { - // use alu output, which is in data - } else { - // use latch value - data=latchval; - } + +#if DEBUG_DEEP_MEM + PrintDebug("vga: data after func=0x%x\n",data); +#endif + + // mux between the data byte and the latch byte on + // a per-bit basis + data = (bm & data) | ((~bm) & latchval); + + +#if DEBUG_DEEP_MEM + PrintDebug("vga: data after bm mux=0x%x\n",data); +#endif // selective write if (mm & 0x1) { // write to this map - //PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset])); +#if DEBUG_DEEP_MEM + PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset])); +#endif map[offset] = data; } else { // skip this map @@ -989,7 +1266,9 @@ static int vga_write(struct guest_info * core, 
uint64_t offset = find_offset_write(vga,guest_addr); +#if DEBUG_DEEP_MEM PrintDebug("vga: mode 1 write, offset=0x%llx\n", offset); +#endif for (i=0;ilatch[mapnum]; // expand relevant bit to 8 bit - // it's basically esr=1, sr=bit from write - data = (uint8_t)(((sint8_t)(((data>>mapnum)&0x1)<<7))>>7); - + // it's basically esr=1, sr=bit from mode 0 write + data = ((data>>mapnum)&0x1) * -1; + // Apply function switch (func) { case 0: // NOP @@ -1060,14 +1341,9 @@ static int vga_write(struct guest_info * core, data ^= latchval; break; } - + // mux between latch and alu output - if (bm & 0x1) { - // use alu output, which is in data - } else { - // use latch value - data=latchval; - } + data = (bm & data) | ((~bm) & latchval); // selective write if (mm & 0x1) { @@ -1107,7 +1383,9 @@ static int vga_write(struct guest_info * core, // now for each map uint8_t data = ((uint8_t *)src)[i]; - data = (data>>ror) | data<<(8-ror); + if (ror) { + data = (data>>ror) | data<<(8-ror); + } uint8_t bm = vga->vga_graphics_controller.vga_bit_mask & data; uint8_t sr = vga->vga_graphics_controller.vga_set_reset.val & 0xf; @@ -1117,16 +1395,11 @@ static int vga_write(struct guest_info * core, vga_map map = vga->map[mapnum]; uint8_t latchval = vga->latch[mapnum]; - data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7); // expand sr bit - - + // expand SR bit + data = (sr&0x1) * -1; + // mux between latch and alu output - if (bm & 0x1) { - // use alu output, which is in data - } else { - // use latch value - data=latchval; - } + data = (bm & data) | ((~bm) & latchval); // selective write if (mm & 0x1) { @@ -1212,7 +1485,15 @@ static int vga_read(struct guest_info * core, // address bytes select the map for (i=0;ilatch[mapnum] = *(vga->map[mapnum]+offset); +#if DEBUG_DEEP_MEM + PrintDebug("vga: mode 0 read, chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum); +#endif + ((uint8_t*)dst)[i] = *(vga->map[mapnum]+offset); + + // presumably all the latches are to be reloaded, not just the selected one? 
+ for (mapnum=0;mapnum<4;mapnum++) { + vga->latch[mapnum] = *(vga->map[mapnum]+offset); + } } } else { mapnum = vga->vga_graphics_controller.vga_read_map_select.map_select; @@ -1222,6 +1503,10 @@ static int vga_read(struct guest_info * core, PrintError("vga: read to offset=%llu map=%u (%u bytes)\n",offset,mapnum,length); } +#if DEBUG_DEEP_MEM + PrintDebug("vga: mode 0 read, not-chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum); +#endif + memcpy(dst,(vga->map[mapnum])+offset,length); // load the latches with the last item read @@ -1252,6 +1537,11 @@ static int vga_read(struct guest_info * core, uint8_t bits; offset = find_offset_read(vga,guest_addr); + +#if DEBUG_DEEP_MEM + PrintDebug("vga: mode 1 read, offset=0x%llx, cc=0x%x, dc-0x%x\n",offset,cc,dc); +#endif + for (i=0;i use the internal color palette (load the regs) // 1 => use the external color palette uint8_t reserved:2; @@ -866,27 +865,13 @@ struct vga_attribute_byte { union { uint8_t val; struct { - union { - uint8_t fore:3; - struct { - uint8_t fore_red:1; - uint8_t fore_green:1; - uint8_t fore_blue:1; - } __attribute__((packed)); - } __attribute__((packed)); + uint8_t fore:3; //foreground color uint8_t foreground_intensity_or_font_select:1; // depends on char map select reg // character map selection is effected // when memory_mode.extended meomory=1 // and the two character map enteries on character_map_select are // different - union { - uint8_t back:3; - struct { - uint8_t back_red:1; - uint8_t back_green:1; - uint8_t back_blue:1; - } __attribute__((packed)); - } __attribute__((packed)); + uint8_t back:3; //background color uint8_t blinking_or_bg_intensity:1; // attribute mode control.enableblink = 1 => blink // =0 => intensity (16 colors of bg) diff --git a/palacios/src/devices/vnet_nic.c b/palacios/src/devices/vnet_nic.c index 0fdaaba..05117e5 100644 --- a/palacios/src/devices/vnet_nic.c +++ b/palacios/src/devices/vnet_nic.c @@ -42,7 +42,7 @@ struct vnet_nic_state { /* called by frontend, send pkt to 
VNET */ static int vnet_nic_send(uint8_t * buf, uint32_t len, - void * private_data) { + int synchronize, void * private_data) { struct vnet_nic_state * vnetnic = (struct vnet_nic_state *)private_data; struct v3_vnet_pkt pkt; @@ -52,15 +52,13 @@ static int vnet_nic_send(uint8_t * buf, uint32_t len, memcpy(pkt.header, buf, ETHERNET_HEADER_LEN); pkt.data = buf; -#ifdef CONFIG_DEBUG_VNET_NIC - { - PrintDebug("VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", + V3_Net_Print(2, "VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", pkt.size, pkt.src_id, pkt.src_type); - v3_hexdump(buf, len, NULL, 0); + if(v3_net_debug >= 4){ + v3_hexdump(buf, len, NULL, 0); } -#endif - return v3_vnet_send_pkt(&pkt, NULL);; + return v3_vnet_send_pkt(&pkt, NULL, synchronize); } @@ -70,22 +68,13 @@ static int virtio_input(struct v3_vm_info * info, void * private_data){ struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; - PrintDebug("VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", + V3_Net_Print(2, "VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type); return vnetnic->net_ops.recv(pkt->data, pkt->size, vnetnic->net_ops.frontend_data); } -/* poll data from front-end */ -static void virtio_poll(struct v3_vm_info * info, - int budget, - void * private_data){ - struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; - - vnetnic->net_ops.poll(info, budget, vnetnic->net_ops.frontend_data); -} - static int vnet_nic_free(struct vnet_nic_state * vnetnic) { @@ -102,7 +91,6 @@ static struct v3_device_ops dev_ops = { static struct v3_vnet_dev_ops vnet_dev_ops = { .input = virtio_input, - .poll = virtio_poll, }; diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig new file mode 100644 index 0000000..c7b7d69 --- /dev/null +++ b/palacios/src/extensions/Kconfig @@ -0,0 +1,41 @@ +menu 
"Extensions" + +config EXT_VTIME + bool "Enable Time virtualization" + default n + help + Enables the timer virtualization extensions. These hide the cost of + running inside the VMM context. This can aid the consistency of + time between multiple timers, but can cause the guest to run + a good bit slower than the host in VM-intensive parts of the code. + + +config EXT_VTSC + bool "Fully virtualize guest TSC" + default n + depends on EXT_VTIME + help + Virtualize the processor time stamp counter in the guest, + generally increasing consistency between various time sources + but also potentially making guest time run slower than real time. + +config EXT_MTRRS + bool "Support virtualized MTTRs" + default n + help + Provides a virtualized set of MTTR registers + +config EXT_MACH_CHECK + bool "Support Machine Check functionality" + default n + help + Provides a virtualized machine check architecture + + +config EXT_INSPECTOR + bool "VM Inspector" + default n + help + Provides the inspection extension + +endmenu diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile new file mode 100644 index 0000000..ac19202 --- /dev/null +++ b/palacios/src/extensions/Makefile @@ -0,0 +1,5 @@ +obj-y += null.o +obj-$(CONFIG_EXT_MTRRS) += ext_mtrr.o +obj-$(CONFIG_EXT_VTSC) += ext_vtsc.o +obj-$(CONFIG_EXT_VTIME) += ext_vtime.o +obj-$(CONFIG_EXT_INSPECTOR) += ext_inspector.o diff --git a/palacios/src/palacios/vmm_inspector.c b/palacios/src/extensions/ext_inspector.c similarity index 81% rename from palacios/src/palacios/vmm_inspector.c rename to palacios/src/extensions/ext_inspector.c index 2ff611b..7b89a91 100644 --- a/palacios/src/palacios/vmm_inspector.c +++ b/palacios/src/extensions/ext_inspector.c @@ -18,29 +18,39 @@ */ -#include +//#include #include #include #include +#include + +#include +#include // Note that v3_inspect_node_t is actuall a struct v3_mtree // Its set as void for opaque portability +struct v3_inspector_state { + struct v3_mtree state_tree; + 
+}; -int v3_init_inspector(struct v3_vm_info * vm) { - struct v3_inspector_state * state = (struct v3_inspector_state *)&(vm->inspector); +static int init_inspector(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) { + struct v3_inspector_state * state = V3_Malloc(sizeof(struct v3_inspector_state)); memset(state, 0, sizeof(struct v3_inspector_state)); strncpy(state->state_tree.name, "vm->name", 50); state->state_tree.subtree = 1; + *priv_data = state; + return 0; } -int v3_init_inspector_core(struct guest_info * core) { - struct v3_inspector_state * vm_state = &(core->vm_info->inspector); +static int init_inspector_core(struct guest_info * core, void * priv_data) { + struct v3_inspector_state * vm_state = priv_data; char core_name[50]; snprintf(core_name, 50, "core.%d", core->cpu_id); @@ -72,7 +82,7 @@ int v3_init_inspector_core(struct guest_info * core) { v3_inspect_64(cr_node, "EFER", (uint64_t *)&(core->ctrl_regs.efer)); - // struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS"); + //struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS"); @@ -82,6 +92,23 @@ int v3_init_inspector_core(struct guest_info * core) { } + + + +static struct v3_extension_impl inspector_impl = { + .name = "inspector", + .init = init_inspector, + .deinit = NULL, + .core_init = init_inspector_core, + .core_deinit = NULL, + .on_entry = NULL, + .on_exit = NULL +}; + + +register_extension(&inspector_impl); + + v3_inspect_node_t * v3_inspect_add_subtree(v3_inspect_node_t * root, char * name) { return v3_mtree_create_subtree(root, name); } @@ -122,8 +149,6 @@ int v3_inspect_buf(v3_inspect_node_t * node, char * name, - - int v3_find_inspection_value(v3_inspect_node_t * node, char * name, struct v3_inspection_value * value) { struct v3_mtree * mt_node = v3_mtree_find_node(node, name); @@ -152,7 +177,13 @@ struct v3_inspection_value v3_inspection_value(v3_inspect_node_t * node) { v3_inspect_node_t * v3_get_inspection_root(struct v3_vm_info * 
vm) { - return &(vm->inspector.state_tree); + struct v3_inspector_state * inspector = v3_get_extension_state(vm, inspector_impl.name); + + if (inspector == NULL) { + return NULL; + } + + return &(inspector->state_tree); } v3_inspect_node_t * v3_get_inspection_subtree(v3_inspect_node_t * root, char * name) { @@ -167,3 +198,7 @@ v3_inspect_node_t * v3_inspection_node_next(v3_inspect_node_t * node) { v3_inspect_node_t * v3_inspection_first_child(v3_inspect_node_t * root) { return v3_mtree_first_child(root); } + + + + diff --git a/palacios/src/extensions/ext_vtime.c b/palacios/src/extensions/ext_vtime.c new file mode 100644 index 0000000..665d6be --- /dev/null +++ b/palacios/src/extensions/ext_vtime.c @@ -0,0 +1,169 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * Patrick G. Bridges + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ +#include +#include +#include + + + + +/* Overview + * + * Time handling in VMMs is challenging, and Palacios uses the highest + * resolution, lowest overhead timer on modern CPUs that it can - the + * processor timestamp counter (TSC). Note that on somewhat old processors + * this can be problematic; in particular, older AMD processors did not + * have a constant rate timestamp counter in the face of power management + * events. However, the latest Intel and AMD CPUs all do (should...) have a + * constant rate TSC, and Palacios relies on this fact. 
+ * + * Basically, Palacios keeps track of three quantities as it runs to manage + * the passage of time: + * (1) The host timestamp counter - read directly from HW and never written + * (2) A monotonic guest timestamp counter used to measure the progression of + * time in the guest. This is computed using an offsets from (1) above. + * (3) The actual guest timestamp counter (which can be written by + * writing to the guest TSC MSR - MSR 0x10) from the monotonic guest TSC. + * This is also computed as an offset from (2) above when the TSC and + * this offset is updated when the TSC MSR is written. + * + * The value used to offset the guest TSC from the host TSC is the *sum* of all + * of these offsets (2 and 3) above + * + * Because all other devices are slaved off of the passage of time in the guest, + * it is (2) above that drives the firing of other timers in the guest, + * including timer devices such as the Programmable Interrupt Timer (PIT). + * + * Future additions: + * (1) Add support for temporarily skewing guest time off of where it should + * be to support slack simulation of guests. The idea is that simulators + * set this skew to be the difference between how much time passed for a + * simulated feature and a real implementation of that feature, making + * pass at a different rate from real time on this core. The VMM will then + * attempt to move this skew back towards 0 subject to resolution/accuracy + * constraints from various system timers. + * + * The main effort in doing this will be to get accuracy/resolution + * information from each local timer and to use this to bound how much skew + * is removed on each exit. + */ + + + +struct vtime_state { + uint32_t guest_cpu_freq; // can be lower than host CPU freq! + uint64_t initial_time; // Time when VMM started. 
+ sint64_t guest_host_offset;// Offset of monotonic guest time from host time +}; + + + + +static int offset_time( struct guest_info * info, sint64_t offset ) +{ + struct vm_time * time_state = &(info->time_state); +// PrintDebug("Adding additional offset of %lld to guest time.\n", offset); + time_state->guest_host_offset += offset; + return 0; +} + + +// Control guest time in relation to host time so that the two stay +// appropriately synchronized to the extent possible. +int v3_adjust_time(struct guest_info * info) { + struct vm_time * time_state = &(info->time_state); + uint64_t host_time, target_host_time; + uint64_t guest_time, target_guest_time, old_guest_time; + uint64_t guest_elapsed, host_elapsed, desired_elapsed; + + /* Compute the target host time given how much time has *already* + * passed in the guest */ + guest_time = v3_get_guest_time(time_state); + guest_elapsed = (guest_time - time_state->initial_time); + desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq; + target_host_time = time_state->initial_time + desired_elapsed; + + /* Now, let the host run while the guest is stopped to make the two + * sync up. */ + host_time = v3_get_host_time(time_state); + old_guest_time = v3_get_guest_time(time_state); + + while (target_host_time > host_time) { + v3_yield(info); + host_time = v3_get_host_time(time_state); + } + + guest_time = v3_get_guest_time(time_state); + + // We do *not* assume the guest timer was paused in the VM. If it was + // this offseting is 0. If it wasn't we need this. + offset_time(info, (sint64_t)old_guest_time - (sint64_t)guest_time); + + /* Now the host may have gotten ahead of the guest because + * yielding is a coarse grained thing. Figure out what guest time + * we want to be at, and use the use the offsetting mechanism in + * the VMM to make the guest run forward. We limit *how* much we skew + * it forward to prevent the guest time making large jumps, + * however. 
*/ + host_elapsed = host_time - time_state->initial_time; + desired_elapsed = (host_elapsed * time_state->guest_cpu_freq) / time_state->host_cpu_freq; + target_guest_time = time_state->initial_time + desired_elapsed; + + if (guest_time < target_guest_time) { + uint64_t max_skew, desired_skew, skew; + + if (time_state->enter_time) { + max_skew = (time_state->exit_time - time_state->enter_time) / 10; + } else { + max_skew = 0; + } + + desired_skew = target_guest_time - guest_time; + skew = desired_skew > max_skew ? max_skew : desired_skew; +/* PrintDebug("Guest %llu cycles behind where it should be.\n", + desired_skew); + PrintDebug("Limit on forward skew is %llu. Skewing forward %llu.\n", + max_skew, skew); */ + + offset_time(info, skew); + } + + return 0; +} + + +static int init() { + khz = v3_cfg_val(cfg_tree, "khz"); + + if (khz) { + time_state->guest_cpu_freq = atoi(khz); + PrintDebug("Core %d CPU frequency requested at %d khz.\n", + info->cpu_id, time_state->guest_cpu_freq); + } + + if ( (khz == NULL) || + (time_state->guest_cpu_freq <= 0) || + (time_state->guest_cpu_freq > time_state->host_cpu_freq) ) { + + time_state->guest_cpu_freq = time_state->host_cpu_freq; + } + + +} diff --git a/palacios/src/extensions/ext_vtsc.c b/palacios/src/extensions/ext_vtsc.c new file mode 100644 index 0000000..96e7ce3 --- /dev/null +++ b/palacios/src/extensions/ext_vtsc.c @@ -0,0 +1,195 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * Patrick G. Bridges + * + * This is free software. 
You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include + + +// Functions for handling exits on the TSC when fully virtualizing +// the timestamp counter. +#define TSC_MSR 0x10 +#define TSC_AUX_MSR 0xC0000103 + +int v3_handle_rdtscp(struct guest_info *info); +int v3_handle_rdtsc(struct guest_info *info); + + +struct vtsc_state { + + struct v3_msr tsc_aux; // Auxilliary MSR for RDTSCP + +}; + + + +/* + * Handle full virtualization of the time stamp counter. As noted + * above, we don't store the actual value of the TSC, only the guest's + * offset from monotonic guest's time. If the guest writes to the TSC, we + * handle this by changing that offset. + * + * Possible TODO: Proper hooking of TSC read/writes? + */ + +static int rdtsc(struct guest_info * info) { + uint64_t tscval = v3_get_guest_tsc(&info->time_state); + + info->vm_regs.rdx = tscval >> 32; + info->vm_regs.rax = tscval & 0xffffffffLL; + + return 0; +} + +int v3_handle_rdtsc(struct guest_info * info) { + rdtsc(info); + + info->vm_regs.rax &= 0x00000000ffffffffLL; + info->vm_regs.rdx &= 0x00000000ffffffffLL; + + info->rip += 2; + + return 0; +} + +int v3_rdtscp(struct guest_info * info) { + int ret; + /* First get the MSR value that we need. It's safe to futz with + * ra/c/dx here since they're modified by this instruction anyway. 
*/ + info->vm_regs.rcx = TSC_AUX_MSR; + ret = v3_handle_msr_read(info); + + if (ret != 0) { + return ret; + } + + info->vm_regs.rcx = info->vm_regs.rax; + + /* Now do the TSC half of the instruction */ + ret = v3_rdtsc(info); + + if (ret != 0) { + return ret; + } + + return 0; +} + + +int v3_handle_rdtscp(struct guest_info * info) { + PrintDebug("Handling virtual RDTSCP call.\n"); + + v3_rdtscp(info); + + info->vm_regs.rax &= 0x00000000ffffffffLL; + info->vm_regs.rcx &= 0x00000000ffffffffLL; + info->vm_regs.rdx &= 0x00000000ffffffffLL; + + info->rip += 3; + + return 0; +} + + + + +static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr *msr_val, void *priv) { + struct vm_time * time_state = &(info->time_state); + + V3_ASSERT(msr_num == TSC_AUX_MSR); + + msr_val->lo = time_state->tsc_aux.lo; + msr_val->hi = time_state->tsc_aux.hi; + + return 0; +} + + +static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr msr_val, void *priv) { + struct vm_time * time_state = &(info->time_state); + + V3_ASSERT(msr_num == TSC_AUX_MSR); + + time_state->tsc_aux.lo = msr_val.lo; + time_state->tsc_aux.hi = msr_val.hi; + + return 0; +} + + +static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr *msr_val, void *priv) { + uint64_t time = v3_get_guest_tsc(&info->time_state); + + V3_ASSERT(msr_num == TSC_MSR); + + msr_val->hi = time >> 32; + msr_val->lo = time & 0xffffffffLL; + + return 0; +} + + +static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num, + struct v3_msr msr_val, void *priv) { + struct vm_time * time_state = &(info->time_state); + uint64_t guest_time, new_tsc; + + V3_ASSERT(msr_num == TSC_MSR); + + new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo; + guest_time = v3_get_guest_time(time_state); + time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time; + + return 0; +} + + +static int deinit() { + v3_unhook_msr(vm, TSC_MSR); + 
v3_unhook_msr(vm, TSC_AUX_MSR); +} + + +static int init() { + + time_state->tsc_aux.lo = 0; + time_state->tsc_aux.hi = 0; + + + + PrintDebug("Installing TSC MSR hook.\n"); + ret = v3_hook_msr(vm, TSC_MSR, + tsc_msr_read_hook, tsc_msr_write_hook, NULL); + + if (ret != 0) { + return ret; + } + + PrintDebug("Installing TSC_AUX MSR hook.\n"); + ret = v3_hook_msr(vm, TSC_AUX_MSR, tsc_aux_msr_read_hook, + tsc_aux_msr_write_hook, NULL); + + if (ret != 0) { + return ret; + } +} diff --git a/palacios/src/extensions/null.c b/palacios/src/extensions/null.c new file mode 100644 index 0000000..1828c44 --- /dev/null +++ b/palacios/src/extensions/null.c @@ -0,0 +1,6 @@ +/** \file + * Do nothing module. + * + * This file only exists to appease the kbuild gods. + */ + diff --git a/palacios/src/interfaces/vmm_host_dev.c b/palacios/src/interfaces/vmm_host_dev.c index 064d17f..0002cb2 100644 --- a/palacios/src/interfaces/vmm_host_dev.c +++ b/palacios/src/interfaces/vmm_host_dev.c @@ -29,12 +29,13 @@ struct v3_host_dev_hooks * host_dev_hooks = 0; v3_host_dev_t v3_host_dev_open(char *impl, v3_bus_class_t bus, - v3_guest_dev_t gdev) + v3_guest_dev_t gdev, + struct v3_vm_info *vm) { V3_ASSERT(host_dev_hooks != NULL); V3_ASSERT(host_dev_hooks->open != NULL); - return host_dev_hooks->open(impl,bus,gdev); + return host_dev_hooks->open(impl,bus,gdev,vm->host_priv_data); } int v3_host_dev_close(v3_host_dev_t hdev) @@ -75,7 +76,7 @@ uint64_t v3_host_dev_read_mem(v3_host_dev_t hdev, V3_ASSERT(host_dev_hooks != NULL); V3_ASSERT(host_dev_hooks->read_mem != NULL); - return host_dev_hooks->read_mem(hdev,gpa,dst,len); + return host_dev_hooks->read_mem(hdev,(void*)gpa,dst,len); } uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev, @@ -86,7 +87,7 @@ uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev, V3_ASSERT(host_dev_hooks != NULL); V3_ASSERT(host_dev_hooks->write_mem != NULL); - return host_dev_hooks->write_mem(hdev,gpa,src,len); + return host_dev_hooks->write_mem(hdev,(void*)gpa,src,len); } uint64_t 
v3_host_dev_read_config(v3_host_dev_t hdev, @@ -140,7 +141,7 @@ int v3_host_dev_raise_irq(v3_host_dev_t hostdev, uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t hostdev, v3_guest_dev_t guest_dev, - addr_t gpa, + void * gpa, void *dst, uint64_t len) { @@ -154,14 +155,14 @@ uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t hostdev, if (!vm) { return 0; } else { - return v3_read_gpa_memory(&(vm->cores[0]), gpa, len, dst); + return v3_read_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, dst); } } } uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t hostdev, v3_guest_dev_t guest_dev, - addr_t gpa, + void * gpa, void *src, uint64_t len) { @@ -175,7 +176,7 @@ uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t hostdev, if (!vm) { return 0; } else { - return v3_write_gpa_memory(&(vm->cores[0]), gpa, len, src); + return v3_write_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, src); } } } diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile index c505b60..6a24b89 100644 --- a/palacios/src/palacios/Makefile +++ b/palacios/src/palacios/Makefile @@ -31,7 +31,6 @@ obj-y := \ vmm_binaries.o \ vmm_cpuid.o \ vmm_xml.o \ - vmm_muxer.o \ vmm_mem_hook.o \ vmm_mptable.o \ vmm_extensions.o \ @@ -39,7 +38,6 @@ obj-y := \ vmm_multitree.o \ -obj-$(CONFIG_INSPECTOR) += vmm_inspector.o obj-$(CONFIG_XED) += vmm_xed.o @@ -59,6 +57,7 @@ obj-$(CONFIG_VMX) += vmx.o \ vmx_io.o \ vmx_lowlevel.o \ vmx_msr.o \ + vmx_hw_info.o \ vmcs.o \ vmx_ctrl_regs.o \ vmx_assist.o diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c index b8cc549..4ab0134 100644 --- a/palacios/src/palacios/vm_guest.c +++ b/palacios/src/palacios/vm_guest.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -526,13 +525,7 @@ static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_dat int v3_init_vm(struct v3_vm_info * vm) { v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU()); - if (v3_get_foreground_vm() == NULL) { - 
v3_set_foreground_vm(vm); - } -#ifdef CONFIG_INSPECTOR - v3_init_inspector(vm); -#endif #ifdef CONFIG_TELEMETRY v3_init_telemetry(vm); @@ -661,9 +654,7 @@ int v3_init_core(struct guest_info * core) { v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU()); struct v3_vm_info * vm = core->vm_info; -#ifdef CONFIG_INSPECTOR - v3_init_inspector_core(core); -#endif + /* * Initialize the subsystem data strutures diff --git a/palacios/src/palacios/vm_guest_mem.c b/palacios/src/palacios/vm_guest_mem.c index bf4c30e..ca6c601 100644 --- a/palacios/src/palacios/vm_guest_mem.c +++ b/palacios/src/palacios/vm_guest_mem.c @@ -73,9 +73,9 @@ int v3_gpa_to_hpa(struct guest_info * info, addr_t gpa, addr_t * hpa) { } if (reg->flags.alloced == 0) { - PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n", - (void *)gpa); - v3_print_mem_map(info->vm_info); + //PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n", + // (void *)gpa); + //v3_print_mem_map(info->vm_info); return -1; } @@ -133,8 +133,8 @@ int v3_gpa_to_hva(struct guest_info * guest_info, addr_t gpa, addr_t * hva) { *hva = 0; if (v3_gpa_to_hpa(guest_info, gpa, &hpa) != 0) { - PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n", - (void *)gpa); + // PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n", + // (void *)gpa); return -1; } diff --git a/palacios/src/palacios/vmcs.c b/palacios/src/palacios/vmcs.c index 0b874fd..f8fe322 100644 --- a/palacios/src/palacios/vmcs.c +++ b/palacios/src/palacios/vmcs.c @@ -624,6 +624,7 @@ static void print_exec_ctrls() { #ifdef __V3_32BIT__ print_vmcs_field(VMCS_IO_BITMAP_A_ADDR_HIGH); #endif + print_vmcs_field(VMCS_IO_BITMAP_B_ADDR); #ifdef __V3_32BIT__ print_vmcs_field(VMCS_IO_BITMAP_B_ADDR_HIGH); @@ -762,162 +763,29 @@ void v3_print_vmcs() { /* * Returns the field length in bytes + * It doesn't get much uglier than this... 
Thanks Intel */ int v3_vmcs_get_field_len(vmcs_field_t field) { - switch(field) { - /* 16 bit Control Fields */ - case VMCS_GUEST_ES_SELECTOR: - case VMCS_GUEST_CS_SELECTOR: - case VMCS_GUEST_SS_SELECTOR: - case VMCS_GUEST_DS_SELECTOR: - case VMCS_GUEST_FS_SELECTOR: - case VMCS_GUEST_GS_SELECTOR: - case VMCS_GUEST_LDTR_SELECTOR: - case VMCS_GUEST_TR_SELECTOR: - case VMCS_HOST_ES_SELECTOR: - case VMCS_HOST_CS_SELECTOR: - case VMCS_HOST_SS_SELECTOR: - case VMCS_HOST_DS_SELECTOR: - case VMCS_HOST_FS_SELECTOR: - case VMCS_HOST_GS_SELECTOR: - case VMCS_HOST_TR_SELECTOR: - return 2; - - /* 32 bit Control Fields */ - case VMCS_PIN_CTRLS: - case VMCS_PROC_CTRLS: - case VMCS_SEC_PROC_CTRLS: - case VMCS_EXCP_BITMAP: - case VMCS_PG_FAULT_ERR_MASK: - case VMCS_PG_FAULT_ERR_MATCH: - case VMCS_CR3_TGT_CNT: - case VMCS_EXIT_CTRLS: - case VMCS_EXIT_MSR_STORE_CNT: - case VMCS_EXIT_MSR_LOAD_CNT: - case VMCS_ENTRY_CTRLS: - case VMCS_ENTRY_MSR_LOAD_CNT: - case VMCS_ENTRY_INT_INFO: - case VMCS_ENTRY_EXCP_ERR: - case VMCS_ENTRY_INSTR_LEN: - case VMCS_TPR_THRESHOLD: - case VMCS_INSTR_ERR: - case VMCS_EXIT_REASON: - case VMCS_EXIT_INT_INFO: - case VMCS_EXIT_INT_ERR: - case VMCS_IDT_VECTOR_INFO: - case VMCS_IDT_VECTOR_ERR: - case VMCS_EXIT_INSTR_LEN: - case VMCS_EXIT_INSTR_INFO: - case VMCS_GUEST_ES_LIMIT: - case VMCS_GUEST_CS_LIMIT: - case VMCS_GUEST_SS_LIMIT: - case VMCS_GUEST_DS_LIMIT: - case VMCS_GUEST_FS_LIMIT: - case VMCS_GUEST_GS_LIMIT: - case VMCS_GUEST_LDTR_LIMIT: - case VMCS_GUEST_TR_LIMIT: - case VMCS_GUEST_GDTR_LIMIT: - case VMCS_GUEST_IDTR_LIMIT: - case VMCS_GUEST_ES_ACCESS: - case VMCS_GUEST_CS_ACCESS: - case VMCS_GUEST_SS_ACCESS: - case VMCS_GUEST_DS_ACCESS: - case VMCS_GUEST_FS_ACCESS: - case VMCS_GUEST_GS_ACCESS: - case VMCS_GUEST_LDTR_ACCESS: - case VMCS_GUEST_TR_ACCESS: - case VMCS_GUEST_INT_STATE: - case VMCS_GUEST_ACTIVITY_STATE: - case VMCS_GUEST_SMBASE: - case VMCS_GUEST_SYSENTER_CS: - case VMCS_HOST_SYSENTER_CS: - return 4; + struct vmcs_field_encoding * enc = 
(struct vmcs_field_encoding *)&field; - - /* high bits of variable width fields - * We can probably just delete most of these.... - */ - case VMCS_IO_BITMAP_A_ADDR_HIGH: - case VMCS_IO_BITMAP_B_ADDR_HIGH: - case VMCS_MSR_BITMAP_HIGH: - case VMCS_EXIT_MSR_STORE_ADDR_HIGH: - case VMCS_EXIT_MSR_LOAD_ADDR_HIGH: - case VMCS_ENTRY_MSR_LOAD_ADDR_HIGH: - case VMCS_EXEC_PTR_HIGH: - case VMCS_TSC_OFFSET_HIGH: - case VMCS_VAPIC_ADDR_HIGH: - case VMCS_APIC_ACCESS_ADDR_HIGH: - case VMCS_LINK_PTR_HIGH: - case VMCS_GUEST_DBG_CTL_HIGH: - case VMCS_GUEST_PERF_GLOBAL_CTRL_HIGH: - case VMCS_HOST_PERF_GLOBAL_CTRL_HIGH: - case VMCS_GUEST_EFER_HIGH: + switch (enc->width) { + case 0: + return 2; + case 1: { + if (enc->access_type == 1) { + return 4; + } else { +#ifdef __V3_64BIT__ + return 8; +#else + return 4; +#endif + } + } + case 2: return 4; - - /* Natural Width Control Fields */ - case VMCS_IO_BITMAP_A_ADDR: - case VMCS_IO_BITMAP_B_ADDR: - case VMCS_MSR_BITMAP: - case VMCS_EXIT_MSR_STORE_ADDR: - case VMCS_EXIT_MSR_LOAD_ADDR: - case VMCS_ENTRY_MSR_LOAD_ADDR: - case VMCS_EXEC_PTR: - case VMCS_TSC_OFFSET: - case VMCS_VAPIC_ADDR: - case VMCS_APIC_ACCESS_ADDR: - case VMCS_LINK_PTR: - case VMCS_GUEST_DBG_CTL: - case VMCS_GUEST_PERF_GLOBAL_CTRL: - case VMCS_HOST_PERF_GLOBAL_CTRL: - case VMCS_CR0_MASK: - case VMCS_CR4_MASK: - case VMCS_CR0_READ_SHDW: - case VMCS_CR4_READ_SHDW: - case VMCS_CR3_TGT_VAL_0: - case VMCS_CR3_TGT_VAL_1: - case VMCS_CR3_TGT_VAL_2: - case VMCS_CR3_TGT_VAL_3: - case VMCS_EXIT_QUAL: - case VMCS_IO_RCX: - case VMCS_IO_RSI: - case VMCS_IO_RDI: - case VMCS_IO_RIP: - case VMCS_GUEST_LINEAR_ADDR: - case VMCS_GUEST_CR0: - case VMCS_GUEST_CR3: - case VMCS_GUEST_CR4: - case VMCS_GUEST_ES_BASE: - case VMCS_GUEST_CS_BASE: - case VMCS_GUEST_SS_BASE: - case VMCS_GUEST_DS_BASE: - case VMCS_GUEST_FS_BASE: - case VMCS_GUEST_GS_BASE: - case VMCS_GUEST_LDTR_BASE: - case VMCS_GUEST_TR_BASE: - case VMCS_GUEST_GDTR_BASE: - case VMCS_GUEST_IDTR_BASE: - case VMCS_GUEST_DR7: - case 
VMCS_GUEST_RSP: - case VMCS_GUEST_RIP: - case VMCS_GUEST_RFLAGS: - case VMCS_GUEST_PENDING_DBG_EXCP: - case VMCS_GUEST_SYSENTER_ESP: - case VMCS_GUEST_SYSENTER_EIP: - case VMCS_HOST_CR0: - case VMCS_HOST_CR3: - case VMCS_HOST_CR4: - case VMCS_HOST_FS_BASE: - case VMCS_HOST_GS_BASE: - case VMCS_HOST_TR_BASE: - case VMCS_HOST_GDTR_BASE: - case VMCS_HOST_IDTR_BASE: - case VMCS_HOST_SYSENTER_ESP: - case VMCS_HOST_SYSENTER_EIP: - case VMCS_HOST_RSP: - case VMCS_HOST_RIP: - case VMCS_GUEST_EFER: + case 3: return sizeof(addr_t); - default: PrintError("Invalid VMCS field: 0x%x\n", field); return -1; diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index ea24ca3..cb38aab 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -183,14 +183,13 @@ v3_cpu_arch_t v3_get_cpu_type(int cpu_id) { struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) { struct v3_vm_info * vm = v3_config_guest(cfg, priv_data); - V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip)); - - if (vm == NULL) { PrintError("Could not configure guest\n"); return NULL; } + V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip)); + if (name == NULL) { name = "[V3_VM]"; } else if (strlen(name) >= 128) { @@ -374,8 +373,6 @@ int v3_stop_vm(struct v3_vm_info * vm) { break; } - V3_Print("Yielding\n"); - v3_yield(NULL); } diff --git a/palacios/include/palacios/vmm_muxer.h b/palacios/src/palacios/vmm_barrier.c similarity index 51% rename from palacios/include/palacios/vmm_muxer.h rename to palacios/src/palacios/vmm_barrier.c index 1c50789..1115ce0 100644 --- a/palacios/include/palacios/vmm_muxer.h +++ b/palacios/src/palacios/vmm_barrier.c @@ -1,4 +1,4 @@ -/* +/* * This file is part of the Palacios Virtual Machine Monitor developed * by the V3VEE Project with funding from the United States National * Science Foundation and the Department of Energy. @@ -7,33 +7,15 @@ * and the University of New Mexico. 
You can find out more at * http://www.v3vee.org * - * Copyright (c) 2008, Jack Lange - * Copyright (c) 2008, The V3VEE Project + * Copyright (c) 2011, Jack Lange + * Copyright (c) 2011, The V3VEE Project * All rights reserved. * - * Author: Jack Lange + * Author: Jack Lange * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". */ -#ifndef __VMM_MUXER_H__ -#define __VMM_MUXER_H__ -#ifdef __V3VEE__ - - -struct v3_vm_info; - - - -struct v3_vm_info * v3_get_foreground_vm(); -void v3_set_foreground_vm(struct v3_vm_info * vm); - - -int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm)); - - -#endif - -#endif +#include diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c index b1d747e..25d8b23 100644 --- a/palacios/src/palacios/vmm_config.c +++ b/palacios/src/palacios/vmm_config.c @@ -410,6 +410,11 @@ static int post_config_core(struct guest_info * info, v3_cfg_tree_t * cfg) { info->core_run_state = CORE_STOPPED; + if (v3_init_core_extensions(info) == -1) { + PrintError("Error intializing extension core states\n"); + return -1; + } + if (info->vm_info->vm_class == V3_PC_VM) { if (post_config_pc_core(info, cfg) == -1) { PrintError("PC Post configuration failure\n"); @@ -552,6 +557,7 @@ int v3_free_config(struct v3_vm_info * vm) { + static int setup_memory_map(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { v3_cfg_tree_t * mem_region = v3_cfg_subtree(v3_cfg_subtree(cfg, "memmap"), "region"); diff --git a/palacios/src/palacios/vmm_config_class.h b/palacios/src/palacios/vmm_config_class.h index 9fcf197..0c7ef4c 100644 --- a/palacios/src/palacios/vmm_config_class.h +++ b/palacios/src/palacios/vmm_config_class.h @@ -77,13 +77,21 @@ static int post_config_pc(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { } - if (vm->num_cores > 1) { + if (vm->num_cores>1 && !v3_find_dev(vm,"apic")) { + PrintError("palacios: VM has more than one 
core, but no device named \"apic\"!\n"); + return -1; + } + + if (v3_find_dev(vm,"apic")) { + if (!v3_find_dev(vm,"ioapic")) { + PrintError("palacios: VM cores have apics, but there is no device named \"ioapic\"!\n"); + } if (v3_inject_mptable(vm) == -1) { PrintError("Failed to inject mptable during configuration\n"); return -1; } } - + return 0; } diff --git a/palacios/src/palacios/vmm_cpuid.c b/palacios/src/palacios/vmm_cpuid.c index a093bf7..ce7c244 100644 --- a/palacios/src/palacios/vmm_cpuid.c +++ b/palacios/src/palacios/vmm_cpuid.c @@ -7,11 +7,10 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * - * Copyright (c) 2008, Jack Lange - * Copyright (c) 2008, The V3VEE Project + * Copyright (c) 2011, Jack Lange * All rights reserved. * - * Author: Jack Lange + * Author: Jack Lange * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". @@ -22,11 +21,33 @@ #include #include +struct masked_cpuid { + uint32_t rax_mask; + uint32_t rbx_mask; + uint32_t rcx_mask; + uint32_t rdx_mask; + + uint32_t rax; + uint32_t rbx; + uint32_t rcx; + uint32_t rdx; +}; + void v3_init_cpuid_map(struct v3_vm_info * vm) { vm->cpuid_map.map.rb_node = NULL; + + // Setup default cpuid entries + + + // Disable XSAVE (cpuid 0x01, ECX bit 26) + v3_cpuid_add_fields(vm, 0x01, 0, 0, 0, 0, (1 << 26), 0, 0, 0); + } + + + int v3_deinit_cpuid_map(struct v3_vm_info * vm) { struct rb_node * node = v3_rb_first(&(vm->cpuid_map.map)); struct v3_cpuid_hook * hook = NULL; @@ -104,6 +125,105 @@ static struct v3_cpuid_hook * get_cpuid_hook(struct v3_vm_info * vm, uint32_t cp } + +static int mask_hook(struct guest_info * core, uint32_t cpuid, + uint32_t * eax, uint32_t * ebx, + uint32_t * ecx, uint32_t * edx, + void * priv_data) { + struct masked_cpuid * mask = (struct masked_cpuid *)priv_data; + + v3_cpuid(cpuid, eax, ebx, ecx, edx); + + *eax &= ~(mask->rax_mask); + *eax |= mask->rax; + + *ebx &= 
~(mask->rbx_mask); + *ebx |= mask->rbx; + + *ecx &= ~(mask->rcx_mask); + *ecx |= mask->rcx; + + *edx &= ~(mask->rdx_mask); + *edx |= mask->rdx; + + return 0; +} + +int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid, + uint32_t rax_mask, uint32_t rax, + uint32_t rbx_mask, uint32_t rbx, + uint32_t rcx_mask, uint32_t rcx, + uint32_t rdx_mask, uint32_t rdx) { + struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid); + + if (hook == NULL) { + struct masked_cpuid * mask = V3_Malloc(sizeof(struct masked_cpuid)); + memset(mask, 0, sizeof(struct masked_cpuid)); + + mask->rax_mask = rax_mask; + mask->rax = rax; + mask->rbx_mask = rbx_mask; + mask->rbx = rbx; + mask->rcx_mask = rcx_mask; + mask->rcx = rcx; + mask->rdx_mask = rdx_mask; + mask->rdx = rdx; + + if (v3_hook_cpuid(vm, cpuid, mask_hook, mask) == -1) { + PrintError("Error hooking cpuid %d\n", cpuid); + return -1; + } + } else { + struct masked_cpuid * mask = NULL; + uint32_t tmp_val = 0; + + if (hook->hook_fn != mask_hook) { + PrintError("trying to add fields to a fully hooked cpuid (%d)\n", cpuid); + return -1; + } + + mask = (struct masked_cpuid *)(hook->private_data); + + if ((mask->rax_mask & rax_mask) || + (mask->rbx_mask & rbx_mask) || + (mask->rcx_mask & rcx_mask) || + (mask->rdx_mask & rdx_mask)) { + PrintError("Trying to add fields that have already been masked\n"); + return -1; + } + + if ((~rax_mask & rax) || (~rbx_mask & rbx) || + (~rcx_mask & rcx) || (~rdx_mask & rdx)) { + PrintError("Invalid cpuid reg value (mask overrun)\n"); + return -1; + } + + mask->rax_mask |= rax_mask; + mask->rbx_mask |= rbx_mask; + mask->rcx_mask |= rcx_mask; + mask->rdx_mask |= rdx_mask; + + mask->rax |= rax; + tmp_val = (~rax_mask | rax); + mask->rax &= tmp_val; + + mask->rbx |= rbx; + tmp_val = (~rbx_mask | rbx); + mask->rbx &= tmp_val; + + mask->rcx |= rcx; + tmp_val = (~rcx_mask | rcx); + mask->rcx &= tmp_val; + + mask->rdx |= rdx; + tmp_val = (~rdx_mask | rdx); + mask->rdx &= tmp_val; + + } + + return 0; +} + 
int v3_unhook_cpuid(struct v3_vm_info * vm, uint32_t cpuid) { struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid); @@ -185,3 +305,8 @@ int v3_handle_cpuid(struct guest_info * info) { return 0; } + + + + + diff --git a/palacios/src/palacios/vmm_emulator.c b/palacios/src/palacios/vmm_emulator.c index 15a56d6..c05e09d 100644 --- a/palacios/src/palacios/vmm_emulator.c +++ b/palacios/src/palacios/vmm_emulator.c @@ -314,7 +314,7 @@ static int run_str_op(struct guest_info * core, struct x86_instr * instr, struct rflags * flags_reg = (struct rflags *)&(core->ctrl_regs.rflags); - PrintError("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx); + PrintDebug("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx); if (instr->op_type == V3_OP_MOVS) { diff --git a/palacios/src/palacios/vmm_extensions.c b/palacios/src/palacios/vmm_extensions.c index de1cfa0..1db9dc6 100644 --- a/palacios/src/palacios/vmm_extensions.c +++ b/palacios/src/palacios/vmm_extensions.c @@ -69,6 +69,8 @@ int V3_init_extensions() { } + + int V3_deinit_extensions() { v3_free_htable(ext_table, 0, 0); return 0; @@ -85,6 +87,15 @@ int v3_init_ext_manager(struct v3_vm_info * vm) { return 0; } + +int v3_deinit_ext_manager(struct v3_vm_info * vm) { + + PrintError("I should really do something here... 
\n"); + return -1; +} + + + int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg) { struct v3_extension_impl * impl = NULL; struct v3_extension * ext = NULL; @@ -125,3 +136,42 @@ int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * return 0; } + +int v3_init_core_extensions(struct guest_info * core) { + struct v3_extension * ext = NULL; + + list_for_each_entry(ext, &(core->vm_info->extensions.extensions), node) { + if ((ext->impl) && (ext->impl->core_init)) { + if (ext->impl->core_init(core, ext->priv_data) == -1) { + PrintError("Error configuring per core extension %s on core %d\n", + ext->impl->name, core->cpu_id); + return -1; + } + } + } + + return 0; +} + + + + +/* + * Return the private state of the extension attached to this VM whose + * implementation name is exactly 'name', or NULL if there is none. + */ +void * v3_get_extension_state(struct v3_vm_info * vm, const char * name) { + struct v3_extension * ext = NULL; + + list_for_each_entry(ext, &(vm->extensions.extensions), node) { + /* Compare through the terminating NUL so that only an exact name matches; + * a bare strlen() bound would also accept any name that merely starts + * with the extension's name. */ + if ((ext->impl) && + (strncmp(ext->impl->name, name, strlen(ext->impl->name) + 1) == 0)) { + return ext->priv_data; + } + } + + return NULL; +} diff --git a/palacios/src/palacios/vmm_host_events.c b/palacios/src/palacios/vmm_host_events.c index 432b9fb..3e6d09b 100644 --- a/palacios/src/palacios/vmm_host_events.c +++ b/palacios/src/palacios/vmm_host_events.c @@ -20,7 +20,6 @@ #include #include #include -#include int v3_init_host_events(struct v3_vm_info * vm) { struct v3_host_events * host_evts = &(vm->host_event_hooks); @@ -125,9 +124,6 @@ int v3_deliver_keyboard_event(struct v3_vm_info * vm, struct v3_host_events * host_evts = NULL; struct v3_host_event_hook * hook = NULL; - if (vm == NULL) { - vm = v3_get_foreground_vm(); - } host_evts = &(vm->host_event_hooks); @@ -150,9 +146,6 @@ int v3_deliver_mouse_event(struct v3_vm_info * vm, struct v3_host_events * host_evts = NULL; struct v3_host_event_hook * hook = NULL; - if (vm == NULL) { - vm = v3_get_foreground_vm(); - } host_evts = &(vm->host_event_hooks); @@ -175,9 +168,6 @@ int v3_deliver_timer_event(struct v3_vm_info * vm, struct
v3_host_events * host_evts = NULL; struct v3_host_event_hook * hook = NULL; - if (vm == NULL) { - vm = v3_get_foreground_vm(); - } host_evts = &(vm->host_event_hooks); @@ -199,9 +189,6 @@ int v3_deliver_serial_event(struct v3_vm_info * vm, struct v3_host_events * host_evts = NULL; struct v3_host_event_hook * hook = NULL; - if (vm == NULL) { - vm = v3_get_foreground_vm(); - } host_evts = &(vm->host_event_hooks); @@ -225,9 +212,6 @@ int v3_deliver_console_event(struct v3_vm_info * vm, struct v3_host_events * host_evts = NULL; struct v3_host_event_hook * hook = NULL; - if (vm == NULL) { - vm = v3_get_foreground_vm(); - } host_evts = &(vm->host_event_hooks); @@ -250,9 +234,6 @@ int v3_deliver_packet_event(struct v3_vm_info * vm, struct v3_host_events * host_evts = NULL; struct v3_host_event_hook * hook = NULL; - if (vm == NULL) { - vm = v3_get_foreground_vm(); - } host_evts = &(vm->host_event_hooks); diff --git a/palacios/src/palacios/vmm_muxer.c b/palacios/src/palacios/vmm_muxer.c deleted file mode 100644 index 01e8169..0000000 --- a/palacios/src/palacios/vmm_muxer.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * This file is part of the Palacios Virtual Machine Monitor developed - * by the V3VEE Project with funding from the United States National - * Science Foundation and the Department of Energy. - * - * The V3VEE Project is a joint project between Northwestern University - * and the University of New Mexico. You can find out more at - * http://www.v3vee.org - * - * Copyright (c) 2008, Jack Lange - * Copyright (c) 2008, The V3VEE Project - * All rights reserved. - * - * Author: Jack Lange - * - * This is free software. You are permitted to use, - * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
- */ - -#include -#include -#include - - - -static struct v3_vm_info * foreground_vm = NULL; - -// list of notification callbacks -static LIST_HEAD(cb_list); - - -struct mux_callback { - struct list_head cb_node; - - int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm); -}; - - -struct v3_vm_info * v3_get_foreground_vm() { - return foreground_vm; -} - - -void v3_set_foreground_vm(struct v3_vm_info * vm) { - struct mux_callback * tmp_cb; - - list_for_each_entry(tmp_cb, &(cb_list), cb_node) { - tmp_cb->focus_change(foreground_vm, vm); - } - - foreground_vm = vm; -} - - -int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, - struct v3_vm_info * new_vm)) { - - struct mux_callback * cb = (struct mux_callback *)V3_Malloc(sizeof(struct mux_callback)); - - cb->focus_change = focus_change; - - list_add(&(cb->cb_node), &cb_list); - - return 0; -} diff --git a/palacios/src/palacios/vmm_queue.c b/palacios/src/palacios/vmm_queue.c index b06ff73..03cfb6d 100644 --- a/palacios/src/palacios/vmm_queue.c +++ b/palacios/src/palacios/vmm_queue.c @@ -19,20 +19,20 @@ #include -void v3_init_queue(struct gen_queue * queue) { +void v3_init_queue(struct v3_queue * queue) { queue->num_entries = 0; INIT_LIST_HEAD(&(queue->entries)); v3_lock_init(&queue->lock); } -struct gen_queue * v3_create_queue() { - struct gen_queue * tmp_queue = V3_Malloc(sizeof(struct gen_queue)); +struct v3_queue * v3_create_queue() { + struct v3_queue * tmp_queue = V3_Malloc(sizeof(struct v3_queue)); v3_init_queue(tmp_queue); return tmp_queue; } -void v3_enqueue(struct gen_queue * queue, addr_t entry) { - struct queue_entry * q_entry = V3_Malloc(sizeof(struct queue_entry)); +void v3_enqueue(struct v3_queue * queue, addr_t entry) { + struct v3_queue_entry * q_entry = V3_Malloc(sizeof(struct v3_queue_entry)); v3_lock(queue->lock); q_entry->entry = entry; @@ -42,13 +42,13 @@ void v3_enqueue(struct gen_queue * queue, addr_t entry) { } -addr_t v3_dequeue(struct gen_queue * 
queue) { +addr_t v3_dequeue(struct v3_queue * queue) { addr_t entry_val = 0; v3_lock(queue->lock); if (!list_empty(&(queue->entries))) { struct list_head * q_entry = queue->entries.next; - struct queue_entry * tmp_entry = list_entry(q_entry, struct queue_entry, entry_list); + struct v3_queue_entry * tmp_entry = list_entry(q_entry, struct v3_queue_entry, entry_list); entry_val = tmp_entry->entry; list_del(q_entry); diff --git a/palacios/src/palacios/vmm_vnet_core.c b/palacios/src/palacios/vmm_vnet_core.c index e0e0ac7..4b54d71 100644 --- a/palacios/src/palacios/vmm_vnet_core.c +++ b/palacios/src/palacios/vmm_vnet_core.c @@ -31,6 +31,8 @@ #define PrintDebug(fmt, args...) #endif +int v3_net_debug = 0; + struct eth_hdr { uint8_t dst_mac[ETH_ALEN]; uint8_t src_mac[ETH_ALEN]; @@ -45,11 +47,6 @@ struct vnet_dev { struct v3_vnet_dev_ops dev_ops; void * private_data; - int active; - - uint64_t bytes_tx, bytes_rx; - uint32_t pkts_tx, pkt_rx; - struct list_head node; } __attribute__((packed)); @@ -60,7 +57,6 @@ struct vnet_brg_dev { uint8_t type; - int active; void * private_data; } __attribute__((packed)); @@ -85,6 +81,20 @@ struct route_list { } __attribute__((packed)); +struct queue_entry{ + uint8_t use; + struct v3_vnet_pkt pkt; + uint8_t data[ETHERNET_PACKET_LEN]; +}; + +#define VNET_QUEUE_SIZE 10240 +struct vnet_queue { + struct queue_entry buf[VNET_QUEUE_SIZE]; + int head, tail; + int count; + v3_lock_t lock; +}; + static struct { struct list_head routes; struct list_head devs; @@ -97,10 +107,13 @@ static struct { v3_lock_t lock; struct vnet_stat stats; - struct hashtable * route_cache; -} vnet_state; + void * pkt_flush_thread; + struct vnet_queue pkt_q; + struct hashtable * route_cache; +} vnet_state; + #ifdef CONFIG_DEBUG_VNET static inline void mac_to_string(uint8_t * mac, char * buf) { @@ -182,7 +195,8 @@ static int clear_hash_cache() { return 0; } -static int look_into_cache(const struct v3_vnet_pkt * pkt, struct route_list ** routes) { +static int 
look_into_cache(const struct v3_vnet_pkt * pkt, + struct route_list ** routes) { *routes = (struct route_list *)v3_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf)); return 0; @@ -306,8 +320,8 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) { int max_rank = 0; struct list_head match_list; struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data); -// uint8_t src_type = pkt->src_type; - // uint32_t src_link = pkt->src_id; + // uint8_t src_type = pkt->src_type; + // uint32_t src_link = pkt->src_id; #ifdef CONFIG_DEBUG_VNET { @@ -425,19 +439,18 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) { } -int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { +int vnet_tx_one_pkt(struct v3_vnet_pkt * pkt, void * private_data) { struct route_list * matched_routes = NULL; unsigned long flags; int i; -#ifdef CONFIG_DEBUG_VNET - { - int cpu = V3_Get_CPU(); - PrintDebug("VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", + int cpu = V3_Get_CPU(); + V3_Net_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", cpu, pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type); - } -#endif + if(v3_net_debug >= 4){ + v3_hexdump(pkt->data, pkt->size, NULL, 0); + } flags = v3_lock_irqsave(vnet_state.lock); @@ -466,30 +479,30 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { for (i = 0; i < matched_routes->num_routes; i++) { struct vnet_route_info * route = matched_routes->routes[i]; - if (route->route_def.dst_type == LINK_EDGE) { - struct vnet_brg_dev *bridge = vnet_state.bridge; - pkt->dst_type = LINK_EDGE; - pkt->dst_id = route->route_def.dst_id; + if (route->route_def.dst_type == LINK_EDGE) { + struct vnet_brg_dev * bridge = vnet_state.bridge; + pkt->dst_type = LINK_EDGE; + pkt->dst_id = route->route_def.dst_id; - if (bridge == NULL || (bridge->active == 0)) { - PrintDebug("VNET/P Core: No active 
bridge to sent data to\n"); + if (bridge == NULL) { + V3_Net_Print(2, "VNET/P Core: No active bridge to sent data to\n"); continue; } if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){ - PrintDebug("VNET/P Core: Packet not sent properly to bridge\n"); + V3_Net_Print(2, "VNET/P Core: Packet not sent properly to bridge\n"); continue; } vnet_state.stats.tx_bytes += pkt->size; vnet_state.stats.tx_pkts ++; } else if (route->route_def.dst_type == LINK_INTERFACE) { - if (route->dst_dev == NULL || route->dst_dev->active == 0){ - PrintDebug("VNET/P Core: No active device to sent data to\n"); + if (route->dst_dev == NULL){ + V3_Net_Print(2, "VNET/P Core: No active device to sent data to\n"); continue; } if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) { - PrintDebug("VNET/P Core: Packet not sent properly\n"); + V3_Net_Print(2, "VNET/P Core: Packet not sent properly\n"); continue; } vnet_state.stats.tx_bytes += pkt->size; @@ -502,6 +515,50 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { return 0; } + +static int vnet_pkt_enqueue(struct v3_vnet_pkt * pkt){ + unsigned long flags; + struct queue_entry * entry; + struct vnet_queue * q = &(vnet_state.pkt_q); + + flags = v3_lock_irqsave(q->lock); + + if (q->count >= VNET_QUEUE_SIZE){ + V3_Net_Print(1, "VNET Queue overflow!\n"); + v3_unlock_irqrestore(q->lock, flags); + return -1; + } + + q->count ++; + entry = &(q->buf[q->tail++]); + q->tail %= VNET_QUEUE_SIZE; + + v3_unlock_irqrestore(q->lock, flags); + + /* this is ugly, but should happen very unlikely */ + while(entry->use); + + entry->pkt.data = entry->data; + memcpy(&(entry->pkt), pkt, sizeof(struct v3_vnet_pkt)); + memcpy(entry->data, pkt->data, pkt->size); + + entry->use = 1; + + return 0; +} + + +int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize) { + if(synchronize){ + vnet_tx_one_pkt(pkt, NULL); + }else { + vnet_pkt_enqueue(pkt); + V3_Net_Print(2, 
"VNET/P Core: Put pkt into Queue: pkt size %d\n", pkt->size); + } + + return 0; +} + int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, struct v3_vnet_dev_ops *ops, void * priv_data){ @@ -517,11 +574,9 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, memcpy(new_dev->mac_addr, mac, 6); new_dev->dev_ops.input = ops->input; - new_dev->dev_ops.poll = ops->poll; new_dev->private_data = priv_data; new_dev->vm = vm; new_dev->dev_id = 0; - new_dev->active = 1; flags = v3_lock_irqsave(vnet_state.lock); @@ -544,7 +599,6 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, } - int v3_vnet_del_dev(int dev_id){ struct vnet_dev * dev = NULL; unsigned long flags; @@ -566,6 +620,7 @@ int v3_vnet_del_dev(int dev_id){ return 0; } + int v3_vnet_stat(struct vnet_stat * stats){ stats->rx_bytes = vnet_state.stats.rx_bytes; @@ -604,12 +659,10 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, struct vnet_brg_dev * tmp_bridge = NULL; flags = v3_lock_irqsave(vnet_state.lock); - if (vnet_state.bridge == NULL) { bridge_free = 1; vnet_state.bridge = (void *)1; } - v3_unlock_irqrestore(vnet_state.lock, flags); if (bridge_free == 0) { @@ -629,7 +682,6 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, tmp_bridge->brg_ops.input = ops->input; tmp_bridge->brg_ops.poll = ops->poll; tmp_bridge->private_data = priv_data; - tmp_bridge->active = 1; tmp_bridge->type = type; /* make this atomic to avoid possible race conditions */ @@ -641,20 +693,39 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, } -void v3_vnet_do_poll(struct v3_vm_info * vm){ - struct vnet_dev * dev = NULL; +static int vnet_tx_flush(void *args){ + unsigned long flags; + struct queue_entry * entry; + struct vnet_queue * q = &(vnet_state.pkt_q); - /* TODO: run this on separate threads - * round-robin schedule, with maximal budget for each poll - */ - list_for_each_entry(dev, &(vnet_state.devs), node) { - if(dev->dev_ops.poll != NULL){ - dev->dev_ops.poll(vm, -1, dev->private_data); - } + V3_Print("VNET/P 
Handing Pkt Thread Starting ....\n"); + + //V3_THREAD_SLEEP(); + /* we need thread sleep/wakeup in Palacios */ + while(1){ + flags = v3_lock_irqsave(q->lock); + + if (q->count <= 0){ + v3_unlock_irqrestore(q->lock, flags); + v3_yield(NULL); + //V3_THREAD_SLEEP(); + }else { + q->count --; + entry = &(q->buf[q->head++]); + q->head %= VNET_QUEUE_SIZE; + + v3_unlock_irqrestore(q->lock, flags); + + /* this is ugly, but should happen very unlikely */ + while(!entry->use); + vnet_tx_one_pkt(&(entry->pkt), NULL); + entry->use = 0; + + V3_Net_Print(2, "vnet_tx_flush: pkt (size %d)\n", entry->pkt.size); + } } } - int v3_init_vnet() { memset(&vnet_state, 0, sizeof(vnet_state)); @@ -669,12 +740,15 @@ int v3_init_vnet() { } vnet_state.route_cache = v3_create_htable(0, &hash_fn, &hash_eq); - if (vnet_state.route_cache == NULL) { PrintError("VNET/P Core: Fails to initiate route cache\n"); return -1; } + v3_lock_init(&(vnet_state.pkt_q.lock)); + + vnet_state.pkt_flush_thread = V3_CREATE_THREAD(vnet_tx_flush, NULL, "VNET_Pkts"); + PrintDebug("VNET/P Core is initiated\n"); return 0; diff --git a/palacios/src/palacios/vmm_xed.c b/palacios/src/palacios/vmm_xed.c index 9f3d7ac..80fbfde 100644 --- a/palacios/src/palacios/vmm_xed.c +++ b/palacios/src/palacios/vmm_xed.c @@ -465,7 +465,7 @@ int v3_decode(struct guest_info * info, addr_t instr_ptr, struct x86_instr * ins } } - V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op))); +// V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op))); if (xed_operand_read(op)) { @@ -555,7 +555,7 @@ int v3_decode(struct guest_info * info, addr_t instr_ptr, struct x86_instr * ins } } - V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op))); +// V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op))); if (xed_operand_read(op)) { v3_op->read = 1; diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index eb79fa3..4326788 
100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -34,6 +34,7 @@ #include #include +#include #ifndef CONFIG_DEBUG_VMX #undef PrintDebug @@ -41,8 +42,12 @@ #endif -static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0}; +/* These fields contain the hardware feature sets supported by the local CPU */ +static struct vmx_hw_info hw_info; + + static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0}; +static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0}; extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs); extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs); @@ -50,7 +55,7 @@ extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, str static int inline check_vmcs_write(vmcs_field_t field, addr_t val) { int ret = 0; - ret = vmcs_write(field,val); + ret = vmcs_write(field, val); if (ret != VMX_SUCCESS) { PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret); @@ -76,7 +81,6 @@ static int inline check_vmcs_read(vmcs_field_t field, void * val) { static addr_t allocate_vmcs() { - reg_ex_t msr; struct vmcs_data * vmcs_page = NULL; PrintDebug("Allocating page\n"); @@ -84,10 +88,8 @@ static addr_t allocate_vmcs() { vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1)); memset(vmcs_page, 0, 4096); - v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low)); - - vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision; - PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision); + vmcs_page->revision = hw_info.basic_info.revision; + PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision); return (addr_t)V3_PAddr((void *)vmcs_page); } @@ -388,7 +390,7 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state) // reenable global interrupts for vm state initialization now // that 
the vm state is initialized. If another VM kicks us off, // it'll update our vmx state so that we know to reload ourself - v3_disable_ints(); + v3_enable_ints(); return 0; } @@ -641,6 +643,13 @@ int v3_vmx_enter(struct guest_info * info) { // disable global interrupts for vm state transition v3_disable_ints(); + + if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) { + vmcs_load(vmx_info->vmcs_ptr_phys); + active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys; + } + + v3_vmx_restore_vmcs(info); @@ -666,10 +675,6 @@ int v3_vmx_enter(struct guest_info * info) { check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high); check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low); - if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) { - vmcs_load(vmx_info->vmcs_ptr_phys); - active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys; - } if (vmx_info->state == VMX_UNLAUNCHED) { vmx_info->state = VMX_LAUNCHED; @@ -726,10 +731,15 @@ int v3_vmx_enter(struct guest_info * info) { update_irq_exit_state(info); #endif - // Handle any exits needed still in the atomic section - if (v3_handle_atomic_vmx_exit(info, &exit_info) == -1) { - PrintError("Error in atomic VMX exit handler\n"); - return -1; + if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) { + // This is a special case whose only job is to inject an interrupt + vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value)); + vmx_info->pri_proc_ctrls.int_wndw_exit = 0; + vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value); + +#ifdef CONFIG_DEBUG_INTERRUPTS + PrintDebug("Interrupts available again! 
(RIP=%llx)\n", info->rip); +#endif } // reenable global interrupts after vm exit @@ -807,6 +817,12 @@ int v3_start_vmx_guest(struct guest_info * info) { } + + +#define VMX_FEATURE_CONTROL_MSR 0x0000003a +#define CPUID_VMX_FEATURES 0x00000005 /* LOCK and VMXON */ +#define CPUID_1_ECX_VTXFLAG 0x00000020 + int v3_is_vmx_capable() { v3_msr_t feature_msr; uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -820,7 +836,7 @@ int v3_is_vmx_capable() { PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo); - if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) { + if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) { PrintDebug("VMX is locked -- enable in the BIOS\n"); return 0; } @@ -833,82 +849,23 @@ int v3_is_vmx_capable() { return 1; } -static int has_vmx_nested_paging() { - return 0; -} -void v3_init_vmx_cpu(int cpu_id) { - extern v3_cpu_arch_t v3_cpu_types[]; - struct v3_msr tmp_msr; - uint64_t ret = 0; - v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo)); -#ifdef __V3_64BIT__ - __asm__ __volatile__ ( - "movq %%cr4, %%rbx;" - "orq $0x00002000, %%rbx;" - "movq %%rbx, %0;" - : "=m"(ret) - : - : "%rbx" - ); - - if ((~ret & tmp_msr.value) == 0) { - __asm__ __volatile__ ( - "movq %0, %%cr4;" - : - : "q"(ret) - ); - } else { - PrintError("Invalid CR4 Settings!\n"); - return; - } +void v3_init_vmx_cpu(int cpu_id) { + extern v3_cpu_arch_t v3_cpu_types[]; - __asm__ __volatile__ ( - "movq %%cr0, %%rbx; " - "orq $0x00000020,%%rbx; " - "movq %%rbx, %%cr0;" - : - : - : "%rbx" - ); -#elif __V3_32BIT__ - __asm__ __volatile__ ( - "movl %%cr4, %%ecx;" - "orl $0x00002000, %%ecx;" - "movl %%ecx, %0;" - : "=m"(ret) - : - : "%ecx" - ); - - if ((~ret & tmp_msr.value) == 0) { - __asm__ __volatile__ ( - "movl %0, %%cr4;" - : - : "q"(ret) - ); - } else { - PrintError("Invalid CR4 Settings!\n"); - return; + if (cpu_id == 0) { + if (v3_init_vmx_hw(&hw_info) == -1) { + PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id); + return; + } } - 
__asm__ __volatile__ ( - "movl %%cr0, %%ecx; " - "orl $0x00000020,%%ecx; " - "movl %%ecx, %%cr0;" - : - : - : "%ecx" - ); -#endif - - // - // Should check and return Error here.... + enable_vmx(); // Setup VMXON Region @@ -916,7 +873,7 @@ void v3_init_vmx_cpu(int cpu_id) { PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]); - if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) { + if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) { PrintDebug("VMX Enabled\n"); } else { PrintError("VMX initialization failure\n"); @@ -924,11 +881,8 @@ void v3_init_vmx_cpu(int cpu_id) { } - if (has_vmx_nested_paging() == 1) { - v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU; - } else { - v3_cpu_types[cpu_id] = V3_VMX_CPU; - } + v3_cpu_types[cpu_id] = V3_VMX_CPU; + } diff --git a/palacios/src/palacios/vmx_ept.c b/palacios/src/palacios/vmx_ept.c new file mode 100644 index 0000000..42ca942 --- /dev/null +++ b/palacios/src/palacios/vmx_ept.c @@ -0,0 +1,19 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Jack Lange + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + + diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c index 4066bf2..e5da762 100644 --- a/palacios/src/palacios/vmx_handler.c +++ b/palacios/src/palacios/vmx_handler.c @@ -42,25 +42,6 @@ #endif /* At this point the GPRs are already copied into the guest_info state */ -int v3_handle_atomic_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) { - struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data); - - switch (exit_info->exit_reason) { - case VMEXIT_INTR_WINDOW: - // This is here because we touch the VMCS - vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value)); - vmx_info->pri_proc_ctrls.int_wndw_exit = 0; - vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value); - -#ifdef CONFIG_DEBUG_INTERRUPTS - PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip); -#endif - break; - } - return 0; -} - -/* At this point the GPRs are already copied into the guest_info state */ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) { /* PrintError("Handling VMEXIT: %s (%u), %lu (0x%lx)\n", diff --git a/palacios/src/palacios/vmx_hw_info.c b/palacios/src/palacios/vmx_hw_info.c new file mode 100644 index 0000000..3220e52 --- /dev/null +++ b/palacios/src/palacios/vmx_hw_info.c @@ -0,0 +1,113 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2011, Jack Lange + * Copyright (c) 2011, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + +#include +#include +#include +#include + +// Intel VMX Feature MSRs + + + +static int get_ex_ctrl_caps(struct vmx_hw_info * hw_info, struct vmx_ctrl_field * field, + uint32_t old_msr, uint32_t true_msr) { + uint32_t old_0; /* Bit is 1 => MB1 */ + uint32_t old_1; /* Bit is 0 => MBZ */ + uint32_t true_0; /* Bit is 1 => MB1 */ + uint32_t true_1; /* Bit is 0 => MBZ */ + + v3_get_msr(old_msr, &old_1, &old_0); + field->def_val = old_0; + + if (hw_info->basic_info.def1_maybe_0) { + v3_get_msr(true_msr, &true_1, &true_0); + } else { + true_0 = old_0; + true_1 = old_1; + } + + field->req_val = true_0; + field->req_mask = ~(true_1 ^ true_0); + + return 0; +} + + +static int get_ctrl_caps(struct vmx_ctrl_field * field, uint32_t msr) { + uint32_t mbz = 0; /* Bit is 0 => MBZ */ + uint32_t mb1 = 0; /* Bit is 1 => MB1 */ + + v3_get_msr(msr, &mbz, &mb1); + + field->def_val = mb1; + field->req_val = mb1; + field->req_mask = ~(mbz ^ mb1); + + return 0; +} + + + +static int get_cr_fields(struct vmx_cr_field * field, uint32_t fixed_1_msr, uint32_t fixed_0_msr) { + struct v3_msr mbz; /* Bit is 0 => MBZ */ + struct v3_msr mb1; /* Bit is 0 => MBZ */ + + v3_get_msr(fixed_1_msr, &(mbz.hi), &(mbz.lo)); + v3_get_msr(fixed_0_msr, &(mb1.hi), &(mb1.lo)); + + field->def_val = mb1.value; + field->req_val = mb1.value; + field->req_mask = ~(mbz.value ^ mb1.value); + + return 0; +} + + + + + +int v3_init_vmx_hw(struct vmx_hw_info * hw_info) { + // extern v3_cpu_arch_t v3_cpu_types[]; + + memset(hw_info, 0, sizeof(struct vmx_hw_info)); + + v3_get_msr(VMX_BASIC_MSR, &(hw_info->basic_info.hi), &(hw_info->basic_info.lo)); + v3_get_msr(VMX_MISC_MSR, &(hw_info->misc_info.hi), &(hw_info->misc_info.lo)); + v3_get_msr(VMX_EPT_VPID_CAP_MSR, &(hw_info->ept_info.hi), &(hw_info->ept_info.lo)); + + PrintError("BASIC_MSR: Lo: %x, Hi: %x\n", hw_info->basic_info.lo, hw_info->basic_info.hi); + + get_ex_ctrl_caps(hw_info, &(hw_info->pin_ctrls), VMX_PINBASED_CTLS_MSR, VMX_TRUE_PINBASED_CTLS_MSR); + 
get_ex_ctrl_caps(hw_info, &(hw_info->proc_ctrls), VMX_PROCBASED_CTLS_MSR, VMX_TRUE_PROCBASED_CTLS_MSR); + get_ex_ctrl_caps(hw_info, &(hw_info->exit_ctrls), VMX_EXIT_CTLS_MSR, VMX_TRUE_EXIT_CTLS_MSR); + get_ex_ctrl_caps(hw_info, &(hw_info->entry_ctrls), VMX_ENTRY_CTLS_MSR, VMX_TRUE_ENTRY_CTLS_MSR); + + /* Get secondary PROCBASED controls if secondary controls are available (optional or required) */ + /* Intel Manual 3B. Sect. G.3.3 */ + if ( ((hw_info->proc_ctrls.req_mask & 0x80000000) == 0) || + ((hw_info->proc_ctrls.req_val & 0x80000000) == 1) ) { + get_ctrl_caps(&(hw_info->proc_ctrls_2), VMX_PROCBASED_CTLS2_MSR); + } + + get_cr_fields(&(hw_info->cr0), VMX_CR0_FIXED1_MSR, VMX_CR0_FIXED0_MSR); + get_cr_fields(&(hw_info->cr4), VMX_CR4_FIXED1_MSR, VMX_CR4_FIXED0_MSR); + + return 0; +}