endmenu
source "palacios/src/interfaces/Kconfig"
-
+source "palacios/src/extensions/Kconfig"
config TELEMETRY
bool "Enable VMM telemetry support"
libs-y := palacios/lib/$(ARCH)/
devices-y := palacios/src/devices/
interfaces-y := palacios/src/interfaces/
+extensions-y := palacios/src/extensions/
modules-y := modules/
palacios-dirs := $(patsubst %/,%,$(filter %/, \
- $(core-y) $(devices-y) $(interfaces-y) $(libs-y)) $(modules-y))
+ $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y)) $(modules-y))
palacios-cleandirs := $(sort $(palacios-dirs) $(patsubst %/,%,$(filter %/, \
$(core-n) $(core-) $(devices-n) $(devices-) \
- $(interfaces-n) $(interfaces-) $(modules-n) $(modules-))))
+ $(interfaces-n) $(interfaces-) $(extensions-n) $(extensions-) $(modules-n) $(modules-))))
core-y := $(patsubst %/, %/built-in.o, $(core-y))
devices-y := $(patsubst %/, %/built-in.o, $(devices-y))
interfaces-y := $(patsubst %/, %/built-in.o, $(interfaces-y))
+extensions-y := $(patsubst %/, %/built-in.o, $(extensions-y))
libs-y := $(patsubst %/, %/built-in.o, $(libs-y))
modules-y := $(patsubst %/, %/built-in.o, $(modules-y))
#lnxmod-y := $(patsubst %/, %/built-in.o, $(lnxmod-y))
-palacios := $(core-y) $(devices-y) $(interfaces-y) $(libs-y) $(modules-y)
+palacios := $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y) $(modules-y)
# Rule to link palacios - also used during CONFIG_CONFIGKALLSYMS
--- /dev/null
+/*
+ * DebugFS interface
+ * (c) Jack Lange, 2011
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <interfaces/inspector.h>
+
+#include "palacios.h"
+
+struct dentry * v3_dir = NULL;
+
+
+/* Create the global "v3vee" debugfs directory.
+ * Returns 0 on success, -1 on failure. */
+int palacios_init_debugfs( void ) {
+
+    v3_dir = debugfs_create_dir("v3vee", NULL);
+
+    /* debugfs_create_dir() reports failure either as NULL or as an
+     * ERR_PTR value (e.g. when debugfs is not compiled in), so a bare
+     * IS_ERR() check would miss the NULL case. */
+    if (IS_ERR_OR_NULL(v3_dir)) {
+	printk("Error creating v3vee debugfs directory\n");
+	v3_dir = NULL;
+	return -1;
+    }
+
+    return 0;
+}
+
+
+/* Remove the global "v3vee" debugfs directory created at init time.
+ * NOTE(review): debugfs_remove() removes only v3_dir itself; if per-VM
+ * subtrees were registered underneath it, a recursive removal may be
+ * required — confirm against the registration path. */
+int palacios_deinit_debugfs( void ) {
+    debugfs_remove(v3_dir);
+    return 0;
+}
+
+
+
+/* Recursively export an inspection subtree into a debugfs directory.
+ * A node whose value size is 0 is an interior node and becomes a
+ * subdirectory; sizes 1/2/4/8 become u8/u16/u32/u64 debugfs files;
+ * any other size (buffer values) is currently not exported. */
+static int dfs_register_tree(struct dentry * dir, v3_inspect_node_t * root) {
+    v3_inspect_node_t * tmp_node = v3_inspection_first_child(root);
+    struct v3_inspection_value tmp_value;
+
+    while (tmp_node) {
+	tmp_value = v3_inspection_value(tmp_node);
+
+	if (tmp_value.size == 0) {
+	    struct dentry * new_dir = debugfs_create_dir(tmp_value.name, dir);
+
+	    /* debugfs reports failure as NULL or an ERR_PTR value;
+	     * skip this subtree rather than recurse on a bad dentry. */
+	    if (!IS_ERR_OR_NULL(new_dir)) {
+		dfs_register_tree(new_dir, tmp_node);
+	    }
+	} else if (tmp_value.size == 1) {
+	    debugfs_create_u8(tmp_value.name, 0644, dir, (u8 *)tmp_value.value);
+	} else if (tmp_value.size == 2) {
+	    debugfs_create_u16(tmp_value.name, 0644, dir, (u16 *)tmp_value.value);
+	} else if (tmp_value.size == 4) {
+	    debugfs_create_u32(tmp_value.name, 0644, dir, (u32 *)tmp_value.value);
+	} else if (tmp_value.size == 8) {
+	    debugfs_create_u64(tmp_value.name, 0644, dir, (u64 *)tmp_value.value);
+	} else {
+	    // buffer values are not exported yet
+	}
+
+	tmp_node = v3_inspection_node_next(tmp_node);
+
+    }
+
+    return 0;
+}
+
+
+/* Export a guest's inspection tree under the global v3vee debugfs
+ * directory. Returns 0 on success, -1 if the guest has no inspection
+ * root. */
+int dfs_register_vm(struct v3_guest * guest) {
+    v3_inspect_node_t * root = v3_get_inspection_root(guest->v3_ctx);
+
+    if (root == NULL) {
+	printk("No inspection root found\n");
+	return -1;
+    }
+
+    dfs_register_tree(v3_dir, root);
+    return 0;
+}
--- /dev/null
+/*
+ * DebugFS interface
+ * (c) Jack Lange, 2011
+ */
+
+#include "palacios.h"
+
+/* Create / remove the global "v3vee" debugfs directory.
+ * Both return 0 on success, negative on failure. */
+int palacios_init_debugfs( void );
+int palacios_deinit_debugfs( void );
+
+
+
+/* Expose a guest's inspection tree under the v3vee debugfs directory. */
+int dfs_register_vm(struct v3_guest * guest);
+
#define READ_ONLY 2
#define HOOKED 4
-struct v3_inspector_state {
- struct v3_mtree state_tree;
-
-};
-
-
int v3_init_inspector(struct v3_vm_info * vm);
int v3_init_inspector_core(struct guest_info * core);
#include <palacios/vmm.h>
-
/*
The purpose of this interface is to make it possible to implement
#ifdef __V3VEE__
+struct v3_vm_info;
+
v3_host_dev_t v3_host_dev_open(char *impl,
v3_bus_class_t bus,
- v3_guest_dev_t gdev);
+ v3_guest_dev_t gdev,
+ struct v3_vm_info *vm);
int v3_host_dev_close(v3_host_dev_t hdev);
int v3_host_dev_ack_irq(v3_host_dev_t hostdev, uint8_t irq);
-uint64_t v3_host_dev_config_read(v3_host_dev_t hostdev,
+uint64_t v3_host_dev_read_config(v3_host_dev_t hostdev,
uint64_t offset,
void *dest,
uint64_t len);
-uint64_t v3_host_dev_config_write(v3_host_dev_t hostdev,
- uint64_t offset,
+uint64_t v3_host_dev_write_config(v3_host_dev_t hostdev,
+ uint64_t offset,
void *src,
uint64_t len);
// this device is attached to and an opaque pointer back to the
// guest device. It returns an opaque representation of
// the host device it has attached to, with zero indicating
- // failure
+    // failure. The host_priv_data argument supplies the host with
+    // the pointer that the VM was originally registered with
v3_host_dev_t (*open)(char *impl,
v3_bus_class_t bus,
- v3_guest_dev_t gdev);
+ v3_guest_dev_t gdev,
+ void *host_priv_data);
int (*close)(v3_host_dev_t hdev);
// fail, returning != len
// Callee gets the host dev id, and the guest physical address
uint64_t (*read_mem)(v3_host_dev_t hostdev,
- addr_t gpa,
+ void * gpa,
void *dest,
uint64_t len);
uint64_t (*write_mem)(v3_host_dev_t hostdev,
- addr_t gpa,
+ void * gpa,
void *src,
uint64_t len);
/* These functions allow the host to read and write the guest
memory by physical address, for example to implement DMA
-
- These functions are incremental - that is, they can return
- a smaller amount than requested
*/
uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t hostdev,
v3_guest_dev_t guest_dev,
- addr_t gpa,
+ void * gpa,
void *dest,
uint64_t len);
uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t hostdev,
v3_guest_dev_t guest_dev,
- addr_t gpa,
+ void * gpa,
void *src,
uint64_t len);
struct v3_sym_core_state;
#endif
-#ifdef CONFIG_INSPECTOR
-#include <palacios/vmm_inspector.h>
-#endif
#include <palacios/vmm_config.h>
struct v3_telemetry_state telemetry;
#endif
-#ifdef CONFIG_INSPECTOR
- struct v3_inspector_state inspector;
-#endif
uint64_t yield_cycle_period;
+/* Decoded layout of a 32-bit VMCS field encoding (low bits select the
+ * field; see the Intel SDM VMCS field-encoding appendix). */
+struct vmcs_field_encoding {
+    uint8_t access_type : 1; /* 0 = full, 1 = high, (for accessing 64 bit fields on 32bit CPU) */
+    uint16_t index : 9;
+    uint8_t type : 2; /* 0=ctrl, 1=read-only, 2 = guest state, 3 = host state */
+    uint8_t rsvd1 : 1; /* MBZ */
+    uint8_t width : 2; /* 0 = 16bit, 1 = 64bit, 2 = 32bit, 3 = natural width */
+    uint32_t rsvd2 : 17;
+} __attribute__((packed));
+
+
typedef enum {
VMCS_GUEST_ES_SELECTOR = 0x00000800,
#ifdef CONFIG_MULTITHREAD_OS
-#define V3_CREATE_THREAD(fn, arg, name) \
- do { \
+#define V3_CREATE_THREAD(fn, arg, name) ({ \
+ void * thread = NULL; \
extern struct v3_os_hooks * os_hooks; \
if ((os_hooks) && (os_hooks)->start_kernel_thread) { \
- (os_hooks)->start_kernel_thread(fn, arg, name); \
+ thread = (os_hooks)->start_kernel_thread(fn, arg, name); \
} \
- } while (0)
+ thread; \
+ })
+
+
+#define V3_THREAD_SLEEP() \
+ do{ \
+ extern struct v3_os_hooks * os_hooks; \
+ if ((os_hooks) && (os_hooks)->kernel_thread_sleep) { \
+ (os_hooks)->kernel_thread_sleep(); \
+ } \
+ }while(0)
+
+
+#define V3_THREAD_WAKEUP(thread) \
+ do{ \
+ extern struct v3_os_hooks * os_hooks; \
+ if ((os_hooks) && (os_hooks)->kernel_thread_wakeup) { \
+ (os_hooks)->kernel_thread_wakeup(thread); \
+ } \
+ }while(0)
+
#define V3_Call_On_CPU(cpu, fn, arg) \
- void (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name);
+ void * (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name);
+ void (*kernel_thread_sleep)(void);
+ void (*kernel_thread_wakeup)(void * thread);
void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector);
void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg);
void * (*start_thread_on_cpu)(int cpu_id, int (*fn)(void * arg), void * arg, char * thread_name);
-/*
+/*
* This file is part of the Palacios Virtual Machine Monitor developed
* by the V3VEE Project with funding from the United States National
* Science Foundation and the Department of Energy.
* and the University of New Mexico. You can find out more at
* http://www.v3vee.org
*
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
* All rights reserved.
*
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
-#ifndef __VMM_MUXER_H__
-#define __VMM_MUXER_H__
+#ifndef __VMM_BARRIER_H__
+#define __VMM_BARRIER_H__
#ifdef __V3VEE__
-struct v3_vm_info;
+#include <util/vmm_lock.h>
+struct v3_barrier {
+
+ int active; // If 1, barrier is active, everyone must wait
+ // If 0, barrier is clear, can proceed
-struct v3_vm_info * v3_get_foreground_vm();
-void v3_set_foreground_vm(struct v3_vm_info * vm);
+ v3_lock_t lock;
+};
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm));
#endif
void v3_print_cpuid_map(struct v3_vm_info * vm);
+int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid,
+ uint32_t rax_mask, uint32_t rax,
+ uint32_t rbx_mask, uint32_t rbx,
+ uint32_t rcx_mask, uint32_t rcx,
+ uint32_t rdx_mask, uint32_t rdx);
+
int v3_hook_cpuid(struct v3_vm_info * vm, uint32_t cpuid,
int (*hook_fn)(struct guest_info * info, uint32_t cpuid, \
uint32_t * eax, uint32_t * ebx, \
struct v3_dev_net_ops {
/* Backend implemented functions */
- int (*send)(uint8_t * buf, uint32_t count, void * private_data);
+ int (*send)(uint8_t * buf, uint32_t len, int synchronize, void * private_data);
/* Frontend implemented functions */
- int (*recv)(uint8_t * buf, uint32_t count, void * frnt_data);
- void (*poll)(struct v3_vm_info * vm, int budget, void * frnt_data);
+ int (*recv)(uint8_t * buf, uint32_t len, void * frnt_data);
/* This is ugly... */
void * frontend_data;
#define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_MTU)
#define ETH_ALEN 6
+#define MIN_MTU 68
+//#define MAX_MTU 65535
+#define MAX_MTU 9000
+
+#define MAX_PACKET_LEN (ETHERNET_HEADER_LEN + MAX_MTU)
+
+
+extern int v3_net_debug;
#ifdef __V3VEE__
#include <palacios/vmm.h>
+#define V3_Net_Print(level, fmt, args...) \
+ do { \
+ if(level <= v3_net_debug) { \
+ extern struct v3_os_hooks * os_hooks; \
+ if ((os_hooks) && (os_hooks)->print) { \
+ (os_hooks)->print((fmt), ##args); \
+ } \
+ } \
+ } while (0)
+
struct nic_statistics {
- uint32_t tx_pkts;
+ uint64_t tx_pkts;
uint64_t tx_bytes;
- uint32_t tx_dropped;
+ uint64_t tx_dropped;
- uint32_t rx_pkts;
+ uint64_t rx_pkts;
uint64_t rx_bytes;
- uint32_t rx_dropped;
+ uint64_t rx_dropped;
- uint32_t interrupts;
+ uint32_t tx_interrupts;
+ uint32_t rx_interrupts;
};
static inline int is_multicast_ethaddr(const uint8_t * addr)
#ifdef __V3VEE__
#include <palacios/vmm.h>
-#include <palacios/vmm_list.h>
#include <palacios/vmm_config.h>
+#include <palacios/vmm_list.h>
struct v3_vm_info;
char * name;
int (*init)(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data);
int (*deinit)(struct v3_vm_info * vm, void * priv_data);
- int (*core_init)(struct guest_info * core);
- int (*core_deinit)(struct guest_info * core);
- int (*on_entry)(struct guest_info * core);
- int (*on_exit)(struct guest_info * core);
+ int (*core_init)(struct guest_info * core, void * priv_data);
+ int (*core_deinit)(struct guest_info * core, void * priv_data);
+ int (*on_entry)(struct guest_info * core, void * priv_data);
+ int (*on_exit)(struct guest_info * core, void * priv_data);
};
struct v3_extension {
int v3_init_ext_manager(struct v3_vm_info * vm);
int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg);
+int v3_init_core_extensions(struct guest_info * core);
+
+void * v3_get_extension_state(struct v3_vm_info * vm, const char * name);
#define register_extension(ext) \
#define MAKE_1OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
#define MAKE_1OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
#define MAKE_1OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
#define MAKE_1OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushfq; " \
#define MAKE_2OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst, addr_t * src, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushfq\r\n" \
#define MAKE_2OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst, addr_t * src, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
#define MAKE_2OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst, addr_t * src, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
#define MAKE_2OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst, addr_t * src, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushfq; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushfq; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
addr_t * src, \
addr_t * ecx, addr_t * flags) { \
/* Some of the flags values are not copied out in a pushf, we save them here */ \
- addr_t flags_rsvd = *flags & ~0xfffe7fff; \
+ addr_t flags_rsvd = *flags & ~0xfffc7fff; \
\
asm volatile ( \
"pushf; " \
#include <palacios/vmm_lock.h>
-/* IMPORTANT:
- * This implementation currently does no locking, and as such is not
- * SMP/thread/interrupt safe
- */
-struct queue_entry {
+struct v3_queue_entry {
addr_t entry;
struct list_head entry_list;
};
-struct gen_queue {
+struct v3_queue {
uint_t num_entries;
struct list_head entries;
v3_lock_t lock;
};
-struct gen_queue * v3_create_queue();
-void v3_init_queue(struct gen_queue * queue);
+struct v3_queue * v3_create_queue();
+void v3_init_queue(struct v3_queue * queue);
-void v3_enqueue(struct gen_queue * queue, addr_t entry);
-addr_t v3_dequeue(struct gen_queue * queue);
+void v3_enqueue(struct v3_queue * queue, addr_t entry);
+addr_t v3_dequeue(struct v3_queue * queue);
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
-#ifndef __VNET_H__
-#define __VNET_H__
+#ifndef __VNET_CORE_H__
+#define __VNET_CORE_H__
#include <palacios/vmm.h>
#include <palacios/vmm_ethernet.h>
#define VNET_HASH_SIZE 17
-//routing table entry
+extern int v3_vnet_debug;
+
struct v3_vnet_route {
uint8_t src_mac[ETH_ALEN];
uint8_t dst_mac[ETH_ALEN];
uint8_t type,
void * priv_data);
int v3_vnet_add_route(struct v3_vnet_route route);
-int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data);
+int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize);
int v3_vnet_find_dev(uint8_t * mac);
int v3_vnet_stat(struct vnet_stat * stats);
int (*input)(struct v3_vm_info * vm,
struct v3_vnet_pkt * pkt,
void * dev_data);
- void (*poll) (struct v3_vm_info * vm, int budget, void * dev_data);
};
int v3_init_vnet(void);
void v3_deinit_vnet(void);
-void v3_vnet_do_poll(struct v3_vm_info * vm);
-
int v3_vnet_add_dev(struct v3_vm_info * info, uint8_t * mac,
struct v3_vnet_dev_ops * ops,
void * priv_data);
int v3_vnet_del_dev(int dev_id);
+
#endif
#endif
#include <palacios/vmm.h>
#include <palacios/vm_guest.h>
-// Intel VMX Specific MSRs
-#define VMX_FEATURE_CONTROL_MSR 0x0000003a
-#define VMX_BASIC_MSR 0x00000480
-#define VMX_PINBASED_CTLS_MSR 0x00000481
-#define VMX_PROCBASED_CTLS_MSR 0x00000482
-#define VMX_EXIT_CTLS_MSR 0x00000483
-#define VMX_ENTRY_CTLS_MSR 0x00000484
-#define VMX_MISC_MSR 0x00000485
-#define VMX_CR0_FIXED0_MSR 0x00000486
-#define VMX_CR0_FIXED1_MSR 0x00000487
-#define VMX_CR4_FIXED0_MSR 0x00000488
-#define VMX_CR4_FIXED1_MSR 0x00000489
-#define VMX_VMCS_ENUM_MSR 0x0000048A
#define VMX_SUCCESS 0
#define VMX_FAIL_INVALID 1
#define VMX_FAIL_VALID 2
#define VMM_ERROR 3
-#define FEATURE_CONTROL_LOCK 0x00000001
-#define FEATURE_CONTROL_VMXON 0x00000004
-#define FEATURE_CONTROL_VALID ( FEATURE_CONTROL_LOCK | FEATURE_CONTROL_VMXON )
-#define CPUID_1_ECX_VTXFLAG 0x00000020
struct vmx_pin_ctrls {
} __attribute__((packed));
} __attribute__((packed));
-struct vmx_basic_msr {
- uint32_t revision;
- uint_t regionSize : 13;
- uint_t rsvd1 : 4; // Always 0
- uint_t physWidth : 1;
- uint_t smm : 1; // Always 1
- uint_t memType : 4;
- uint_t rsvd2 : 10; // Always 0
-} __attribute__((packed));
typedef enum {
VMXASSIST_DISABLED,
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMX_EPT_H__
+#define __VMX_EPT_H__
+
+
+#ifdef __V3VEE__
+
+/* The actual format of these data structures is specified as being machine
+ dependent. Thus the lengths of the base address fields are defined as variable.
+ To be safe we assume the maximum(?) size fields
+*/
+
+
+/* EPT pointer (EPTP): psmt = EPT paging-structure memory type,
+ * pwl1 = page-walk length minus one. Base-address width is the assumed
+ * maximum (see the note above on machine-dependent field sizes). */
+typedef struct vmx_eptp {
+    uint8_t psmt : 3;
+    uint8_t pwl1 : 3;
+    uint8_t rsvd1 : 6;
+    uint64_t pml_base_addr : 39;
+    uint16_t rsvd2 : 13;
+} __attribute__((packed)) vmx_eptp_t;
+
+
+/* EPT PML4 entry: read/write/exec permissions plus the physical base
+ * address of the next-level (PDP) table. */
+typedef struct vmx_pml4 {
+    uint8_t read : 1;
+    uint8_t write : 1;
+    uint8_t exec : 1;
+    uint8_t rsvd1 : 5;
+    uint8_t ignore1 : 4;
+    uint64_t pdp_base_addr : 39;
+    uint8_t rsvd2 : 1;
+    uint32_t ignore2 : 12;
+} __attribute__((packed)) vmx_pml4_t;
+
+
+/* EPT PDP entry mapping a 1GB page (large_page set): mt/ipat are the
+ * EPT memory-type bits; page_base_addr holds the physical address
+ * bits above the 1GB offset. */
+typedef struct vmx_pdp_1GB {
+    uint8_t read : 1;
+    uint8_t write : 1;
+    uint8_t exec : 1;
+    uint8_t mt : 3;
+    uint8_t ipat : 1;
+    uint8_t large_page : 1;
+    uint8_t ignore1 : 4;
+    uint32_t rsvd1 : 18;
+    uint32_t page_base_addr : 21;
+    uint8_t rsvd2 : 1;
+    uint32_t ignore2 : 12;
+} __attribute__((packed)) vmx_pdp_1GB_t;
+
+/* EPT PDP entry pointing to a page directory (large_page clear). */
+typedef struct vmx_pdp {
+    uint8_t read : 1;
+    uint8_t write : 1;
+    uint8_t exec : 1;
+    uint8_t rsvd1 : 4;
+    uint8_t large_page : 1;
+    uint8_t ignore1 : 4;
+    /* uint64_t base type: a bit-field width of 39 exceeds a 32-bit
+     * type (C99 6.7.2.1) and would not compile; matches vmx_eptp. */
+    uint64_t page_base_addr : 39;
+    uint8_t rsvd2 : 1;
+    uint32_t ignore2 : 12;
+} __attribute__((packed)) vmx_pdp_t;
+
+
+/* EPT PDE mapping a 2MB page (large_page set): mt/ipat are the EPT
+ * memory-type bits; page_base_addr holds the physical address bits
+ * above the 2MB offset. */
+typedef struct vmx_pde_2MB {
+    uint8_t read : 1;
+    uint8_t write : 1;
+    uint8_t exec : 1;
+    uint8_t mt : 3;
+    uint8_t ipat : 1;
+    uint8_t large_page : 1;
+    uint8_t ignore1 : 4;
+    uint32_t rsvd1 : 9;
+    uint32_t page_base_addr : 30;
+    uint8_t rsvd2 : 1;
+    uint32_t ignore2 : 12;
+} __attribute__((packed)) vmx_pde_2MB_t;
+
+
+/* EPT PDE pointing to a page table (large_page clear). */
+typedef struct vmx_pde {
+    uint8_t read : 1;
+    uint8_t write : 1;
+    uint8_t exec : 1;
+    uint8_t rsvd1 : 4;
+    uint8_t large_page : 1;
+    uint8_t ignore1 : 4;
+    /* uint64_t base type: a bit-field width of 39 exceeds a 32-bit
+     * type (C99 6.7.2.1) and would not compile; matches vmx_eptp. */
+    uint64_t page_base_addr : 39;
+    uint8_t rsvd2 : 1;
+    uint32_t ignore2 : 12;
+} __attribute__((packed)) vmx_pde_t;
+
+
+
+/* EPT PTE mapping a 4KB page: mt/ipat are the EPT memory-type bits. */
+typedef struct vmx_pte {
+    uint8_t read : 1;
+    uint8_t write : 1;
+    uint8_t exec : 1;
+    uint8_t mt : 3;
+    uint8_t ipat : 1;
+    uint8_t ignore1 : 5;
+    /* uint64_t base type: a bit-field width of 39 exceeds a 32-bit
+     * type (C99 6.7.2.1) and would not compile; matches vmx_eptp. */
+    uint64_t page_base_addr : 39;
+    uint8_t rsvd2 : 1;
+    uint32_t ignore2 : 12;
+} __attribute__((packed)) vmx_pte_t;
+
+#endif
+
+#endif
+
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMX_HW_INFO_H__
+#define __VMX_HW_INFO_H__
+
+#ifdef __V3VEE__
+
+
+
+#define VMX_BASIC_MSR 0x00000480
+#define VMX_PINBASED_CTLS_MSR 0x00000481
+#define VMX_PROCBASED_CTLS_MSR 0x00000482
+#define VMX_EXIT_CTLS_MSR 0x00000483
+#define VMX_ENTRY_CTLS_MSR 0x00000484
+#define VMX_MISC_MSR 0x00000485
+#define VMX_CR0_FIXED0_MSR 0x00000486
+#define VMX_CR0_FIXED1_MSR 0x00000487
+#define VMX_CR4_FIXED0_MSR 0x00000488
+#define VMX_CR4_FIXED1_MSR 0x00000489
+#define VMX_VMCS_ENUM_MSR 0x0000048A
+#define VMX_PROCBASED_CTLS2_MSR 0x0000048B
+#define VMX_EPT_VPID_CAP_MSR 0x0000048C
+#define VMX_TRUE_PINBASED_CTLS_MSR 0x0000048D
+#define VMX_TRUE_PROCBASED_CTLS_MSR 0x0000048E
+#define VMX_TRUE_EXIT_CTLS_MSR 0x0000048F
+#define VMX_TRUE_ENTRY_CTLS_MSR 0x00000490
+
+
+
+/* Decoded IA32_VMX_BASIC (MSR 0x480, VMX_BASIC_MSR above); the raw
+ * lo/hi words alias the decoded bit-fields via the anonymous union. */
+struct vmx_basic_msr {
+    union {
+	struct {
+	    uint32_t lo;
+	    uint32_t hi;
+	} __attribute__((packed));
+
+	struct { uint32_t revision;
+	    uint32_t regionSize : 13;
+	    uint8_t rsvd1 : 3; /* Always 0 */
+	    uint8_t physWidth : 1; /* VMCS address field widths
+				      (1=32bits, 0=natural width) */
+	    uint8_t smm : 1;
+	    uint8_t memType : 4; /* 0 = UC, 6 = WriteBack */
+	    uint8_t io_str_info : 1;
+	    uint8_t def1_maybe_0 : 1; /* 1="Any VMX ctrls that default to 1 may be cleared to 0" */
+	    uint32_t rsvd2 : 8; /* Always 0 */
+	} __attribute__((packed));
+    } __attribute__((packed));
+} __attribute__((packed));
+
+
+/* Decoded IA32_VMX_MISC (MSR 0x485, VMX_MISC_MSR above). */
+struct vmx_misc_msr {
+    union {
+	struct {
+	    uint32_t lo;
+	    uint32_t hi;
+	} __attribute__((packed));
+
+	struct {
+	    uint8_t tsc_multiple : 5; /* Bit position in TSC field that drives vmx timer step */
+	    uint8_t exits_store_LMA : 1;
+	    uint8_t can_halt : 1;
+	    uint8_t can_shtdown : 1;
+	    uint8_t can_wait_for_sipi : 1;
+	    uint8_t rsvd1 : 7;
+	    uint16_t num_cr3_targets : 9;
+	    uint8_t max_msr_cache_size : 3; /* (512 * (max_msr_cache_size + 1)) == max msr load/store list size */
+	    uint8_t SMM_ctrl_avail : 1;
+	    uint8_t rsvd2 : 3;
+	    uint32_t MSEG_rev_id;
+	} __attribute__((packed));
+    } __attribute__((packed));
+} __attribute__((packed));
+
+
+/* Decoded IA32_VMX_EPT_VPID_CAP (MSR 0x48C, VMX_EPT_VPID_CAP_MSR
+ * above): EPT and VPID capability bits. */
+struct vmx_ept_msr {
+    union {
+	struct {
+	    uint32_t lo;
+	    uint32_t hi;
+	} __attribute__((packed));
+
+	struct {
+	    uint8_t exec_only_ok : 1;
+	    uint8_t rsvd1 : 5;
+	    uint8_t pg_walk_len4 : 1; /* support for a page walk of length 4 */
+	    uint8_t rsvd2 : 1;
+	    uint8_t ept_uc_ok : 1; /* EPT page tables can be uncacheable */
+	    uint8_t rsvd3 : 5;
+	    uint8_t ept_wb_ok : 1; /* EPT page tables can be writeback */
+	    uint8_t rsvd4 : 1;
+	    uint8_t ept_2MB_ok : 1; /* 2MB EPT pages supported */
+	    uint8_t ept_1GB_ok : 1; /* 1GB EPT pages supported */
+	    uint8_t rsvd5 : 2;
+	    uint8_t INVEPT_avail : 1; /* INVEPT instruction is available */
+	    uint8_t rsvd6 : 4;
+	    uint8_t INVEPT_single_ctx_avail : 1;
+	    uint8_t INVEPT_all_ctx_avail : 1;
+	    uint8_t rsvd7 : 5;
+	    uint8_t INVVPID_avail : 1;
+	    uint8_t rsvd8 : 7;
+	    uint8_t INVVPID_1addr_avail : 1;
+	    uint8_t INVVPID_single_ctx_avail : 1;
+	    uint8_t INVVPID_all_ctx_avail : 1;
+	    uint8_t INVVPID_single_ctx_w_glbls_avail : 1;
+	    uint32_t rsvd9 : 20;
+	} __attribute__((packed));
+    } __attribute__((packed));
+}__attribute__((packed));
+
+
+/* Allowed settings for a 32-bit VMX control field, as derived from its
+ * capability MSR. */
+struct vmx_ctrl_field {
+    uint32_t def_val;
+    uint32_t req_val; /* Required values: field_val & req_mask == req_val */
+    uint32_t req_mask; /* If a mask bit is set its value is restricted (i.e. the VMM cannot change it) */
+};
+
+
+/* Allowed settings for a control register (CR0/CR4), from the
+ * corresponding FIXED0/FIXED1 MSR pair. */
+struct vmx_cr_field {
+    uint64_t def_val;
+    uint64_t req_val; /* Required values: field_val & req_mask == req_val */
+    uint64_t req_mask; /* If a mask bit is set its value is restricted (i.e. the VMM cannot change it) */
+};
+
+
+
+
+/* Aggregated VMX hardware capability information, filled in by
+ * v3_init_vmx_hw() from the capability MSRs defined above. */
+struct vmx_hw_info {
+    struct vmx_basic_msr basic_info;
+    struct vmx_misc_msr misc_info;
+    struct vmx_ept_msr ept_info;
+
+    struct vmx_ctrl_field pin_ctrls;
+    struct vmx_ctrl_field proc_ctrls;
+    struct vmx_ctrl_field exit_ctrls;
+    struct vmx_ctrl_field entry_ctrls;
+    struct vmx_ctrl_field proc_ctrls_2;
+
+    struct vmx_cr_field cr0;
+    struct vmx_cr_field cr4;
+};
+
+
+/* Probe the VMX capability MSRs and populate hw_info. */
+int v3_init_vmx_hw(struct vmx_hw_info * hw_info);
+
+
+
+
+#endif
+
+#endif
-static inline int v3_enable_vmx(addr_t vmxon_ptr) {
- uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr;
- uint8_t ret_invalid = 0;
- __asm__ __volatile__ (
- VMXON_OPCODE
- EAX_06_MODRM
- "setnaeb %0;" // fail invalid (CF=1)
- : "=q"(ret_invalid)
- : "a"(&vmxon_ptr_64),"0"(ret_invalid)
- : "memory");
-
- if (ret_invalid) {
- return VMX_FAIL_INVALID;
- } else {
- return VMX_SUCCESS;
- }
-}
static inline int vmcs_clear(addr_t vmcs_ptr) {
uint64_t vmcs_ptr_64 __attribute__ ((aligned(8))) = (uint64_t)vmcs_ptr;
return VMX_SUCCESS;
}
+
+/* Execute VMXON with the physical address of the VMXON region.
+ * Returns VMX_SUCCESS, or VMX_FAIL_INVALID when the instruction
+ * fails with CF=1 (detected via setnae). */
+static inline int vmx_on(addr_t vmxon_ptr) {
+    uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr;
+    uint8_t ret_invalid = 0;
+
+    __asm__ __volatile__ (
+		VMXON_OPCODE
+		EAX_06_MODRM
+		"setnaeb %0;" // fail invalid (CF=1)
+		: "=q"(ret_invalid)
+		: "a"(&vmxon_ptr_64),"0"(ret_invalid)
+		: "memory");
+
+    if (ret_invalid) {
+	return VMX_FAIL_INVALID;
+    } else {
+	return VMX_SUCCESS;
+    }
+}
+
static inline int vmx_off() {
uint8_t ret_valid = 0;
uint8_t ret_invalid = 0;
return VMX_SUCCESS;
}
+
+/* Prepare the CPU for VMX operation by setting CR4.VMXE (bit 13,
+ * mask 0x2000) and CR0.NE (bit 5, mask 0x20), using the register
+ * width appropriate to the build (64-bit RBX path vs 32-bit ECX
+ * path). Always returns 0. */
+static inline int enable_vmx() {
+#ifdef __V3_64BIT__
+    /* CR4 |= VMXE */
+    __asm__ __volatile__ (
+			  "movq %%cr4, %%rbx;"
+			  "orq  $0x00002000, %%rbx;"
+			  "movq %%rbx, %%cr4;"
+			  :
+			  :
+			  : "%rbx"
+			  );
+
+    /* CR0 |= NE */
+    __asm__ __volatile__ (
+			  "movq %%cr0, %%rbx; "
+			  "orq  $0x00000020,%%rbx; "
+			  "movq %%rbx, %%cr0;"
+			  :
+			  :
+			  : "%rbx"
+			  );
+#elif __V3_32BIT__
+    /* CR4 |= VMXE */
+    __asm__ __volatile__ (
+			  "movl %%cr4, %%ecx;"
+			  "orl  $0x00002000, %%ecx;"
+			  "movl %%ecx, %%cr4;"
+			  :
+			  :
+			  : "%ecx"
+			  );
+
+
+    /* CR0 |= NE */
+    __asm__ __volatile__ (
+			  "movl %%cr0, %%ecx; "
+			  "orl  $0x00000020,%%ecx; "
+			  "movl %%ecx, %%cr0;"
+			  :
+			  :
+			  : "%ecx"
+			  );
+
+#endif
+
+    return 0;
+}
+
+
+
+
+
#endif
#endif
bool "Generic Device"
default y
help
- Includes the Virtual Generic device
+ Includes the virtual generic device. This device allows you
+ to see guest I/O port and memory region interaction with a physical
+ device on the underlying hardware, as well as to ignore such
+ interaction. The generic device also serves as a front-end
+ device for non-PCI host-based virtual device implementations. If
+ you want to handle either host-based virtual or physical devices
+ that are not PCI devices, this is what you want. If you want
+ to handle a host-based virtual device that is a PCI device, you
+ want to use the PCI front-end device. If you want to handle
+ a physical PCI device, you want the passthrough PCI device.
config DEBUG_GENERIC
bool "Generic device Debugging"
default n
depends on PCI && EXPERIMENTAL && VNET
help
- Enable the Virtio VNET interface
+ Enable the Virtio VNET interface for Control VM
config DEBUG_LINUX_VIRTIO_VNET
bool "Virtio VNET Interface Debugging"
config VNET_NIC
- bool "Enable VNET VIrtio NIC Device"
+ bool "Enable VNET Backend Device"
default n
depends on PCI && EXPERIMENTAL && VNET
help
- Enable the VNET Virtio backend device
+ Enable the VNET backend device
config DEBUG_VNET_NIC
bool "VNET NIC Device Debugging"
help
Enables hardware devices to be passed through to the VM
+
config DEBUG_PCI
bool "PCI debugging"
depends on PCI && DEBUG_ON
Enable debugging for the PCI
+config PCI_FRONT
+ bool "PCI front-end device"
+ default y
+ depends on PCI && HOST_DEVICE
+ help
+ PCI front-end device for a host-based PCI device implementation
+ This device allows you to project a host-based *virtual* device
+ into the guest as a PCI device. If you want to project a
+ physical PCI device, use Passthrough PCI instead. If you want
+ to project a non-PCI virtual or physical device,
+ use the generic device.
+
+
+config DEBUG_PCI_FRONT
+ bool "PCI front-end debugging"
+ depends on PCI_FRONT && DEBUG_ON
+ help
+ Enable debugging for the PCI front-end device
+
+
config PIC
bool "8259A PIC"
obj-$(CONFIG_VGA) += vga.o
+obj-$(CONFIG_PCI_FRONT) += pci_front.o
+
#include <palacios/vmm_list.h>
#include <palacios/vmm_io.h>
#include <palacios/vmm_dev_mgr.h>
+#include <palacios/vm_guest_mem.h>
+
+#ifdef CONFIG_HOST_DEVICE
+#include <interfaces/vmm_host_dev.h>
+#endif
#ifndef CONFIG_DEBUG_GENERIC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
+#define MAX_NAME 32
+#define MAX_MEM_HOOKS 16
typedef enum {GENERIC_IGNORE,
GENERIC_PASSTHROUGH,
GENERIC_PRINT_AND_IGNORE} generic_mode_t;
struct generic_internal {
+ enum {GENERIC_PHYSICAL, GENERIC_HOST} forward_type;
+#ifdef CONFIG_HOST_DEVICE
+ v3_host_dev_t host_dev;
+#endif
+ struct vm_device *dev; // me
+
+ char name[MAX_NAME];
+
+ uint32_t num_mem_hooks;
+ addr_t mem_hook[MAX_MEM_HOOKS];
};
-static int generic_write_port_passthrough(struct guest_info * core, uint16_t port, void * src,
- uint_t length, void * priv_data) {
+static int generic_write_port_passthrough(struct guest_info * core,
+ uint16_t port,
+ void * src,
+ uint_t length,
+ void * priv_data)
+{
+ struct generic_internal *state = (struct generic_internal *) priv_data;
uint_t i;
- switch (length) {
- case 1:
- v3_outb(port, ((uint8_t *)src)[0]);
- break;
- case 2:
- v3_outw(port, ((uint16_t *)src)[0]);
+ switch (state->forward_type) {
+ case GENERIC_PHYSICAL:
+ switch (length) {
+ case 1:
+ v3_outb(port, ((uint8_t *)src)[0]);
+ break;
+ case 2:
+ v3_outw(port, ((uint16_t *)src)[0]);
+ break;
+ case 4:
+ v3_outdw(port, ((uint32_t *)src)[0]);
+ break;
+ default:
+ for (i = 0; i < length; i++) {
+ v3_outb(port, ((uint8_t *)src)[i]);
+ }
+ break;
+ }
+ return length;
break;
- case 4:
- v3_outdw(port, ((uint32_t *)src)[0]);
+#ifdef CONFIG_HOST_DEVICE
+ case GENERIC_HOST:
+ if (state->host_dev) {
+ return v3_host_dev_write_io(state->host_dev,port,src,length);
+ } else {
+ return -1;
+ }
break;
+#endif
default:
- for (i = 0; i < length; i++) {
- v3_outb(port, ((uint8_t *)src)[i]);
- }
+ PrintError("generic (%s): unknown forwarding type\n", state->name);
+ return -1;
+ break;
}
-
- return length;
}
static int generic_write_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src,
uint_t i;
int rc;
- PrintDebug("generic: writing 0x");
+#ifdef CONFIG_DEBUG_GENERIC
+ struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+ PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ...", state->name,
+ length, port,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+ PrintDebug("generic (%s): writing 0x", state->name);
for (i = 0; i < length; i++) {
PrintDebug("%x", ((uint8_t *)src)[i]);
return rc;
}
-static int generic_read_port_passthrough(struct guest_info * core, uint16_t port, void * src,
- uint_t length, void * priv_data) {
+static int generic_read_port_passthrough(struct guest_info * core,
+ uint16_t port,
+ void * dst,
+ uint_t length,
+ void * priv_data)
+{
+ struct generic_internal *state = (struct generic_internal *) priv_data;
+
uint_t i;
- switch (length) {
- case 1:
- ((uint8_t *)src)[0] = v3_inb(port);
- break;
- case 2:
- ((uint16_t *)src)[0] = v3_inw(port);
+ switch (state->forward_type) {
+ case GENERIC_PHYSICAL:
+ switch (length) {
+ case 1:
+ ((uint8_t *)dst)[0] = v3_inb(port);
+ break;
+ case 2:
+ ((uint16_t *)dst)[0] = v3_inw(port);
+ break;
+ case 4:
+ ((uint32_t *)dst)[0] = v3_indw(port);
+ break;
+ default:
+ for (i = 0; i < length; i++) {
+ ((uint8_t *)dst)[i] = v3_inb(port);
+ }
+ }
+ return length;
break;
- case 4:
- ((uint32_t *)src)[0] = v3_indw(port);
+#ifdef CONFIG_HOST_DEVICE
+ case GENERIC_HOST:
+ if (state->host_dev) {
+ return v3_host_dev_read_io(state->host_dev,port,dst,length);
+ }
break;
+#endif
default:
- for (i = 0; i < length; i++) {
- ((uint8_t *)src)[i] = v3_inb(port);
- }
+ PrintError("generic (%s): unknown forwarding type\n", state->name);
+ return -1;
+ break;
}
- return length;
+ return -1;
}
static int generic_read_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src,
uint_t length, void * priv_data) {
uint_t i;
int rc;
-
- PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port);
+
+#ifdef CONFIG_DEBUG_GENERIC
+ struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+ PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
rc=generic_read_port_passthrough(core,port,src,length,priv_data);
static int generic_read_port_print_and_ignore(struct guest_info * core, uint16_t port, void * src,
uint_t length, void * priv_data) {
- PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port);
+#ifdef CONFIG_DEBUG_GENERIC
+ struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+ PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
memset((uint8_t *)src, 0, length);
PrintDebug(" ignored (return zeroed buffer)\n");
uint_t length, void * priv_data) {
int i;
- PrintDebug("generic: writing 0x%x bytes to port 0x%x ", length, port);
+#ifdef CONFIG_DEBUG_GENERIC
+ struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+ PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ", state->name, length, port,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
memset((uint8_t *)src, 0, length);
PrintDebug(" ignored - data was: 0x");
+static int generic_write_mem_passthrough(struct guest_info * core,
+ addr_t gpa,
+ void * src,
+ uint_t len,
+ void * priv)
+{
+ struct vm_device *dev = (struct vm_device *) priv;
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+ switch (state->forward_type) {
+ case GENERIC_PHYSICAL:
+ memcpy(V3_VAddr((void*)gpa),src,len);
+ return len;
+ break;
+#ifdef CONFIG_HOST_DEVICE
+ case GENERIC_HOST:
+ if (state->host_dev) {
+ return v3_host_dev_write_mem(state->host_dev,gpa,src,len);
+ } else {
+ return -1;
+ }
+ break;
+#endif
+ default:
+ PrintError("generic (%s): unknown forwarding type\n", state->name);
+ return -1;
+ break;
+ }
+}
-static int generic_free(struct generic_internal * state) {
- PrintDebug("generic: deinit_device\n");
+static int generic_write_mem_print_and_passthrough(struct guest_info * core,
+ addr_t gpa,
+ void * src,
+ uint_t len,
+ void * priv)
+{
+#ifdef CONFIG_DEBUG_GENERIC
+ struct vm_device *dev = (struct vm_device *) priv;
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+ PrintDebug("generic (%s): writing %u bytes to GPA 0x%p via %s ... ", state->name,
+ len,(void*)gpa,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+ int rc = generic_write_mem_passthrough(core,gpa,src,len,priv);
+
+ PrintDebug("done\n");
+
+ return rc;
+}
+static int generic_write_mem_ignore(struct guest_info * core,
+ addr_t gpa,
+ void * src,
+ uint_t len,
+ void * priv)
+{
+ return len;
+}
+
+static int generic_write_mem_print_and_ignore(struct guest_info * core,
+ addr_t gpa,
+ void * src,
+ uint_t len,
+ void * priv)
+{
+#ifdef CONFIG_DEBUG_GENERIC
+ struct vm_device *dev = (struct vm_device *) priv;
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+ PrintDebug("generic (%s): ignoring write of %u bytes to GPA 0x%p via %s", state->name,
+ len,(void*)gpa,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+ return len;
+}
+
+static int generic_read_mem_passthrough(struct guest_info * core,
+ addr_t gpa,
+ void * dst,
+ uint_t len,
+ void * priv)
+{
+ struct vm_device *dev = (struct vm_device *) priv;
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+ switch (state->forward_type) {
+ case GENERIC_PHYSICAL:
+ memcpy(dst,V3_VAddr((void*)gpa),len);
+ return len;
+ break;
+#ifdef CONFIG_HOST_DEVICE
+ case GENERIC_HOST:
+ if (state->host_dev) {
+ return v3_host_dev_read_mem(state->host_dev,gpa,dst,len);
+ } else {
+ return -1;
+ }
+ break;
+#endif
+ default:
+ PrintError("generic (%s): unknown forwarding type\n", state->name);
+ break;
+ }
+
+ return -1;
+}
+
+static int generic_read_mem_print_and_passthrough(struct guest_info * core,
+ addr_t gpa,
+ void * dst,
+ uint_t len,
+ void * priv)
+{
+#ifdef CONFIG_DEBUG_GENERIC
+ struct vm_device *dev = (struct vm_device *) priv;
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+ PrintDebug("generic (%s): attempting to read %u bytes from GPA 0x%p via %s ... ", state->name,
+ len,(void*)gpa,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+ int rc = generic_read_mem_passthrough(core,gpa,dst,len,priv);
+
+ PrintDebug("done - read %d bytes\n", rc);
+
+ return rc;
+}
+
+static int generic_read_mem_ignore(struct guest_info * core,
+ addr_t gpa,
+ void * dst,
+ uint_t len,
+ void * priv)
+{
+#ifdef CONFIG_DEBUG_GENERIC
+ struct vm_device *dev = (struct vm_device *) priv;
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+ PrintDebug("generic (%s): ignoring attempt to read %u bytes from GPA 0x%p via %s ... ", state->name,
+ len,(void*)gpa,
+ state->forward_type == GENERIC_PHYSICAL ? "physical" :
+ state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+ memset((uint8_t *)dst, 0, len);
+
+ PrintDebug("returning zeros\n");
+
+ return len;
+}
+
+
+static int generic_read_mem_print_and_ignore(struct guest_info * core,
+ addr_t gpa,
+ void * dst,
+ uint_t len,
+ void * priv)
+{
+ memset((uint8_t *)dst, 0, len);
+ return len;
+}
+
+
+static int generic_free(struct generic_internal * state) {
+ int i;
+
+ PrintDebug("generic (%s): deinit_device\n", state->name);
+
+#ifdef CONFIG_HOST_DEVICE
+ if (state->host_dev) {
+ v3_host_dev_close(state->host_dev);
+ state->host_dev=0;
+ }
+#endif
+
+ // Note that the device manager handles unhooking the I/O ports
+ // We need to handle unhooking memory regions
+ for (i=0;i<state->num_mem_hooks;i++) {
+ if (v3_unhook_mem(state->dev->vm,V3_MEM_CORE_ANY,state->mem_hook[i])<0) {
+ PrintError("generic (%s): unable to unhook memory starting at 0x%p\n", state->name,(void*)(state->mem_hook[i]));
+ return -1;
+ }
+ }
+
V3_Free(state);
return 0;
}
static int add_port_range(struct vm_device * dev, uint_t start, uint_t end, generic_mode_t mode) {
uint_t i = 0;
- PrintDebug("generic: Adding Port Range: 0x%x to 0x%x as %s\n",
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+ PrintDebug("generic (%s): adding port range 0x%x to 0x%x as %s\n", state->name,
start, end,
- (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : "print-and-ignore");
-
+ (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" :
+ (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" :
+ (mode == GENERIC_PASSTHROUGH) ? "passthrough" :
+ (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN");
+
for (i = start; i <= end; i++) {
- if (mode == GENERIC_PRINT_AND_PASSTHROUGH) {
- if (v3_dev_hook_io(dev, i,
- &generic_read_port_print_and_passthrough,
- &generic_write_port_print_and_passthrough) == -1) {
- PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+ switch (mode) {
+ case GENERIC_PRINT_AND_PASSTHROUGH:
+ if (v3_dev_hook_io(dev, i,
+ &generic_read_port_print_and_passthrough,
+ &generic_write_port_print_and_passthrough) == -1) {
+ PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+ return -1;
+ }
+ break;
+
+ case GENERIC_PRINT_AND_IGNORE:
+ if (v3_dev_hook_io(dev, i,
+ &generic_read_port_print_and_ignore,
+ &generic_write_port_print_and_ignore) == -1) {
+ PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+ return -1;
+ }
+ break;
+ case GENERIC_PASSTHROUGH:
+ if (v3_dev_hook_io(dev, i,
+ &generic_read_port_passthrough,
+ &generic_write_port_passthrough) == -1) {
+ PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+ return -1;
+ }
+ break;
+ case GENERIC_IGNORE:
+ if (v3_dev_hook_io(dev, i,
+ &generic_read_port_ignore,
+ &generic_write_port_ignore) == -1) {
+ PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+ return -1;
+ }
+ break;
+ default:
+ PrintError("generic (%s): huh?\n", state->name);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+static int add_mem_range(struct vm_device * dev, addr_t start, addr_t end, generic_mode_t mode) {
+
+ struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+ PrintDebug("generic (%s): adding memory range 0x%p to 0x%p as %s\n", state->name,
+ (void*)start, (void*)end,
+ (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" :
+ (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" :
+ (mode == GENERIC_PASSTHROUGH) ? "passthrough" :
+ (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN");
+
+ switch (mode) {
+ case GENERIC_PRINT_AND_PASSTHROUGH:
+ if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1,
+ &generic_read_mem_print_and_passthrough,
+ &generic_write_mem_print_and_passthrough, dev) == -1) {
+ PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
return -1;
}
- } else if (mode == GENERIC_PRINT_AND_IGNORE) {
- if (v3_dev_hook_io(dev, i,
- &generic_read_port_print_and_ignore,
- &generic_write_port_print_and_ignore) == -1) {
- PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+ break;
+
+ case GENERIC_PRINT_AND_IGNORE:
+ if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1,
+ &generic_read_mem_print_and_ignore,
+ &generic_write_mem_print_and_ignore, dev) == -1) {
+ PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
return -1;
}
- } else if (mode == GENERIC_PASSTHROUGH) {
- if (v3_dev_hook_io(dev, i,
- &generic_read_port_passthrough,
- &generic_write_port_passthrough) == -1) {
- PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+ break;
+
+ case GENERIC_PASSTHROUGH:
+ if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1,
+ &generic_read_mem_passthrough,
+ &generic_write_mem_passthrough, dev) == -1) {
+ PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
return -1;
}
- } else if (mode == GENERIC_IGNORE) {
- if (v3_dev_hook_io(dev, i,
- &generic_read_port_ignore,
- &generic_write_port_ignore) == -1) {
- PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+ break;
+
+ case GENERIC_IGNORE:
+ if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1,
+ &generic_read_mem_ignore,
+ &generic_write_mem_ignore, dev) == -1) {
+ PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
return -1;
}
- }
+ break;
+ default:
+ PrintError("generic (%s): huh?\n",state->name);
+ break;
}
-
+
return 0;
}
+/*
+ The device can be used to forward to the underlying physical device
+ or to a host device that has a given url. Both memory and ports can be forwarded as
+
+ GENERIC_PASSTHROUGH => send writes and reads to physical device or host
+ GENERIC_PRINT_AND_PASSTHROUGH => also print what it's doing
+
+ GENERIC_IGNORE => ignore writes and reads
+ GENERIC_PRINT_AND_IGNORE => also print what it's doing
+
+
+ The purpose of the "PRINT" variants is to make it easy to spy on
+ device interactions (although you will not see DMA or interrupts)
+
+ <device class="generic" id="my_id"
+ empty | forward="physical_device" or forward="host_device" host_device="url">
+
+ (empty implies physical_dev)
+
+ <ports>
+ <start>portno1</start>
+ <end>portno2</end> => portno1 through portno2 (inclusive)
+ <mode>PRINT_AND_PASSTHROUGH</mode> (as above)
+ </ports>
+
+ <memory>
+ <start>gpa1</start>
+ <end>gpa2</end> => memory addresses gpa1 through gpa2 (inclusive); page granularity
+ <mode> ... as above </mode>
+ </memory>
+
+*/
static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
struct generic_internal * state = NULL;
char * dev_id = v3_cfg_val(cfg, "ID");
+ char * forward = v3_cfg_val(cfg, "forward");
+#ifdef CONFIG_HOST_DEVICE
+ char * host_dev = v3_cfg_val(cfg, "hostdev");
+#endif
v3_cfg_tree_t * port_cfg = v3_cfg_subtree(cfg, "ports");
+ v3_cfg_tree_t * mem_cfg = v3_cfg_subtree(cfg, "memory");
state = (struct generic_internal *)V3_Malloc(sizeof(struct generic_internal));
if (state == NULL) {
- PrintError("Could not allocate generic state\n");
+ PrintError("generic (%s): could not allocate generic state\n",dev_id);
return -1;
}
memset(state, 0, sizeof(struct generic_internal));
+ strncpy(state->name,dev_id,MAX_NAME-1); /* state was zeroed, so name stays NUL-terminated */
+
+ if (!forward) {
+ state->forward_type=GENERIC_PHYSICAL;
+ } else {
+ if (!strcasecmp(forward,"physical_device")) {
+ state->forward_type=GENERIC_PHYSICAL;
+ } else if (!strcasecmp(forward,"host_device")) {
+#ifdef CONFIG_HOST_DEVICE
+ state->forward_type=GENERIC_HOST;
+#else
+ PrintError("generic (%s): cannot configure host device since host device support is not built in\n", state->name);
+ V3_Free(state);
+ return -1;
+#endif
+ } else {
+ PrintError("generic (%s): unknown forwarding type \"%s\"\n", state->name, forward);
+ V3_Free(state);
+ return -1;
+ }
+ }
struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, state);
if (dev == NULL) {
- PrintError("Could not attach device %s\n", dev_id);
+ PrintError("generic: could not attach device %s\n", state->name);
V3_Free(state);
return -1;
}
- PrintDebug("generic: init_device\n");
+ state->dev=dev;
+
+
+#ifdef CONFIG_HOST_DEVICE
+ if (state->forward_type==GENERIC_HOST) {
+ if (!host_dev) {
+ PrintError("generic (%s): host forwarding requested, but no host device given\n", state->name);
+ v3_remove_device(dev);
+ return -1;
+ } else {
+ state->host_dev = v3_host_dev_open(host_dev,V3_BUS_CLASS_DIRECT,dev,vm);
+ if (!(state->host_dev)) {
+ PrintError("generic (%s): unable to open host device \"%s\"\n", state->name,host_dev);
+ v3_remove_device(dev);
+ return -1;
+ } else {
+ PrintDebug("generic (%s): successfully attached host device \"%s\"\n", state->name,host_dev);
+ }
+ }
+ }
+#endif
+
+ PrintDebug("generic (%s): init_device\n", state->name);
// scan port list....
while (port_cfg) {
uint16_t end = atox(v3_cfg_val(port_cfg, "end"));
char * mode_str = v3_cfg_val(port_cfg, "mode");
generic_mode_t mode = GENERIC_IGNORE;
-
if (strcasecmp(mode_str, "print_and_ignore") == 0) {
mode = GENERIC_PRINT_AND_IGNORE;
} else if (strcasecmp(mode_str, "print_and_passthrough") == 0) {
} else if (strcasecmp(mode_str, "ignore") == 0) {
mode = GENERIC_IGNORE;
} else {
- PrintError("Invalid Mode %s\n", mode_str);
+ PrintError("generic (%s): invalid mode %s in adding ports\n", state->name, mode_str);
v3_remove_device(dev);
return -1;
}
+
if (add_port_range(dev, start, end, mode) == -1) {
- PrintError("Could not add port range %d-%d\n", start, end);
+ PrintError("generic (%s): could not add port range 0x%x to 0x%x\n", state->name, start, end);
v3_remove_device(dev);
return -1;
}
port_cfg = v3_cfg_next_branch(port_cfg);
}
+ // scan memory list....
+ while (mem_cfg) {
+ addr_t start = atox(v3_cfg_val(mem_cfg, "start"));
+ addr_t end = atox(v3_cfg_val(mem_cfg, "end"));
+ char * mode_str = v3_cfg_val(mem_cfg, "mode");
+ generic_mode_t mode = GENERIC_IGNORE;
+
+ if (strcasecmp(mode_str, "print_and_ignore") == 0) {
+ mode = GENERIC_PRINT_AND_IGNORE;
+ } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) {
+ mode = GENERIC_PRINT_AND_PASSTHROUGH;
+ } else if (strcasecmp(mode_str, "passthrough") == 0) {
+ mode = GENERIC_PASSTHROUGH;
+ } else if (strcasecmp(mode_str, "ignore") == 0) {
+ mode = GENERIC_IGNORE;
+ } else {
+ PrintError("generic (%s): invalid mode %s for adding memory\n", state->name, mode_str);
+ v3_remove_device(dev);
+ return -1;
+ }
+
+ if (state->num_mem_hooks>=MAX_MEM_HOOKS) {
+ PrintError("generic (%s): cannot add another memory hook (increase MAX_MEM_HOOKS)\n", state->name);
+ v3_remove_device(dev);
+ return -1;
+ }
+
+ if (add_mem_range(dev, start, end, mode) == -1) {
+ PrintError("generic (%s): could not add memory range 0x%p to 0x%p\n", state->name, (void*)start, (void*)end);
+ v3_remove_device(dev);
+ return -1;
+ }
+
+ state->mem_hook[state->num_mem_hooks] = start;
+ state->num_mem_hooks++;
+
+ mem_cfg = v3_cfg_next_branch(mem_cfg);
+ }
+
+ PrintDebug("generic (%s): initialization complete\n", state->name);
return 0;
}
#define PrintDebug(fmt, args...)
#endif
+#define TX_QUEUE_SIZE 4096
+#define RX_QUEUE_SIZE 4096
+#define CTRL_QUEUE_SIZE 64
+
+/* The feature bitmap for virtio nic
+ * from Linux */
+#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */
+
+/* Port to get virtio config */
+#define VIRTIO_NET_CONFIG 20
+
#define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
+/* for gso_type in virtio_net_hdr */
+#define VIRTIO_NET_HDR_GSO_NONE 0
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
-struct virtio_net_hdr {
- uint8_t flags;
-
- uint8_t gso_type;
- uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */
- uint16_t gso_size; /* Bytes to append to hdr_len per frame */
- uint16_t csum_start; /* Position to start checksumming from */
- uint16_t csum_offset; /* Offset after that to place checksum */
-}__attribute__((packed));
+/* for flags in virtio_net_hdr */
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
-struct virtio_net_hdr_mrg_rxbuf {
- struct virtio_net_hdr hdr;
- uint16_t num_buffers; /* Number of merged rx buffers */
-};
-
-#define TX_QUEUE_SIZE 256
-#define RX_QUEUE_SIZE 4096
-#define CTRL_QUEUE_SIZE 64
+/* First element of the scatter-gather list, used with GSO or CSUM features */
+struct virtio_net_hdr
+{
+ uint8_t flags;
+ uint8_t gso_type;
+ uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */
+ uint16_t gso_size; /* Bytes to append to hdr_len per frame */
+ uint16_t csum_start; /* Position to start checksumming from */
+ uint16_t csum_offset; /* Offset after that to place checksum */
+}__attribute__((packed));
-#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */
-#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
-/* Port to get virtio config */
-#define VIRTIO_NET_CONFIG 20
+/* The header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+ struct virtio_net_hdr hdr;
+ uint16_t num_buffers; /* Number of merged rx buffers */
+};
struct virtio_net_config
{
struct virtio_net_config net_cfg;
struct virtio_config virtio_cfg;
+ struct v3_vm_info * vm;
struct vm_device * dev;
struct pci_device * pci_dev;
int io_range_size;
struct virtio_queue ctrl_vq; /* idx 2*/
struct v3_timer * timer;
+ void * poll_thread;
- struct nic_statistics statistics;
+ struct nic_statistics stats;
struct v3_dev_net_ops * net_ops;
v3_lock_t rx_lock, tx_lock;
uint8_t tx_notify, rx_notify;
uint32_t tx_pkts, rx_pkts;
- uint64_t past_ms;
+ uint64_t past_us;
void * backend_data;
struct virtio_dev_state * virtio_dev;
struct list_head dev_link;
};
+
static int virtio_init_state(struct virtio_net_state * virtio)
{
virtio->rx_vq.queue_size = RX_QUEUE_SIZE;
virtio->virtio_cfg.pci_isr = 0;
- virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC) |
- (1 << VIRTIO_NET_F_HOST_UFO) |
- (1 << VIRTIO_NET_F_HOST_TSO4);
+ virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC);
+ // (1 << VIRTIO_NET_F_GSO) |
+ // (1 << VIRTIO_NET_F_HOST_UFO) |
+ // (1 << VIRTIO_NET_F_HOST_TSO4);
if ((v3_lock_init(&(virtio->rx_lock)) == -1) ||
(v3_lock_init(&(virtio->tx_lock)) == -1)){
}
static int tx_one_pkt(struct guest_info * core,
- struct virtio_net_state * virtio,
- struct vring_desc * buf_desc)
+ struct virtio_net_state * virtio,
+ struct vring_desc * buf_desc)
{
uint8_t * buf = NULL;
uint32_t len = buf_desc->length;
+ int synchronize = 1; // (virtio->tx_notify == 1)?1:0;
if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
PrintDebug("Could not translate buffer address\n");
return -1;
}
- if(virtio->net_ops->send(buf, len, virtio->backend_data) >= 0){
- virtio->statistics.tx_pkts ++;
- virtio->statistics.tx_bytes += len;
+ V3_Net_Print(2, "Virtio-NIC: virtio_tx: size: %d\n", len);
+ if(v3_net_debug >= 4){
+ v3_hexdump(buf, len, NULL, 0);
+ }
- return 0;
+ if(virtio->net_ops->send(buf, len, synchronize, virtio->backend_data) < 0){
+ virtio->stats.tx_dropped ++;
+ return -1;
}
- virtio->statistics.tx_dropped ++;
+ virtio->stats.tx_pkts ++;
+ virtio->stats.tx_bytes += len;
- return -1;
+ return 0;
}
-static int
-copy_data_to_desc(struct guest_info * core,
+static inline int copy_data_to_desc(struct guest_info * core,
struct virtio_net_state * virtio_state,
struct vring_desc * desc,
uchar_t * buf,
uint8_t * desc_buf = NULL;
if (v3_gpa_to_hva(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) {
- PrintError("Could not translate buffer address\n");
+ PrintDebug("Could not translate buffer address\n");
return -1;
}
len = (desc->length < buf_len)?(desc->length - offset):buf_len;
}
-static int get_desc_count(struct virtio_queue * q, int index) {
+static inline int get_desc_count(struct virtio_queue * q, int index) {
struct vring_desc * tmp_desc = &(q->desc[index]);
int cnt = 1;
queue->used->flags |= VRING_NO_NOTIFY_FLAG;
}
-
-/* interrupt the guest, so the guest core get EXIT to Palacios */
-static inline void notify_guest(struct virtio_net_state * virtio){
- v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
-}
-
-
-/* guest free some pkts for rx queue */
-static int handle_rx_queue_kick(struct guest_info * core,
- struct virtio_net_state * virtio)
-{
- return 0;
-}
-
-
-static int handle_ctrl(struct guest_info * core,
- struct virtio_net_state * virtio) {
-
- return 0;
-}
-
static int handle_pkt_tx(struct guest_info * core,
struct virtio_net_state * virtio_state)
{
struct virtio_queue *q = &(virtio_state->tx_vq);
- struct virtio_net_hdr *hdr = NULL;
int txed = 0;
unsigned long flags;
flags = v3_lock_irqsave(virtio_state->tx_lock);
while (q->cur_avail_idx != q->avail->index) {
+ struct virtio_net_hdr *hdr = NULL;
struct vring_desc * hdr_desc = NULL;
addr_t hdr_addr = 0;
uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
int desc_cnt = get_desc_count(q, desc_idx);
- uint32_t req_len = 0;
- int i = 0;
+
+ if(desc_cnt > 2){
+ PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt);
+ goto exit_error;
+ }
hdr_desc = &(q->desc[desc_idx]);
if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
hdr = (struct virtio_net_hdr *)hdr_addr;
desc_idx = hdr_desc->next;
- if(desc_cnt > 2){
- PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt);
+ /* here we assumed that one ethernet pkt is not splitted into multiple buffer */
+ struct vring_desc * buf_desc = &(q->desc[desc_idx]);
+ if (tx_one_pkt(core, virtio_state, buf_desc) == -1) {
+ PrintError("Virtio NIC: Error handling nic operation\n");
goto exit_error;
}
-
- /* here we assumed that one ethernet pkt is not splitted into multiple virtio buffer */
- for (i = 0; i < desc_cnt - 1; i++) {
- struct vring_desc * buf_desc = &(q->desc[desc_idx]);
- if (tx_one_pkt(core, virtio_state, buf_desc) == -1) {
- PrintError("Error handling nic operation\n");
- goto exit_error;
- }
-
- req_len += buf_desc->length;
- desc_idx = buf_desc->next;
- }
-
+
q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
- q->used->ring[q->used->index % q->queue_size].length = req_len; /* What do we set this to???? */
+ q->used->ring[q->used->index % q->queue_size].length = buf_desc->length; /* What do we set this to???? */
q->used->index ++;
q->cur_avail_idx ++;
}
v3_unlock_irqrestore(virtio_state->tx_lock, flags);
-
+
+ //virtio_state->virtio_cfg.pci_isr == 0 &&
if (txed && !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
v3_pci_raise_irq(virtio_state->virtio_dev->pci_bus, 0, virtio_state->pci_dev);
virtio_state->virtio_cfg.pci_isr = 0x1;
- virtio_state->statistics.interrupts ++;
+ virtio_state->stats.rx_interrupts ++;
+ }
+
+ if(txed > 0) {
+ V3_Net_Print(2, "Virtio Handle TX: txed pkts: %d\n", txed);
}
return 0;
break;
case 1:
virtio_setup_queue(core, virtio, &virtio->tx_vq, pfn, page_addr);
+ if(virtio->tx_notify == 0){
+ disable_cb(&virtio->tx_vq);
+ V3_THREAD_WAKEUP(virtio->poll_thread);
+ }
break;
case 2:
virtio_setup_queue(core, virtio, &virtio->ctrl_vq, pfn, page_addr);
{
uint16_t queue_idx = *(uint16_t *)src;
if (queue_idx == 0){
- if(handle_rx_queue_kick(core, virtio) == -1){
- PrintError("Could not handle Virtio NIC rx kick\n");
- return -1;
- }
+ /* receive queue refill */
+ virtio->stats.tx_interrupts ++;
} else if (queue_idx == 1){
if (handle_pkt_tx(core, virtio) == -1) {
PrintError("Could not handle Virtio NIC tx kick\n");
return -1;
}
+ virtio->stats.tx_interrupts ++;
} else if (queue_idx == 2){
- if (handle_ctrl(core, virtio) == -1) {
- PrintError("Could not handle Virtio NIC ctrl kick\n");
- return -1;
- }
+ /* ctrl */
} else {
PrintError("Wrong queue index %d\n", queue_idx);
}
case HOST_FEATURES_PORT:
if (length != 4) {
PrintError("Illegal read length for host features\n");
- return -1;
+ //return -1;
}
*(uint32_t *)dst = virtio->virtio_cfg.host_features;
break;
struct virtio_net_hdr_mrg_rxbuf hdr;
uint32_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
uint32_t data_len;
- uint32_t offset = 0;
+ //uint32_t offset = 0;
unsigned long flags;
-#ifdef CONFIG_DEBUG_VIRTIO_NET
- PrintDebug("Virtio-NIC: virtio_rx: size: %d\n", size);
- v3_hexdump(buf, size, NULL, 0);
-#endif
+ V3_Net_Print(2, "Virtio-NIC: virtio_rx: size: %d\n", size);
+ if(v3_net_debug >= 4){
+ v3_hexdump(buf, size, NULL, 0);
+ }
flags = v3_lock_irqsave(virtio->rx_lock);
memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
if (q->ring_avail_addr == 0) {
- PrintDebug("Queue is not set\n");
+ V3_Net_Print(2, "Virtio NIC: RX Queue not set\n");
+ virtio->stats.rx_dropped ++;
goto err_exit;
}
if (q->cur_avail_idx != q->avail->index){
addr_t hdr_addr = 0;
- uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
uint16_t buf_idx = 0;
+ uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
struct vring_desc * hdr_desc = NULL;
+ struct vring_desc * buf_desc = NULL;
+ uint32_t len;
hdr_desc = &(q->desc[hdr_idx]);
if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
- PrintDebug("Could not translate receive buffer address\n");
+ V3_Net_Print(2, "Virtio NIC: Could not translate receive buffer address\n");
+ virtio->stats.rx_dropped ++;
goto err_exit;
}
- hdr.num_buffers = 1;
- memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
- if (offset >= data_len) {
- hdr_desc->flags &= ~VIRTIO_NEXT_FLAG;
- }
- struct vring_desc * buf_desc = NULL;
- for (buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) {
+#if 0 /* merged buffer */
+ for(buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) {
uint32_t len = 0;
buf_desc = &(q->desc[buf_idx]);
buf_desc->flags = VIRTIO_NEXT_FLAG;
}
buf_desc->length = len;
+ hdr.num_buffers ++;
}
buf_desc->flags &= ~VIRTIO_NEXT_FLAG;
-
+ memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+#endif
+
+ hdr.num_buffers = 1;
+ memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+ if (data_len == 0) {
+ hdr_desc->flags &= ~VIRTIO_NEXT_FLAG;
+ }
+
+ buf_idx = hdr_desc->next;
+ buf_desc = &(q->desc[buf_idx]);
+ len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf, data_len, 0);
+ if (len < data_len) {
+ V3_Net_Print(2, "Virtio NIC: ring buffer len less than pkt size, merged buffer not supported\n");
+ virtio->stats.rx_dropped ++;
+
+ goto err_exit;
+ }
+ buf_desc->flags &= ~VIRTIO_NEXT_FLAG;
+
q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */
q->used->index++;
q->cur_avail_idx++;
- virtio->statistics.rx_pkts ++;
- virtio->statistics.rx_bytes += size;
+ virtio->stats.rx_pkts ++;
+ virtio->stats.rx_bytes += size;
} else {
- virtio->statistics.rx_dropped ++;
+ V3_Net_Print(2, "Virtio NIC: Guest RX queue is full\n");
+ virtio->stats.rx_dropped ++;
+
+ /* kick guest to refill the queue */
+ virtio->virtio_cfg.pci_isr = 0x1;
+ v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev);
+ v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
+ virtio->stats.rx_interrupts ++;
goto err_exit;
}
+ V3_Net_Print(2, "pci_isr %d, virtio flags %d\n", virtio->virtio_cfg.pci_isr, q->avail->flags);
+ //virtio->virtio_cfg.pci_isr == 0 &&
+
if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
- PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line);
-
+ V3_Net_Print(2, "Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line);
+
+ virtio->virtio_cfg.pci_isr = 0x1;
v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev);
- virtio->virtio_cfg.pci_isr = 0x1;
- virtio->statistics.interrupts ++;
+
+ virtio->stats.rx_interrupts ++;
}
v3_unlock_irqrestore(virtio->rx_lock, flags);
- /* notify guest if guest is running */
+ /* notify guest if it is in guest mode */
+ /* ISSUE: What is gonna happen if guest thread is running on the same core as this thread? */
if(virtio->rx_notify == 1){
v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
}
};
-static void virtio_nic_poll(struct v3_vm_info * vm, int budget, void * data){
- struct virtio_net_state * virtio = (struct virtio_net_state *)data;
+static int virtio_tx_flush(void * args){
+ struct virtio_net_state *virtio = (struct virtio_net_state *)args;
+
+ V3_Print("Virtio TX Poll Thread Starting for %s\n", virtio->vm->name);
- if(virtio->tx_notify == 0){
- handle_pkt_tx(&(vm->cores[0]), virtio);
+ while(1){
+ if(virtio->tx_notify == 0){
+ handle_pkt_tx(&(virtio->vm->cores[0]), virtio);
+ v3_yield(NULL);
+ }else {
+ V3_THREAD_SLEEP();
+ }
}
+
+ return 0;
}
static int register_dev(struct virtio_dev_state * virtio,
#define RATE_UPPER_THRESHOLD 10 /* 10000 pkts per second, around 100Mbits */
#define RATE_LOWER_THRESHOLD 1
-#define PROFILE_PERIOD 50 /*50ms*/
+#define PROFILE_PERIOD 10000 /*us*/
-/* Timer Functions */
static void virtio_nic_timer(struct guest_info * core,
uint64_t cpu_cycles, uint64_t cpu_freq,
void * priv_data) {
struct virtio_net_state * net_state = (struct virtio_net_state *)priv_data;
- uint64_t period_ms;
+ uint64_t period_us;
+ static int profile_ms = 0;
- period_ms = cpu_cycles/cpu_freq;
- net_state->past_ms += period_ms;
+ period_us = (1000*cpu_cycles)/cpu_freq;
+ net_state->past_us += period_us;
- if(net_state->past_ms > PROFILE_PERIOD){
+#if 0
+ if(net_state->past_us > PROFILE_PERIOD){
uint32_t tx_rate, rx_rate;
- tx_rate = (net_state->statistics.tx_pkts - net_state->tx_pkts)/net_state->past_ms; /* pkts/per ms */
- rx_rate = (net_state->statistics.rx_pkts - net_state->rx_pkts)/net_state->past_ms;
+ tx_rate = (net_state->stats.tx_pkts - net_state->tx_pkts)/(net_state->past_us/1000); /* pkts/per ms */
+ rx_rate = (net_state->stats.rx_pkts - net_state->rx_pkts)/(net_state->past_us/1000);
- net_state->tx_pkts = net_state->statistics.tx_pkts;
- net_state->rx_pkts = net_state->statistics.rx_pkts;
+ net_state->tx_pkts = net_state->stats.tx_pkts;
+ net_state->rx_pkts = net_state->stats.rx_pkts;
if(tx_rate > RATE_UPPER_THRESHOLD && net_state->tx_notify == 1){
V3_Print("Virtio NIC: Switch TX to VMM driven mode\n");
disable_cb(&(net_state->tx_vq));
net_state->tx_notify = 0;
+ V3_THREAD_WAKEUP(net_state->poll_thread);
}
if(tx_rate < RATE_LOWER_THRESHOLD && net_state->tx_notify == 0){
}
if(rx_rate > RATE_UPPER_THRESHOLD && net_state->rx_notify == 1){
- PrintDebug("Virtio NIC: Switch RX to VMM None notify mode\n");
+ V3_Print("Virtio NIC: Switch RX to VMM None notify mode\n");
net_state->rx_notify = 0;
}
if(rx_rate < RATE_LOWER_THRESHOLD && net_state->rx_notify == 0){
- PrintDebug("Virtio NIC: Switch RX to VMM notify mode\n");
+ V3_Print("Virtio NIC: Switch RX to VMM notify mode\n");
net_state->rx_notify = 1;
}
- net_state->past_ms = 0;
+ net_state->past_us = 0;
}
-}
+#endif
+ profile_ms += period_us/1000;
+ if(profile_ms > 20000){
+ V3_Net_Print(1, "Virtio NIC: TX: Pkt: %lld, Bytes: %lld\n\t\tRX Pkt: %lld. Bytes: %lld\n\t\tDropped: tx %lld, rx %lld\nInterrupts: tx %d, rx %d\nTotal Exit: %lld\n",
+ net_state->stats.tx_pkts, net_state->stats.tx_bytes,
+ net_state->stats.rx_pkts, net_state->stats.rx_bytes,
+ net_state->stats.tx_dropped, net_state->stats.rx_dropped,
+ net_state->stats.tx_interrupts, net_state->stats.rx_interrupts,
+ net_state->vm->cores[0].num_exits);
+ profile_ms = 0;
+ }
+}
static struct v3_timer_ops timer_ops = {
.update_timer = virtio_nic_timer,
memset(net_state, 0, sizeof(struct virtio_net_state));
register_dev(virtio, net_state);
+ net_state->vm = info;
net_state->net_ops = ops;
net_state->backend_data = private_data;
net_state->virtio_dev = virtio;
- net_state->tx_notify = 1;
- net_state->rx_notify = 1;
-
+ net_state->tx_notify = 0;
+ net_state->rx_notify = 0;
+
net_state->timer = v3_add_timer(&(info->cores[0]),&timer_ops,net_state);
ops->recv = virtio_rx;
- ops->poll = virtio_nic_poll;
ops->frontend_data = net_state;
memcpy(ops->fnt_mac, virtio->mac, ETH_ALEN);
+ net_state->poll_thread = V3_CREATE_THREAD(virtio_tx_flush, (void *)net_state, "Virtio_Poll");
+
return 0;
}
if (macstr != NULL && !str2mac(macstr, virtio_state->mac)) {
PrintDebug("Virtio NIC: Mac specified %s\n", macstr);
- PrintDebug("MAC: %x:%x:%x:%x:%x:%x\n", virtio_state->mac[0],
- virtio_state->mac[1],
- virtio_state->mac[2],
- virtio_state->mac[3],
- virtio_state->mac[4],
- virtio_state->mac[5]);
}else {
- PrintDebug("Virtio NIC: MAC not specified\n");
random_ethaddr(virtio_state->mac);
}
memcpy(pkt.header, virtio_pkt->pkt, ETHERNET_HEADER_LEN);
pkt.data = virtio_pkt->pkt;
- v3_vnet_send_pkt(&pkt, NULL);
+ v3_vnet_send_pkt(&pkt, NULL, 1);
q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
q->used->ring[q->used->index % q->queue_size].length = pkt_desc->length; // What do we set this to????
};
static int bridge_send(uint8_t * buf, uint32_t len,
+ int synchronize,
void * private_data) {
#ifdef CONFIG_DEBUG_NIC_BRIDGE
uint64_t period_us;
- period_us = (1000000*cpu_cycles/cpu_freq);
+ // cpu freq in khz
+ period_us = (1000*cpu_cycles/cpu_freq);
update_time(nvram_state,period_us);
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Authors:
+ * Peter Dinda <pdinda@northwestern.edu> (PCI front device forwarding to host dev interface)
+ * Jack Lange <jarusl@cs.northwestern.edu> (original PCI passthrough to physical hardware)
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+/*
+ This is front-end PCI device intended to be used together with the
+ host device interface and a *virtual* PCI device implementation in
+ the host OS. It makes it possible to project such a virtual device
+ into the guest as a PCI device. It's based on the PCI passthrough
+ device, which projects *physical* PCI devices into the guest.
+
+ If you need to project a non-PCI host-based virtual or physical
+ device into the guest, you should use the generic device.
+
+*/
+
+/*
+ * The basic idea is that we do not change the hardware PCI configuration
+ * Instead we modify the guest environment to map onto the physical configuration
+ *
+ * The pci subsystem handles most of the configuration space, except for the bar registers.
+ * We handle them here, by either letting them go directly to hardware or remapping through virtual hooks
+ *
+ * Memory Bars are always remapped via the shadow map,
+ * IO Bars are selectively remapped through hooks if the guest changes them
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_dev_mgr.h>
+#include <palacios/vmm_sprintf.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_symspy.h>
+
+#include <devices/pci.h>
+#include <devices/pci_types.h>
+
+#include <interfaces/vmm_host_dev.h>
+
+
+#ifndef CONFIG_DEBUG_PCI_FRONT
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+// Our own address in PCI-land
+// Our PCI configuration-space address, overlaid on the 32-bit value
+// written to the address port (0xCF8-style layout — reg/func/dev/bus).
+union pci_addr_reg {
+    uint32_t value;               // raw 32-bit form
+    struct {
+        uint_t rsvd1  : 2;        // bits 0-1: reserved (config regs are dword aligned)
+        uint_t reg    : 6;        // 32-bit config register index
+        uint_t func   : 3;        // PCI function number
+        uint_t dev    : 5;        // PCI device (slot) number
+        uint_t bus    : 8;        // PCI bus number
+        uint_t rsvd2  : 7;        // bits 24-30: reserved
+        uint_t enable : 1;        // bit 31: config-cycle enable bit
+    } __attribute__((packed));
+} __attribute__((packed));
+
+
+// identical to PCI passthrough device
+// Classification of each BAR, recorded at init time so that a later
+// guest write can be re-hooked appropriately (see pci_bar_write)
+typedef enum { PT_BAR_NONE,       // empty/unused BAR
+               PT_BAR_IO,         // I/O port range
+               PT_BAR_MEM32,      // 32-bit memory region
+               PT_BAR_MEM24,      // 24-bit (below 1MB style) memory region
+               PT_BAR_MEM64_LO,   // low half of a 64-bit memory BAR
+               PT_BAR_MEM64_HI,   // high half of a 64-bit memory BAR (carries the hook)
+               PT_EXP_ROM } pt_bar_type_t;
+
+// identical to PCI passthrough device
+// Per-BAR bookkeeping; mirrors what the guest last wrote so hooks can
+// be torn down and re-established when the BAR moves.
+struct pt_bar {
+    uint32_t size;                // region size in bytes (ports for PT_BAR_IO)
+    pt_bar_type_t type;           // kind of region this BAR describes
+
+    /* We store 64 bit memory bar addresses in the high BAR
+     * because they are the last to be updated
+     * This means that the addr field must be 64 bits
+     */
+    uint64_t addr;                // current guest-visible base address
+
+    uint32_t val;                 // last raw 32-bit value written to the BAR
+};
+
+
+
+
+// Per-device state for the PCI front-end: a cached copy of the host
+// device's config space plus the hook bookkeeping needed to forward
+// guest accesses to the host device implementation.
+struct pci_front_internal {
+    // this is our local cache of what the host device has
+    // (filled by pull_config() at init time)
+    union {
+        uint8_t config_space[256];
+        struct pci_config_header real_hdr;
+    } __attribute__((packed));
+
+    // We do need a representation of the bars
+    // since we need to be made aware when they are written
+    // so that we can change the hooks.
+    //
+    // We assume here that the PCI subsystem, on a bar write
+    // will first send us a config_update, which we forward to
+    // the host dev.  Then it will send us a bar update
+    // which we will use to rehook the device
+    //
+    struct pt_bar bars[6];        // our bars (for update purposes)
+    //
+    // Currently unsupported
+    //
+    //struct pt_bar exp_rom;      // and exp ram areas of the config space, above
+
+    struct vm_device *pci_bus;    // what bus we are attached to
+    struct pci_device *pci_dev;   // our representation as a registered PCI device
+
+    union pci_addr_reg pci_addr;  // our pci address
+
+    char name[32];                // device id string; must stay NUL-terminated
+
+    v3_host_dev_t host_dev;       // the actual implementation
+};
+
+
+
+/*
+static int push_config(struct pci_front_internal *state, uint8_t *config)
+{
+ if (v3_host_dev_config_write(state->host_dev, 0, config, 256) != 256) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+*/
+
+// Fetch the host device's full 256-byte configuration space into the
+// local cache.  Returns 0 on success, -1 on a short or failed read.
+static int pull_config(struct pci_front_internal *state, uint8_t *config)
+{
+    if (v3_host_dev_read_config(state->host_dev, 0, config, 256) != 256) {
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+
+// Full-memory-hook read handler for hooked BAR regions: forwards the
+// guest read at gpa to the host device and returns the host's byte count.
+// NOTE(review): rc is returned straight to the hook layer; presumably it
+// must equal len for a successful read — confirm against the memory-hook
+// contract in vmm_mem_hook.
+static int pci_front_read_mem(struct guest_info * core,
+                              addr_t gpa,
+                              void * dst,
+                              uint_t len,
+                              void * priv)
+{
+    int i;
+    int rc;
+    struct vm_device *dev = (struct vm_device *) priv;
+    struct pci_front_internal *state = (struct pci_front_internal *) dev->private_data;
+
+    PrintDebug("pci_front (%s): reading 0x%x bytes from gpa 0x%p from host dev 0x%p ...",
+               state->name, len, (void*)gpa, state->host_dev);
+
+    rc = v3_host_dev_read_mem(state->host_dev, gpa, dst, len);
+
+    PrintDebug(" done ... read %d bytes: 0x", rc);
+
+    // dump the bytes actually read (debug builds only)
+    for (i = 0; i < rc; i++) {
+        PrintDebug("%x", ((uint8_t *)dst)[i]);
+    }
+
+    PrintDebug("\n");
+
+    return rc;
+}
+
+// Full-memory-hook write handler: forwards the guest write at gpa to the
+// host device.  Returns the host's byte count (see read_mem note on rc).
+static int pci_front_write_mem(struct guest_info * core,
+                               addr_t gpa,
+                               void * src,
+                               uint_t len,
+                               void * priv)
+{
+    int i;   // NOTE(review): int vs uint_t len — signed/unsigned compare below
+    int rc;
+    struct vm_device *dev = (struct vm_device *) priv;
+    struct pci_front_internal *state = (struct pci_front_internal *) dev->private_data;
+
+    PrintDebug("pci_front (%s): writing 0x%x bytes to gpa 0x%p to host dev 0x%p bytes=0x",
+               state->name, len, (void*)gpa, state->host_dev);
+
+    // dump the bytes being written (debug builds only)
+    for (i = 0; i < len; i++) {
+        PrintDebug("%x", ((uint8_t *)src)[i]);
+    }
+
+    rc = v3_host_dev_write_mem(state->host_dev, gpa, src, len);
+
+    PrintDebug(" %d bytes written\n",rc);
+
+    return rc;
+}
+
+
+// I/O-hook read handler for hooked BAR port ranges: forwards the port
+// read to the host device and returns the host's byte count.
+static int pci_front_read_port(struct guest_info * core,
+                               uint16_t port,
+                               void * dst,
+                               uint_t len,
+                               void * priv_data)
+{
+    int i;
+    struct pci_front_internal *state = (struct pci_front_internal *) priv_data;
+
+    PrintDebug("pci_front (%s): reading 0x%x bytes from port 0x%x from host dev 0x%p ...",
+               state->name, len, port, state->host_dev);
+
+    int rc = v3_host_dev_read_io(state->host_dev, port, dst, len);
+
+    PrintDebug(" done ... read %d bytes: 0x", rc);
+
+    // dump the bytes actually read (debug builds only)
+    for (i = 0; i < rc; i++) {
+        PrintDebug("%x", ((uint8_t *)dst)[i]);
+    }
+
+    PrintDebug("\n");
+
+    return rc;
+
+}
+
+// I/O-hook write handler: forwards the port write to the host device
+// and returns the host's byte count.
+static int pci_front_write_port(struct guest_info * core,
+                                uint16_t port,
+                                void * src,
+                                uint_t len,
+                                void * priv_data)
+{
+    int i;
+    struct pci_front_internal *state = (struct pci_front_internal *) priv_data;
+
+    PrintDebug("pci_front (%s): writing 0x%x bytes to port 0x%x to host dev 0x%p bytes=0x",
+               state->name, len, port, state->host_dev);
+
+    // dump the bytes being written (debug builds only)
+    for (i = 0; i < len; i++) {
+        PrintDebug("%x", ((uint8_t *)src)[i]);
+    }
+
+    int rc = v3_host_dev_write_io(state->host_dev, port, src, len);
+
+    PrintDebug(" %d bytes written\n",rc);
+
+    return rc;
+}
+
+
+
+//
+// This is called at registration time for the device
+//
+// We assume that someone has called pull_config to get a local
+// copy of the config data from the host device by this point
+//
+// BAR init callback invoked by pci.c at device registration time.
+// Reads BAR bar_num from the cached host config space, classifies it
+// (I/O, 32/24-bit mem, 64-bit mem pair, or empty), records it in
+// state->bars[], installs the corresponding I/O or memory hooks, and
+// writes the raw BAR value back through *dst for the PCI subsystem.
+// Returns 0 on success, -1 on any hook failure.
+static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) {
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)(dev->private_data);
+
+
+    const uint32_t bar_base_reg = 4; // offset in 32bit words to skip to the first bar
+
+    union pci_addr_reg pci_addr = {state->pci_addr.value}; // my address
+
+    uint32_t bar_val = 0;
+    uint32_t max_val = 0;
+
+    struct pt_bar * pbar = &(state->bars[bar_num]);
+
+    pci_addr.reg = bar_base_reg + bar_num;
+
+    PrintDebug("pci_front (%s): pci_bar_init: PCI Address = 0x%x\n", state->name, pci_addr.value);
+
+    // This assumes that pull_config() has been previously called and
+    // we have a local copy of the host device's configuration space
+    bar_val = *((uint32_t*)(&(state->config_space[(bar_base_reg+bar_num)*4])));
+
+    // Now let's set our copy of the relevant bar accordingly
+    pbar->val = bar_val;
+
+    // Now we will configure the hooks relevant to this bar
+
+    // We preset this type when we encounter a MEM64 Low BAR
+    // This is a 64 bit memory region that we turn into a memory hook
+    if (pbar->type == PT_BAR_MEM64_HI) {
+        struct pt_bar * lo_pbar = &(state->bars[bar_num - 1]);
+
+        max_val = PCI_MEM64_MASK_HI;
+
+        pbar->size += lo_pbar->size;
+
+        PrintDebug("pci_front (%s): pci_bar_init: Adding 64 bit PCI mem region: start=0x%p, end=0x%p as a full hook\n",
+                   state->name,
+                   (void *)(addr_t)pbar->addr,
+                   (void *)(addr_t)(pbar->addr + pbar->size));
+
+        if (v3_hook_full_mem(dev->vm,
+                             V3_MEM_CORE_ANY,
+                             pbar->addr,
+                             pbar->addr+pbar->size-1,
+                             pci_front_read_mem,
+                             pci_front_write_mem,
+                             dev)<0) {
+
+            PrintError("pci_front (%s): pci_bar_init: failed to hook 64 bit region (0x%p, 0x%p)\n",
+                       state->name,
+                       (void *)(addr_t)pbar->addr,
+                       (void *)(addr_t)(pbar->addr + pbar->size - 1));
+            return -1;
+        }
+
+    } else if ((bar_val & 0x3) == 0x1) {
+        // This an I/O port region which we will turn into a range of hooks
+
+        int i = 0;
+
+        pbar->type = PT_BAR_IO;
+        pbar->addr = PCI_IO_BASE(bar_val);
+
+        max_val = bar_val | PCI_IO_MASK;
+
+        // standard BAR sizing: size = ~(base of all-ones-probe) + 1
+        pbar->size = (uint16_t)~PCI_IO_BASE(max_val) + 1;
+
+
+        PrintDebug("pci_front (%s): pci_bar_init: hooking ports 0x%x through 0x%x\n",
+                   state->name, (uint32_t)pbar->addr, (uint32_t)pbar->addr + pbar->size - 1);
+
+        for (i = 0; i < pbar->size; i++) {
+            if (v3_dev_hook_io(dev,
+                               pbar->addr + i,
+                               pci_front_read_port,
+                               pci_front_write_port)<0) {
+                PrintError("pci_front (%s): pci_bar_init: unabled to hook I/O port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
+                return -1;
+            }
+        }
+
+    } else {
+
+        // might be a 32 bit memory region or an empty bar
+
+        max_val = bar_val | PCI_MEM_MASK;
+
+        if (max_val == 0) {
+            // nothing, so just ignore it
+            pbar->type = PT_BAR_NONE;
+        } else {
+
+            // memory region - hook it
+
+            if ((bar_val & 0x6) == 0x0) {
+                // 32 bit memory region
+
+                pbar->type = PT_BAR_MEM32;
+                pbar->addr = PCI_MEM32_BASE(bar_val);
+                pbar->size = ~PCI_MEM32_BASE(max_val) + 1;
+
+                PrintDebug("pci_front (%s): pci_init_bar: adding 32 bit PCI mem region: start=0x%p, end=0x%p\n",
+                           state->name,
+                           (void *)(addr_t)pbar->addr,
+                           (void *)(addr_t)(pbar->addr + pbar->size));
+
+                if (v3_hook_full_mem(dev->vm,
+                                     V3_MEM_CORE_ANY,
+                                     pbar->addr,
+                                     pbar->addr+pbar->size-1,
+                                     pci_front_read_mem,
+                                     pci_front_write_mem,
+                                     dev) < 0 ) {
+                    PrintError("pci_front (%s): pci_init_bar: unable to hook 32 bit memory region 0x%p to 0x%p\n",
+                               state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
+                    return -1;
+                }
+
+            } else if ((bar_val & 0x6) == 0x2) {
+
+                // 24 bit memory region
+
+                pbar->type = PT_BAR_MEM24;
+                pbar->addr = PCI_MEM24_BASE(bar_val);
+                pbar->size = ~PCI_MEM24_BASE(max_val) + 1;
+
+
+                if (v3_hook_full_mem(dev->vm,
+                                     V3_MEM_CORE_ANY,
+                                     pbar->addr,
+                                     pbar->addr+pbar->size-1,
+                                     pci_front_read_mem,
+                                     pci_front_write_mem,
+                                     dev) < 0 ) {
+                    PrintError("pci_front (%s): pci_init_bar: unable to hook 24 bit memory region 0x%p to 0x%p\n",
+                               state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
+                    return -1;
+                }
+
+            } else if ((bar_val & 0x6) == 0x4) {
+
+                // partial update of a 64 bit region, no hook done yet
+
+                struct pt_bar * hi_pbar = &(state->bars[bar_num + 1]);
+
+                pbar->type = PT_BAR_MEM64_LO;
+                hi_pbar->type = PT_BAR_MEM64_HI;
+
+                // Set the low bits, only for temporary storage until we calculate the high BAR
+                pbar->addr = PCI_MEM64_BASE_LO(bar_val);
+                pbar->size = ~PCI_MEM64_BASE_LO(max_val) + 1;
+
+                PrintDebug("pci_front (%s): pci_bar_init: partial 64 bit update\n",state->name);
+
+            } else {
+                PrintError("pci_front (%s): pci_bar_init: invalid memory bar type\n",state->name);
+                return -1;
+            }
+
+        }
+    }
+
+
+
+    // Update the pci subsystem versions
+    *dst = bar_val;
+
+    return 0;
+}
+
+
+//
+// If the guest modifies a BAR, we expect that pci.c will do the following,
+// in this order
+//
+// 1. notify us via the config_update callback, which we will feed back
+// to the host device
+// 2. notify us of the bar change via the following callback
+//
+// This callback will unhook as needed for the old bar value and rehook
+// as needed for the new bar value
+//
+// BAR write callback invoked by pci.c after the guest rewrites a BAR
+// (and after the config_update has already been forwarded to the host).
+// Tears down the hooks for the old address range, "cooks" the written
+// value (aligns it to the BAR size and restores reserved bits), and
+// re-installs hooks at the new address.  Returns 0 on success, -1 on
+// any unhook/rehook failure.
+static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) {
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+
+    struct pt_bar * pbar = &(state->bars[bar_num]);
+
+    PrintDebug("pci_front (%s): bar update: bar_num=%d, src=0x%x\n", state->name, bar_num, *src);
+    PrintDebug("pci_front (%s): the current bar has size=%u, type=%d, addr=%p, val=0x%x\n",
+               state->name, pbar->size, pbar->type, (void *)(addr_t)pbar->addr, pbar->val);
+
+
+
+    if (pbar->type == PT_BAR_NONE) {
+        PrintDebug("pci_front (%s): bar update is to empty bar - ignored\n",state->name);
+        return 0;
+    } else if (pbar->type == PT_BAR_IO) {
+        int i = 0;
+
+        // unhook old ports
+        PrintDebug("pci_front (%s): unhooking I/O ports 0x%x through 0x%x\n",
+                   state->name,
+                   (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
+        for (i = 0; i < pbar->size; i++) {
+            if (v3_dev_unhook_io(dev, pbar->addr + i) == -1) {
+                PrintError("pci_front (%s): could not unhook previously hooked port.... 0x%x\n",
+                           state->name,
+                           (uint32_t)pbar->addr + i);
+                return -1;
+            }
+        }
+
+        PrintDebug("pci_front (%s): setting I/O Port range size=%d\n", state->name, pbar->size);
+
+        //
+        // Not clear if this cooking is needed... why not trust
+        // the write?  Who cares if it wants to suddenly hook more ports?
+        //
+
+        // clear the low bits to match the size
+        *src &= ~(pbar->size - 1);
+
+        // Set reserved bits
+        *src |= (pbar->val & ~PCI_IO_MASK);
+
+        pbar->addr = PCI_IO_BASE(*src);
+
+        PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src);
+
+        PrintDebug("pci_front (%s): rehooking I/O ports 0x%x through 0x%x\n",
+                   state->name, (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
+
+        for (i = 0; i < pbar->size; i++) {
+            if (v3_dev_hook_io(dev,
+                               pbar->addr + i,
+                               pci_front_read_port,
+                               pci_front_write_port)<0) {
+                PrintError("pci_front (%s): unable to rehook port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
+                return -1;
+            }
+        }
+
+    } else if (pbar->type == PT_BAR_MEM32) {
+
+        // unhook the old 32-bit region before moving it
+        if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
+            PrintError("pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n",
+                       state->name, (void*)(pbar->addr));
+            return -1;
+        }
+
+        // Again, not sure I need to do this cooking...
+
+        // clear the low bits to match the size
+        *src &= ~(pbar->size - 1);
+
+        // Set reserved bits
+        *src |= (pbar->val & ~PCI_MEM_MASK);
+
+        PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src);
+
+        pbar->addr = PCI_MEM32_BASE(*src);
+
+        PrintDebug("pci_front (%s): rehooking 32 bit memory region 0x%p through 0x%p\n",
+                   state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+
+        if (v3_hook_full_mem(dev->vm,
+                             V3_MEM_CORE_ANY,
+                             pbar->addr,
+                             pbar->addr+pbar->size-1,
+                             pci_front_read_mem,
+                             pci_front_write_mem,
+                             dev)<0) {
+            PrintError("pci_front (%s): unable to rehook 32 bit memory region 0x%p through 0x%p\n",
+                       state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+            return -1;
+        }
+
+    } else if (pbar->type == PT_BAR_MEM64_LO) {
+        // We only store the written values here, the actual reconfig comes when the high BAR is updated
+
+        // clear the low bits to match the size
+        *src &= ~(pbar->size - 1);
+
+        // Set reserved bits
+        *src |= (pbar->val & ~PCI_MEM_MASK);
+
+        // Temp storage, used when hi bar is written
+        pbar->addr = PCI_MEM64_BASE_LO(*src);
+
+        PrintDebug("pci_front (%s): handled partial update for 64 bit memory region\n",state->name);
+
+    } else if (pbar->type == PT_BAR_MEM64_HI) {
+        struct pt_bar * lo_vbar = &(state->bars[bar_num - 1]);
+
+        // NOTE(review): this unhooks at the *old* combined address stored
+        // in pbar->addr, before the new address is assembled below
+        if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
+            PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n",
+                       state->name, (void*)(pbar->addr));
+            return -1;
+        }
+
+
+        // We don't set size, because we assume region is less than 4GB
+
+        // Set reserved bits
+        *src |= (pbar->val & ~PCI_MEM64_MASK_HI);
+
+        // assemble the full 64-bit base: hi bits from this write,
+        // low bits from the previously stored low-BAR write
+        pbar->addr = PCI_MEM64_BASE_HI(*src);
+        pbar->addr <<= 32;
+        pbar->addr += lo_vbar->addr;
+
+        PrintDebug("pci_front (%s): rehooking 64 bit memory region 0x%p through 0x%p\n",
+                   state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+
+        if (v3_hook_full_mem(dev->vm,
+                             V3_MEM_CORE_ANY,
+                             pbar->addr,
+                             pbar->addr+pbar->size-1,
+                             pci_front_read_mem,
+                             pci_front_write_mem,
+                             dev)<0) {
+            PrintError("pci_front (%s): unable to rehook 64 bit memory region 0x%p through 0x%p\n",
+                       state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+            return -1;
+        }
+
+    } else {
+        PrintError("pci_front (%s): unhandled PCI bar type %d\n", state->name, pbar->type);
+        return -1;
+    }
+
+    // remember the cooked value as the guest-visible BAR contents
+    pbar->val = *src;
+
+    return 0;
+}
+
+
+// Config-space write callback from pci.c: forwards the guest's config
+// write (reg_num is a byte offset; pci_addr.reg is in 32-bit words) to
+// the host device.  Returns 0 on success, -1 on a short write.
+static int pci_front_config_update(uint_t reg_num, void * src, uint_t length, void * private_data)
+{
+    int i;
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+    union pci_addr_reg pci_addr = {state->pci_addr.value};
+
+    // convert byte offset to 32-bit register index
+    pci_addr.reg = reg_num >> 2;
+
+    PrintDebug("pci_front (%s): configuration update: writing 0x%x bytes at offset 0x%x to host device 0x%p, bytes=0x",
+               state->name, length, pci_addr.value, state->host_dev);
+
+    for (i = 0; i < length; i++) {
+        PrintDebug("%x", ((uint8_t *)src)[i]);
+    }
+
+    PrintDebug("\n");
+
+    if (v3_host_dev_write_config(state->host_dev,
+                                 pci_addr.value,
+                                 src,
+                                 length) != length) {
+        PrintError("pci_front (%s): configuration update: unable to write all bytes\n",state->name);
+        return -1;
+    }
+
+
+    return 0;
+}
+
+
+// Remove all memory hooks installed for this device's BARs (used on
+// free).  Only MEM32 and MEM64_HI carry hooks; IO hooks are removed by
+// the device manager, and MEM64_LO/NONE have no hook of their own.
+// Returns 0 on success, -1 if any unhook fails.
+static int unhook_all_mem(struct pci_front_internal *state)
+{
+    int bar_num;
+    struct vm_device *bus = state->pci_bus;
+
+
+    for (bar_num=0;bar_num<6;bar_num++) {
+        struct pt_bar * pbar = &(state->bars[bar_num]);
+
+        PrintDebug("pci_front (%s): unhooking for bar %d\n", state->name, bar_num);
+
+        if (pbar->type == PT_BAR_MEM32) {
+            if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
+                PrintError("pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n",
+                           state->name, (void*)(pbar->addr));
+                return -1;
+            }
+        } else if (pbar->type == PT_BAR_MEM64_HI) {
+
+            if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) {
+                PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n",
+                           state->name, (void*)(pbar->addr));
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+
+// Register our virtual PCI device with the bus.  All six BARs are
+// declared PCI_BAR_PASSTHROUGH so that pci.c calls back into
+// pci_bar_init/pci_bar_write, letting us forward BAR accesses to the
+// host device.  Returns 0 on success, -1 on registration failure.
+static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev)
+{
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+    struct pci_device * pci_dev = NULL;
+    struct v3_pci_bar bars[6];
+    int bus_num = 0;
+    int i;
+
+    for (i = 0; i < 6; i++) {
+        bars[i].type = PCI_BAR_PASSTHROUGH;
+        bars[i].private_data = dev;
+        bars[i].bar_init = pci_bar_init;
+        bars[i].bar_write = pci_bar_write;
+    }
+
+    pci_dev = v3_pci_register_device(state->pci_bus,
+                                     PCI_STD_DEVICE,
+                                     bus_num, -1, 0,
+                                     state->name, bars,
+                                     pci_front_config_update,
+                                     NULL, // no support for command updates
+                                     NULL, // no support for expansion roms
+                                     dev);
+
+    // Fail cleanly rather than caching a NULL device pointer that
+    // later code (e.g. interrupt raising) would dereference
+    if (pci_dev == NULL) {
+        PrintError("pci_front (%s): failed to register with PCI bus\n", state->name);
+        return -1;
+    }
+
+    state->pci_dev = pci_dev;
+
+
+    // EXPANSION ROMS CURRENTLY UNSUPPORTED
+
+    // COMMANDS CURRENTLY UNSUPPORTED
+
+    return 0;
+}
+
+
+
+//
+// Note: potential bug: not clear what pointer I get here
+//
+//
+// Note: potential bug: not clear what pointer I get here
+//
+// Device teardown: removes all memory hooks, closes the host device
+// handle, and frees the state.  I/O port hooks are unhooked by the
+// device manager, not here.  Returns 0 on success, -1 if unhooking fails
+// (in which case state is intentionally NOT freed).
+static int pci_front_free(struct pci_front_internal *state)
+{
+
+    if (unhook_all_mem(state)<0) {
+        return -1;
+    }
+
+    // the device manager will unhook the i/o ports for us
+
+    if (state->host_dev) {
+        v3_host_dev_close(state->host_dev);
+        state->host_dev=0;   // guard against double-close
+    }
+
+
+    V3_Free(state);
+
+    PrintDebug("pci_front (%s): freed\n",state->name);
+
+    return 0;
+}
+
+
+static struct v3_device_ops dev_ops = {
+//
+// Note: potential bug: not clear what pointer I get here
+//
+// The cast adapts pci_front_free (which takes the internal state
+// pointer) to the generic free signature; this relies on the device
+// manager passing the private_data pointer registered in v3_add_device.
+    .free = (int (*)(void*))pci_front_free,
+};
+
+
+
+
+
+
+
+// Device factory: parses the config tree (ID, bus, hostdev url), opens
+// the host device, pulls its config space, and registers the virtual
+// PCI device on the given bus.  Returns 0 on success, -1 on any error
+// (all partially-acquired resources are released on the error paths).
+static int pci_front_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg)
+{
+    struct vm_device * dev;
+    struct vm_device * bus;
+    struct pci_front_internal *state;
+    char *dev_id;
+    char *bus_id;
+    char *url;
+
+
+    if (!(dev_id = v3_cfg_val(cfg, "ID"))) {
+        PrintError("pci_front: no id given!\n");
+        return -1;
+    }
+
+    if (!(bus_id = v3_cfg_val(cfg, "bus"))) {
+        PrintError("pci_front (%s): no bus given!\n",dev_id);
+        return -1;
+    }
+
+    if (!(url = v3_cfg_val(cfg, "hostdev"))) {
+        PrintError("pci_front (%s): no host device url given!\n",dev_id);
+        return -1;
+    }
+
+    if (!(bus = v3_find_dev(vm,bus_id))) {
+        PrintError("pci_front (%s): cannot attach to bus %s\n",dev_id,bus_id);
+        return -1;
+    }
+
+    if (!(state = V3_Malloc(sizeof(struct pci_front_internal)))) {
+        PrintError("pci_front (%s): cannot allocate state for device\n",dev_id);
+        return -1;
+    }
+
+    memset(state, 0, sizeof(struct pci_front_internal));
+
+    state->pci_bus = bus;
+    // Copy at most 31 chars: the memset above zeroed name[31], and
+    // strncpy does NOT terminate on truncation (CERT STR32-C)
+    strncpy(state->name, dev_id, sizeof(state->name) - 1);
+
+    if (!(dev = v3_add_device(vm, dev_id, &dev_ops, state))) {
+        PrintError("pci_front (%s): unable to add device\n",state->name);
+        V3_Free(state);   // no device yet, so free the state ourselves
+        return -1;
+    }
+
+    // from here on, v3_remove_device() triggers pci_front_free(),
+    // which releases state and the host device handle
+
+    if (!(state->host_dev=v3_host_dev_open(url,V3_BUS_CLASS_PCI,dev,vm))) {
+        PrintError("pci_front (%s): unable to attach to host device %s\n",state->name, url);
+        v3_remove_device(dev);
+        return -1;
+    }
+
+    // fetch config space from the host
+    if (pull_config(state,state->config_space)) {
+        PrintError("pci_front (%s): cannot initially configure device\n",state->name);
+        v3_remove_device(dev);
+        return -1;
+    }
+
+    // setup virtual device for now
+    if (setup_virt_pci_dev(vm,dev)<0) {
+        PrintError("pci_front (%s): cannot set up virtual pci device\n", state->name);
+        v3_remove_device(dev);
+        return -1;
+    }
+
+    // We do not need to hook anything here since pci will call
+    // us back via the bar_init functions
+
+    PrintDebug("pci_front (%s): inited and ready to be Potemkinized\n",state->name);
+
+    return 0;
+
+}
+
+
+device_register("PCI_FRONT", pci_front_init)
#include "vga_regs.h"
+#ifndef CONFIG_DEBUG_VGA
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+#define DEBUG_MEM_DATA 0
+#define DEBUG_DEEP_MEM 0
+#define DEBUG_DEEP_RENDER 0
+
+
#define MEM_REGION_START 0xa0000
#define MEM_REGION_END 0xc0000
#define MEM_REGION_NUM_PAGES (((MEM_REGION_END)-(MEM_REGION_START))/4096)
struct vga_internal {
- struct vm_device *dev;
+ struct vm_device *dev;
bool passthrough;
bool skip_next_passthrough_out; // for word access
};
+// How the graphics serializer shifts pixel data out of the planes
+// (selected by the graphics-mode register; see find_shift_mode)
+typedef enum {PLANAR_SHIFT, PACKED_SHIFT, C256_SHIFT} shift_mode_t;
+
static void find_text_char_dim(struct vga_internal *vga, uint32_t *w, uint32_t *h)
{
}
+
static void find_text_attr_start(struct vga_internal *vga, void **data)
{
uint32_t offset;
}
+// Compute the display start offset from the CRT controller's
+// start-address high/low register pair (high byte shifted into bits 8+).
+static void find_graphics_data_starting_offset(struct vga_internal *vga, uint32_t *offset)
+{
+
+    *offset = vga->vga_crt_controller.vga_start_address_high;
+    *offset <<= 8;
+    *offset += vga->vga_crt_controller.vga_start_address_low;
+}
+
+
+// Determine the serializer shift mode from the graphics-mode register:
+// c256 bit wins (256-color), otherwise shift_reg_mode selects packed
+// vs. planar shifting.
+static void find_shift_mode(struct vga_internal *vga, shift_mode_t *mode)
+{
+    if (vga->vga_graphics_controller.vga_graphics_mode.c256) {
+        *mode=C256_SHIFT;
+    } else {
+        if (vga->vga_graphics_controller.vga_graphics_mode.shift_reg_mode) {
+            *mode=PACKED_SHIFT;
+        } else {
+            *mode=PLANAR_SHIFT;
+        }
+    }
+}
+
+
static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_t *height)
{
uint32_t vert_lsb, vert_msb;
+ (vga->vga_crt_controller.vga_overflow.vertical_disp_enable_end8);
*height = ( (vert_msb << 8) + vert_lsb + 1) ; // pixels high (scanlines)
-
-}
-
-
-static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *width, uint32_t *height)
-{
-
-}
-static void render_graphics(struct vga_internal *vga, void *fb)
-{
+ // At this point we have the resolution in dot clocks across and scanlines top-to-bottom
+ // This is usually the resolution in pixels, but it can be monkeyed with
+ // at least in the following ways
- PrintDebug("vga: render_graphics is unimplemented\n");
- // Multiuplane 16
- // Packed pixel mono
- // packed pixel 4 color
- // packed pixel 256 color
+ // vga sequencer dot clock divide by two
+ if (vga->vga_sequencer.vga_clocking_mode.dot_clock) {
+ *width/=2;
+ *height/=2;
+ }
- find_graphics_cursor_pos(0,0,0);
+ // crt_controller.max_row_scan.double_scan => each row twice for 200=>400
+ if (vga->vga_crt_controller.vga_max_row_scan.double_scan) {
+ *height/=2;
+ }
+
+ // crt_controller.crt_mode_control.count_by_two => pixels twice as wide as normal
+ if (vga->vga_crt_controller.vga_crt_mode_control.count_by_two) {
+ *width /= 2;
+ }
+ // crt_controller.crt_mode_control.horizontal_retrace_select => pixels twice as tall as normal
+ if (vga->vga_crt_controller.vga_crt_mode_control.horizontal_retrace_select) {
+ *height /= 2;
+ }
+
}
-static void render_text_cursor(struct vga_internal *vga, void *fb)
+
+// Graphics-mode cursor position — not yet implemented; always
+// reports (0,0).
+static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *x, uint32_t *y)
{
+    // todo
+    *x=*y=0;
}
-
static void dac_lookup_24bit_color(struct vga_internal *vga,
uint8_t entry,
uint8_t *red,
}
+
+/*
+ Colors work like this:
+
+ 4 bit modes: index is to the internal palette on the attribute controller
+ that supplies 6 bits, but we need 8 to index the dac
+ 2 more (the msbs) are supplied from the color select register
+ we can optionally overwrite bits 5 and 4 from the color
+ select register as well, depending on a selection bit
+ in the mode control register. The result of all this is
+ 8 bit index for the dac
+
+ 8 bit modes: the attribute controller passes the index straight through
+ to the DAC.
+
+
+ The DAC translates from the 8 bit index into 6 bits per color channel
+ (18 bit color). We multiply by 4 to get 24 bit color.
+*/
+
+// Translate a pixel value into 24-bit RGB.
+// In 8-bit (256 color) mode the value indexes the DAC directly; in
+// 4-bit modes it first goes through the attribute controller's internal
+// palette, with bits 5-4 optionally and bits 7-6 always supplied by the
+// color select register.  See the block comment above for details.
+static void find_24bit_color(struct vga_internal *vga,
+			     uint8_t val,
+			     uint8_t *red,
+			     uint8_t *green,
+			     uint8_t *blue)
+{
+    uint8_t di;  // ultimate dac index
+
+    if (vga->vga_attribute_controller.vga_attribute_mode_control.pixel_width) {
+	// 8 bit mode goes straight to the DAC
+	di=val;
+    } else {
+	struct vga_internal_palette_reg pr = vga->vga_attribute_controller.vga_internal_palette[val%16];
+	di = pr.palette_data;
+
+	// Fix bits 5-4 if needed
+	if (vga->vga_attribute_controller.vga_attribute_mode_control.p54_select) {
+	    di &= ~0x30; // clear 5-4
+	    di |= vga->vga_attribute_controller.vga_color_select.sc4 << 4;
+	    di |= vga->vga_attribute_controller.vga_color_select.sc5 << 5;
+	}
+
+	// We must always produce bits 6 and 7
+	di &= ~0xc0; // clear 7-6
+	di |= vga->vga_attribute_controller.vga_color_select.sc6 << 6;
+	di |= vga->vga_attribute_controller.vga_color_select.sc7 << 7;
+    }
+
+    dac_lookup_24bit_color(vga,di,red,green,blue);
+}
+
+// Render the current graphics-mode contents of the four planes into the
+// host frame buffer, supporting the three VGA shift modes: planar
+// (4bpp across planes), packed (CGA-style 2bpp), and 256-color
+// (one byte per pixel).
+static void render_graphics(struct vga_internal *vga, void *fb)
+{
+
+    struct v3_frame_buffer_spec *spec = &(vga->target_spec);
+
+    uint32_t gw, gh; // graphics w/h
+    uint32_t fw, fh; // fb w/h
+    uint32_t rgw, rgh; // region we can actually show on the frame buffer
+
+
+    uint32_t fx, fy; // pixel position within the frame buffer
+
+    uint32_t offset; // offset into the maps
+    uint8_t m; // map
+    uint8_t p; // pixel in the current map byte (0..7)
+
+    uint8_t r,g,b; // looked up colors for entry
+
+    void *pixel; // current pixel in the fb
+    uint8_t *red; // and the channels in the pixel
+    uint8_t *green; //
+    uint8_t *blue; //
+
+    uint8_t db[4]; // 4 bytes read at a time
+    uint8_t pb[8]; // 8 pixels assembled at a time
+
+    shift_mode_t sm; // shift mode
+
+    uint32_t cur_x, cur_y;
+
+
+    find_graphics_res(vga,&gw,&gh);
+
+    find_shift_mode(vga,&sm);
+
+    find_graphics_cursor_pos(vga,&cur_x,&cur_y);
+
+    find_graphics_data_starting_offset(vga,&offset);
+
+    fw = spec->width;
+    fh = spec->height;
+
+
+    PrintDebug("vga: attempting graphics render (%s): graphics_res=(%u,%u), fb_res=(%u,%u), "
+	       "fb=0x%p offset=0x%x\n",
+	       sm == PLANAR_SHIFT ? "planar shift" :
+	       sm == PACKED_SHIFT ? "packed shift" :
+	       sm == C256_SHIFT ? "color256 shift" : "UNKNOWN",
+	       gw,gh,fw,fh,fb,offset);
+
+    // First we need to clip to what we can actually show
+    rgw = gw < fw ? gw : fw;
+    rgh = gh < fh ? gh : fh;
+
+    if (gw%8) {
+	PrintError("vga: warning: graphics width is not a multiple of 8\n");
+    }
+
+
+
+    // Now we scan across by row
+    for (fy=0;fy<gh;fy++) {
+	// by column
+	for (fx=0;fx<gw;
+	     fx += (sm==C256_SHIFT ? 4 : 8) , offset++ ) {
+
+	    // if any of these pixels are in the render region
+	    // NOTE(review): only the group's first pixel is tested; when
+	    // gw > fw a group straddling rgw still draws its trailing
+	    // pixels below — confirm fb is sized for that overrun
+	    if (fy < rgh && fx < rgw) {
+		// assemble all 4 or 8 pixels
+
+		// fetch the data bytes
+		for (m=0;m<4;m++) {
+		    db[m]=*((uint8_t*)(vga->map[m]+offset));
+		}
+
+		// assemble
+		switch (sm) {
+		    case PLANAR_SHIFT:
+			for (p=0;p<8;p++) {
+			    pb[p]=
+				(( db[0] >> 7) & 0x1) |
+				(( db[1] >> 6) & 0x2) |
+				(( db[2] >> 5) & 0x4) |
+				(( db[3] >> 4) & 0x8) ;
+
+			    for (m=0;m<4;m++) {
+				db[m] <<= 1;
+			    }
+			}
+			break;
+
+		    case PACKED_SHIFT:
+			// first 4 pixels use planes 0 and 2
+			for (p=0;p<4;p++) {
+			    pb[p] =
+				((db[2] >> 4) & 0xc) |
+				((db[0] >> 6) & 0x3) ;
+			    db[2] <<= 2;
+			    db[0] <<= 2;
+			}
+			// BUG FIX: a stray `break;` here made the following
+			// loop unreachable, leaving pixels 4-7 with stale
+			// pb[] contents
+			// next 4 pixels use planes 1 and 3
+			for (p=4;p<8;p++) {
+			    pb[p] =
+				((db[3] >> 4) & 0xc) |
+				((db[1] >> 6) & 0x3) ;
+			    db[3] <<= 2;
+			    db[1] <<= 2;
+			}
+			break;
+
+		    case C256_SHIFT:
+			// this one is either very bizarre or as simple as this
+			for (p=0;p<4;p++) {
+			    pb[p] = db[p];
+			}
+			break;
+		}
+
+		// draw each pixel
+		for (p=0;p< (sm==C256_SHIFT ? 4 : 8);p++) {
+
+		    // find its color
+		    find_24bit_color(vga,pb[p],&r,&g,&b);
+
+		    // find its position in the framebuffer;
+		    pixel = fb + (((fx + p) + (fy*spec->width)) * spec->bytes_per_pixel);
+		    red = pixel + spec->red_offset;
+		    green = pixel + spec->green_offset;
+		    blue = pixel + spec->blue_offset;
+
+		    // draw it
+		    *red=r;
+		    *green=g;
+		    *blue=b;
+		}
+	    }
+	}
+    }
+
+    PrintDebug("vga: render done\n");
+}
+
+
+// Text-mode cursor rendering — currently a stub; no cursor is drawn.
+static void render_text_cursor(struct vga_internal *vga, void *fb)
+{
+}
+
+
+
+
//
// A variant of this function could render to
// a text console interface as well
// foreground
if (!extended_fontset(vga)) {
- fg_entry = ((uint8_t)(a.foreground_intensity_or_font_select)) << 3;
+ fg_entry = a.foreground_intensity_or_font_select << 3;
} else {
fg_entry = 0;
}
fg_entry |= a.fore;
- dac_lookup_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb);
+ find_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb);
if (!blinking(vga)) {
- bg_entry = ((uint8_t)(a.blinking_or_bg_intensity)) << 3;
+ bg_entry = a.blinking_or_bg_intensity << 3;
} else {
bg_entry = 0;
}
bg_entry |= a.back;
- dac_lookup_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb);
+ find_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb);
// Draw the character
for (l=0; l<ch; l++, font++) {
}
}
+// Fill the entire target frame buffer with zeros (black); used when the
+// sequencer's clocking-mode register reports the screen as off.
+static void render_black(struct vga_internal *vga, void *fb)
+{
+    struct v3_frame_buffer_spec *s;
+
+    s=&(vga->target_spec);
+
+    memset(fb,0,s->height*s->width*s->bytes_per_pixel);
+}
+
static void render_maps(struct vga_internal *vga, void *fb)
{
fb = v3_graphics_console_get_frame_buffer_data_rw(vga->host_cons,&(vga->target_spec));
- // Draw some crap for testing for now
- if (0) { render_test(vga,fb);}
- // Draw the maps for debugging
- if (0) { render_maps(vga,fb);}
-
- if (vga->vga_graphics_controller.vga_misc.graphics_mode) {
- render_graphics(vga,fb);
+ if (!(vga->vga_sequencer.vga_clocking_mode.screen_off)) {
+ if (vga->vga_attribute_controller.vga_attribute_mode_control.graphics) {
+ render_graphics(vga,fb);
+ } else {
+ render_text(vga,fb);
+ render_text_cursor(vga,fb);
+ }
} else {
- render_text(vga,fb);
- render_text_cursor(vga,fb);
+ render_black(vga,fb);
}
- render_maps(vga,fb);
+ if (0) { render_test(vga,fb); }
+ // always render maps for now
+ render_maps(vga,fb);
v3_graphics_console_release_frame_buffer_data_rw(vga->host_cons);
}
memcpy(V3_VAddr((void*)guest_addr),src,length);
}
-#if 0
+#if DEBUG_MEM_DATA
int i;
PrintDebug("vga: data written was 0x");
for (i=0;i<length;i++) {
/* Write mode determine by Graphics Mode Register (Index 05h).writemode */
- // Probably need to add odd/even mode access here for text
-
- PrintDebug("vga: write is with odd/even = %u\n", vga->vga_sequencer.vga_mem_mode.odd_even);
-
switch (vga->vga_graphics_controller.vga_graphics_mode.write_mode) {
case 0: {
offset = find_offset_write(vga, guest_addr);
+#if DEBUG_DEEP_MEM
PrintDebug("vga: mode 0 write, offset=0x%llx, ror=%u, func=%u\n", offset,ror,func);
+#endif
for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) {
// now for each map
uint8_t bm = vga->vga_graphics_controller.vga_bit_mask;
uint8_t mm = find_map_write(vga,guest_addr+i);
- PrintDebug("vga: write i=%u, mm=0x%x, offset=0x%x\n",i,(unsigned int)mm,(unsigned int)offset);
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: write i=%u, mm=0x%x, bm=0x%x sr=0x%x esr=0x%x offset=0x%x\n",i,(unsigned int)mm,(unsigned int)bm, (unsigned int)sr, (unsigned int)esr,(unsigned int)offset);
+#endif
- for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, bm>>=1, mm>>=1) {
+ for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, mm>>=1) {
vga_map map = vga->map[mapnum];
uint8_t data = ((uint8_t *)src)[i];
uint8_t latchval = vga->latch[mapnum];
-
+
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: raw data=0x%x\n",data);
+#endif
// rotate data right
- data = (data>>ror) | data<<(8-ror);
-
+ if (ror) {
+ data = (data>>ror) | data<<(8-ror);
+ }
+
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: data after ror=0x%x\n",data);
+#endif
// use SR bit if ESR is on for this map
- if (esr & 0x1) {
- data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7); // expand sr bit
+ if (esr & 0x1) {
+ data = (sr&0x1) * -1;
+
}
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: data after esrr=0x%x\n",data);
+#endif
+
// Apply function
switch (func) {
case 0: // NOP
data ^= latchval;
break;
}
-
- // mux between latch and alu output
- if (bm & 0x1) {
- // use alu output, which is in data
- } else {
- // use latch value
- data=latchval;
- }
+
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: data after func=0x%x\n",data);
+#endif
+
+ // mux between the data byte and the latch byte on
+ // a per-bit basis
+ data = (bm & data) | ((~bm) & latchval);
+
+
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: data after bm mux=0x%x\n",data);
+#endif
// selective write
if (mm & 0x1) {
// write to this map
- //PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset]));
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset]));
+#endif
map[offset] = data;
} else {
// skip this map
uint64_t offset = find_offset_write(vga,guest_addr);
+#if DEBUG_DEEP_MEM
PrintDebug("vga: mode 1 write, offset=0x%llx\n", offset);
+#endif
for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) {
offset = find_offset_write(vga, guest_addr);
+#if DEBUG_DEEP_MEM
PrintDebug("vga: mode 2 write, offset=0x%llx, func=%u\n", offset,func);
+#endif
for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) {
// now for each map
uint8_t latchval = vga->latch[mapnum];
// expand relevant bit to 8 bit
- // it's basically esr=1, sr=bit from write
- data = (uint8_t)(((sint8_t)(((data>>mapnum)&0x1)<<7))>>7);
-
+ // it's basically esr=1, sr=bit from mode 0 write
+ data = ((data>>mapnum)&0x1) * -1;
+
// Apply function
switch (func) {
case 0: // NOP
data ^= latchval;
break;
}
-
+
// mux between latch and alu output
- if (bm & 0x1) {
- // use alu output, which is in data
- } else {
- // use latch value
- data=latchval;
- }
+ data = (bm & data) | ((~bm) & latchval);
// selective write
if (mm & 0x1) {
// now for each map
uint8_t data = ((uint8_t *)src)[i];
- data = (data>>ror) | data<<(8-ror);
+ if (ror) {
+ data = (data>>ror) | data<<(8-ror);
+ }
uint8_t bm = vga->vga_graphics_controller.vga_bit_mask & data;
uint8_t sr = vga->vga_graphics_controller.vga_set_reset.val & 0xf;
vga_map map = vga->map[mapnum];
uint8_t latchval = vga->latch[mapnum];
- data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7); // expand sr bit
-
-
+ // expand SR bit
+ data = (sr&0x1) * -1;
+
// mux between latch and alu output
- if (bm & 0x1) {
- // use alu output, which is in data
- } else {
- // use latch value
- data=latchval;
- }
+ data = (bm & data) | ((~bm) & latchval);
// selective write
if (mm & 0x1) {
// address bytes select the map
for (i=0;i<length;i++,offset+=find_increment_read(vga,guest_addr+i)) {
mapnum = (guest_addr+i) % 4;
- ((uint8_t*)dst)[i] = vga->latch[mapnum] = *(vga->map[mapnum]+offset);
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: mode 0 read, chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum);
+#endif
+ ((uint8_t*)dst)[i] = *(vga->map[mapnum]+offset);
+
+ // presumably all the latches are to be reloaded, not just the selected one?
+ for (mapnum=0;mapnum<4;mapnum++) {
+ vga->latch[mapnum] = *(vga->map[mapnum]+offset);
+ }
}
} else {
mapnum = vga->vga_graphics_controller.vga_read_map_select.map_select;
PrintError("vga: read to offset=%llu map=%u (%u bytes)\n",offset,mapnum,length);
}
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: mode 0 read, not-chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum);
+#endif
+
memcpy(dst,(vga->map[mapnum])+offset,length);
// load the latches with the last item read
uint8_t bits;
offset = find_offset_read(vga,guest_addr);
+
+#if DEBUG_DEEP_MEM
+ PrintDebug("vga: mode 1 read, offset=0x%llx, cc=0x%x, dc-0x%x\n",offset,cc,dc);
+#endif
+
for (i=0;i<length;i++,offset++) {
vga_map map;
}
-#if 0
+#if DEBUG_MEM_DATA
int i;
PrintDebug("vga: data read is 0x");
for (i=0;i<length;i++) {
// 1 = odd/even addressing as in CGMA
uint8_t shift_reg_mode:1;
// 1 = shift regs get odd bits from odd maps and even/even
- uint8_t c256:1;
- // 1 = 256 color mode
+ uint8_t c256:1; // 1 = 256 color mode
// 0 = shift_reg_mode controls shift regs
uint8_t reserved2:1;
} __attribute__((packed));
uint8_t val;
struct {
uint8_t index:5; // actual address
- uint8_t internal_palette_address_srouce:1;
+ uint8_t internal_palette_address_source:1;
// 0 => use the internal color palette (load the regs)
// 1 => use the external color palette
uint8_t reserved:2;
union {
uint8_t val;
struct {
- union {
- uint8_t fore:3;
- struct {
- uint8_t fore_red:1;
- uint8_t fore_green:1;
- uint8_t fore_blue:1;
- } __attribute__((packed));
- } __attribute__((packed));
+ uint8_t fore:3; //foreground color
uint8_t foreground_intensity_or_font_select:1; // depends on char map select reg
// character map selection is effected
// when memory_mode.extended meomory=1
// and the two character map enteries on character_map_select are
// different
- union {
- uint8_t back:3;
- struct {
- uint8_t back_red:1;
- uint8_t back_green:1;
- uint8_t back_blue:1;
- } __attribute__((packed));
- } __attribute__((packed));
+ uint8_t back:3; //background color
uint8_t blinking_or_bg_intensity:1;
// attribute mode control.enableblink = 1 => blink
// =0 => intensity (16 colors of bg)
/* called by frontend, send pkt to VNET */
static int vnet_nic_send(uint8_t * buf, uint32_t len,
- void * private_data) {
+ int synchronize, void * private_data) {
struct vnet_nic_state * vnetnic = (struct vnet_nic_state *)private_data;
struct v3_vnet_pkt pkt;
memcpy(pkt.header, buf, ETHERNET_HEADER_LEN);
pkt.data = buf;
-#ifdef CONFIG_DEBUG_VNET_NIC
- {
- PrintDebug("VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n",
+ V3_Net_Print(2, "VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n",
pkt.size, pkt.src_id, pkt.src_type);
- v3_hexdump(buf, len, NULL, 0);
+ if(v3_net_debug >= 4){
+ v3_hexdump(buf, len, NULL, 0);
}
-#endif
- return v3_vnet_send_pkt(&pkt, NULL);;
+ return v3_vnet_send_pkt(&pkt, NULL, synchronize);
}
void * private_data){
struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data;
- PrintDebug("VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
+ V3_Net_Print(2, "VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type);
return vnetnic->net_ops.recv(pkt->data, pkt->size,
vnetnic->net_ops.frontend_data);
}
-/* poll data from front-end */
-static void virtio_poll(struct v3_vm_info * info,
- int budget,
- void * private_data){
- struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data;
-
- vnetnic->net_ops.poll(info, budget, vnetnic->net_ops.frontend_data);
-}
-
static int vnet_nic_free(struct vnet_nic_state * vnetnic) {
static struct v3_vnet_dev_ops vnet_dev_ops = {
.input = virtio_input,
- .poll = virtio_poll,
};
--- /dev/null
+menu "Extensions"
+
+config EXT_VTIME
+ bool "Enable Time virtualization"
+ default n
+ help
+ Enables the timer virtualization extensions. These hide the cost of
+ running inside the VMM context. This can aid the consistency of
+ time between multiple timers, but can cause the guest to run
+ a good bit slower than the host in VM-intensive parts of the code.
+
+
+config EXT_VTSC
+ bool "Fully virtualize guest TSC"
+ default n
+ depends on EXT_VTIME
+ help
+ Virtualize the processor time stamp counter in the guest,
+ generally increasing consistency between various time sources
+ but also potentially making guest time run slower than real time.
+
+config EXT_MTRRS
+ bool "Support virtualized MTRRs"
+ default n
+ help
+ Provides a virtualized set of MTRR registers
+
+config EXT_MACH_CHECK
+ bool "Support Machine Check functionality"
+ default n
+ help
+ Provides a virtualized machine check architecture
+
+
+config EXT_INSPECTOR
+ bool "VM Inspector"
+ default n
+ help
+ Provides the inspection extension
+
+endmenu
--- /dev/null
+obj-y += null.o
+obj-$(CONFIG_EXT_MTRRS) += ext_mtrr.o
+obj-$(CONFIG_EXT_VTSC) += ext_vtsc.o
+obj-$(CONFIG_EXT_VTIME) += ext_vtime.o
+obj-$(CONFIG_EXT_INSPECTOR) += ext_inspector.o
*/
-#include <palacios/vmm_inspector.h>
+//#include <palacios/vmm_inspector.h>
#include <palacios/vmm.h>
#include <palacios/vm_guest.h>
#include <palacios/vmm_sprintf.h>
+#include <palacios/vmm_extensions.h>
+
+#include <palacios/vmm_multitree.h>
+#include <interfaces/inspector.h>
// Note that v3_inspect_node_t is actuall a struct v3_mtree
// Its set as void for opaque portability
+struct v3_inspector_state {
+ struct v3_mtree state_tree;
+
+};
-int v3_init_inspector(struct v3_vm_info * vm) {
- struct v3_inspector_state * state = (struct v3_inspector_state *)&(vm->inspector);
+static int init_inspector(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+ struct v3_inspector_state * state = V3_Malloc(sizeof(struct v3_inspector_state));
memset(state, 0, sizeof(struct v3_inspector_state));
strncpy(state->state_tree.name, "vm->name", 50);
state->state_tree.subtree = 1;
+ *priv_data = state;
+
return 0;
}
-int v3_init_inspector_core(struct guest_info * core) {
- struct v3_inspector_state * vm_state = &(core->vm_info->inspector);
+static int init_inspector_core(struct guest_info * core, void * priv_data) {
+ struct v3_inspector_state * vm_state = priv_data;
char core_name[50];
snprintf(core_name, 50, "core.%d", core->cpu_id);
v3_inspect_64(cr_node, "EFER", (uint64_t *)&(core->ctrl_regs.efer));
- // struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS");
+ //struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS");
}
+
+
+
+static struct v3_extension_impl inspector_impl = {
+ .name = "inspector",
+ .init = init_inspector,
+ .deinit = NULL,
+ .core_init = init_inspector_core,
+ .core_deinit = NULL,
+ .on_entry = NULL,
+ .on_exit = NULL
+};
+
+
+register_extension(&inspector_impl);
+
+
v3_inspect_node_t * v3_inspect_add_subtree(v3_inspect_node_t * root, char * name) {
return v3_mtree_create_subtree(root, name);
}
-
-
int v3_find_inspection_value(v3_inspect_node_t * node, char * name,
struct v3_inspection_value * value) {
struct v3_mtree * mt_node = v3_mtree_find_node(node, name);
v3_inspect_node_t * v3_get_inspection_root(struct v3_vm_info * vm) {
- return &(vm->inspector.state_tree);
+ struct v3_inspector_state * inspector = v3_get_extension_state(vm, inspector_impl.name);
+
+ if (inspector == NULL) {
+ return NULL;
+ }
+
+ return &(inspector->state_tree);
}
v3_inspect_node_t * v3_get_inspection_subtree(v3_inspect_node_t * root, char * name) {
v3_inspect_node_t * v3_inspection_first_child(v3_inspect_node_t * root) {
return v3_mtree_first_child(root);
}
+
+
+
+
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+#include <palacios/vmm.h>
+#include <palacios/vmm_time.h>
+#include <palacios/vm_guest.h>
+
+
+
+
+/* Overview
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest
+ * resolution, lowest overhead timer on modern CPUs that it can - the
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a
+ * constant rate TSC, and Palacios relies on this fact.
+ *
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ * time in the guest. This is computed using an offsets from (1) above.
+ * (3) The actual guest timestamp counter (which can be written by
+ * writing to the guest TSC MSR - MSR 0x10) from the monotonic guest TSC.
+ * This is also computed as an offset from (2) above when the TSC and
+ * this offset is updated when the TSC MSR is written.
+ *
+ * The value used to offset the guest TSC from the host TSC is the *sum* of all
+ * of these offsets (2 and 3) above
+ *
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest,
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ * Future additions:
+ * (1) Add support for temporarily skewing guest time off of where it should
+ * be to support slack simulation of guests. The idea is that simulators
+ * set this skew to be the difference between how much time passed for a
+ * simulated feature and a real implementation of that feature, making it
+ * pass at a different rate from real time on this core. The VMM will then
+ * attempt to move this skew back towards 0 subject to resolution/accuracy
+ * constraints from various system timers.
+ *
+ * The main effort in doing this will be to get accuracy/resolution
+ * information from each local timer and to use this to bound how much skew
+ * is removed on each exit.
+ */
+
+
+
+struct vtime_state {
+ uint32_t guest_cpu_freq; // can be lower than host CPU freq!
+ uint64_t initial_time; // Time when VMM started.
+ sint64_t guest_host_offset;// Offset of monotonic guest time from host time
+};
+
+
+
+
+// Shift monotonic guest time relative to host time by `offset` cycles.
+// The adjustment is cumulative across calls and is applied to the
+// core's vm_time state (info->time_state).  Always returns 0.
+static int offset_time( struct guest_info * info, sint64_t offset )
+{
+    struct vm_time * time_state = &(info->time_state);
+//    PrintDebug("Adding additional offset of %lld to guest time.\n", offset);
+    time_state->guest_host_offset += offset;
+    return 0;
+}
+
+
+// Control guest time in relation to host time so that the two stay
+// appropriately synchronized to the extent possible.
+// Control guest time in relation to host time so that the two stay
+// appropriately synchronized to the extent possible.
+// Strategy: (1) yield the host until it reaches where it "should" be
+// given elapsed guest time; (2) if the host then overshot, skew guest
+// time forward, bounded to 1/10 of the last exit's duration.
+int v3_adjust_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t host_time, target_host_time;
+    uint64_t guest_time, target_guest_time, old_guest_time;
+    uint64_t guest_elapsed, host_elapsed, desired_elapsed;
+
+    /* Compute the target host time given how much time has *already*
+     * passed in the guest */
+    guest_time = v3_get_guest_time(time_state);
+    guest_elapsed = (guest_time - time_state->initial_time);
+    desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq;
+    target_host_time = time_state->initial_time + desired_elapsed;
+
+    /* Now, let the host run while the guest is stopped to make the two
+     * sync up. */
+    host_time = v3_get_host_time(time_state);
+    old_guest_time = v3_get_guest_time(time_state);
+
+    while (target_host_time > host_time) {
+	v3_yield(info);
+	host_time = v3_get_host_time(time_state);
+    }
+
+    guest_time = v3_get_guest_time(time_state);
+
+    // We do *not* assume the guest timer was paused in the VM. If it was
+    // this offsetting is 0. If it wasn't we need this.
+    offset_time(info, (sint64_t)old_guest_time - (sint64_t)guest_time);
+
+    /* Now the host may have gotten ahead of the guest because
+     * yielding is a coarse grained thing. Figure out what guest time
+     * we want to be at, and use the offsetting mechanism in
+     * the VMM to make the guest run forward. We limit *how* much we skew
+     * it forward to prevent the guest time making large jumps,
+     * however. */
+    host_elapsed = host_time - time_state->initial_time;
+    desired_elapsed = (host_elapsed * time_state->guest_cpu_freq) / time_state->host_cpu_freq;
+    target_guest_time = time_state->initial_time + desired_elapsed;
+
+    if (guest_time < target_guest_time) {
+	uint64_t max_skew, desired_skew, skew;
+
+	if (time_state->enter_time) {
+	    // bound forward skew by 1/10 of the last exit's duration
+	    max_skew = (time_state->exit_time - time_state->enter_time) / 10;
+	} else {
+	    max_skew = 0;
+	}
+
+	desired_skew = target_guest_time - guest_time;
+	skew = desired_skew > max_skew ? max_skew : desired_skew;
+/*	PrintDebug("Guest %llu cycles behind where it should be.\n",
+		   desired_skew);
+	PrintDebug("Limit on forward skew is %llu. Skewing forward %llu.\n",
+		   max_skew, skew); */
+
+	offset_time(info, skew);
+    }
+
+    return 0;
+}
+
+
+// Initialize the guest CPU frequency from the "khz" config option,
+// clamping to the host frequency when absent, non-positive, or larger
+// than the host's.
+// NOTE(review): mid-refactor scaffolding — `khz`, `cfg_tree`,
+// `time_state`, and `info` are not declared in this scope; the function
+// still needs its extension-init signature and local setup.
+static int init() {
+    khz = v3_cfg_val(cfg_tree, "khz");
+
+    if (khz) {
+	time_state->guest_cpu_freq = atoi(khz);
+	PrintDebug("Core %d CPU frequency requested at %d khz.\n",
+		   info->cpu_id, time_state->guest_cpu_freq);
+    }
+
+    if ( (khz == NULL) ||
+	 (time_state->guest_cpu_freq <= 0) ||
+	 (time_state->guest_cpu_freq > time_state->host_cpu_freq) ) {
+
+	time_state->guest_cpu_freq = time_state->host_cpu_freq;
+    }
+
+    // FIX: non-void function previously fell off the end (undefined
+    // behavior if the caller uses the result)
+    return 0;
+}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_time.h>
+#include <palacios/vm_guest.h>
+
+
+// Functions for handling exits on the TSC when fully virtualizing
+// the timestamp counter.
+#define TSC_MSR 0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
+
+
+struct vtsc_state {
+
+ struct v3_msr tsc_aux; // Auxiliary MSR for RDTSCP
+
+};
+
+
+
+/*
+ * Handle full virtualization of the time stamp counter. As noted
+ * above, we don't store the actual value of the TSC, only the guest's
+ * offset from monotonic guest's time. If the guest writes to the TSC, we
+ * handle this by changing that offset.
+ *
+ * Possible TODO: Proper hooking of TSC read/writes?
+ */
+
+// Core of virtualized RDTSC: load the guest's virtual TSC into the
+// EDX:EAX register pair.  Always returns 0.
+static int rdtsc(struct guest_info * info) {
+    uint64_t tscval = v3_get_guest_tsc(&info->time_state);
+
+    info->vm_regs.rdx = tscval >> 32;
+    info->vm_regs.rax = tscval & 0xffffffffLL;
+
+    return 0;
+}
+
+// Exit handler for RDTSC: perform the virtual TSC read, mask the
+// results to 32 bits, and advance RIP past the 2-byte instruction.
+int v3_handle_rdtsc(struct guest_info * info) {
+    rdtsc(info);
+
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 2;
+
+    return 0;
+}
+
+// RDTSCP = RDTSC plus a load of the TSC_AUX MSR into ECX.  Reads the
+// aux MSR through the MSR-hook machinery, then performs the TSC half.
+int v3_rdtscp(struct guest_info * info) {
+    int ret;
+    /* First get the MSR value that we need. It's safe to futz with
+     * ra/c/dx here since they're modified by this instruction anyway. */
+    info->vm_regs.rcx = TSC_AUX_MSR;
+    ret = v3_handle_msr_read(info);
+
+    if (ret != 0) {
+	return ret;
+    }
+
+    info->vm_regs.rcx = info->vm_regs.rax;
+
+    /* Now do the TSC half of the instruction */
+    // FIX: call the file-local rdtsc() helper; `v3_rdtsc` is not
+    // declared anywhere in this file, so the old call was an implicit
+    // declaration / unresolved symbol
+    ret = rdtsc(info);
+
+    if (ret != 0) {
+	return ret;
+    }
+
+    return 0;
+}
+
+
+// Exit handler for RDTSCP: perform the virtual RDTSCP, mask the results
+// to 32 bits, and advance RIP past the 3-byte instruction.
+int v3_handle_rdtscp(struct guest_info * info) {
+    PrintDebug("Handling virtual RDTSCP call.\n");
+
+    v3_rdtscp(info);
+
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 3;
+
+    return 0;
+}
+
+
+
+
+// MSR read hook for TSC_AUX (0xC0000103): return the stored aux value.
+// NOTE(review): reads tsc_aux out of the core's vm_time state rather
+// than the vtsc_state struct defined above — confirm which owns it
+// after the extension refactor.
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num,
+			         struct v3_msr *msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    msr_val->lo = time_state->tsc_aux.lo;
+    msr_val->hi = time_state->tsc_aux.hi;
+
+    return 0;
+}
+
+
+// MSR write hook for TSC_AUX (0xC0000103): store the new aux value.
+// NOTE(review): writes into the core's vm_time state, not the
+// vtsc_state struct defined above — confirm post-refactor ownership.
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num,
+			          struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    time_state->tsc_aux.lo = msr_val.lo;
+    time_state->tsc_aux.hi = msr_val.hi;
+
+    return 0;
+}
+
+
+// MSR read hook for the TSC (MSR 0x10): report the guest's virtual TSC.
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+			     struct v3_msr *msr_val, void *priv) {
+    uint64_t time = v3_get_guest_tsc(&info->time_state);
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    msr_val->hi = time >> 32;
+    msr_val->lo = time & 0xffffffffLL;
+
+    return 0;
+}
+
+
+// MSR write hook for the TSC (MSR 0x10).  Rather than storing the TSC
+// value itself, record the difference between the written value and the
+// current monotonic guest time as tsc_guest_offset, so monotonic guest
+// time itself is never perturbed (see the file header comment).
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+			     struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+    time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time;
+
+    return 0;
+}
+
+
+// Tear down the TSC virtualization MSR hooks.
+// NOTE(review): `vm` is not declared in this scope — this extension
+// scaffolding still needs its deinit signature filled in.
+static int deinit() {
+    v3_unhook_msr(vm, TSC_MSR);
+    v3_unhook_msr(vm, TSC_AUX_MSR);
+    // FIX: non-void function previously fell off the end (undefined
+    // behavior if the caller uses the result)
+    return 0;
+}
+
+
+// Install the TSC and TSC_AUX MSR hooks and clear the aux MSR.
+// NOTE(review): mid-refactor scaffolding — `time_state`, `ret`, and
+// `vm` are not declared in this scope yet; the extension-init signature
+// still needs to be filled in.
+static int init() {
+
+    time_state->tsc_aux.lo = 0;
+    time_state->tsc_aux.hi = 0;
+
+
+
+    PrintDebug("Installing TSC MSR hook.\n");
+    ret = v3_hook_msr(vm, TSC_MSR,
+		      tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+
+    if (ret != 0) {
+	return ret;
+    }
+
+    PrintDebug("Installing TSC_AUX MSR hook.\n");
+    ret = v3_hook_msr(vm, TSC_AUX_MSR, tsc_aux_msr_read_hook,
+		      tsc_aux_msr_write_hook, NULL);
+
+    if (ret != 0) {
+	return ret;
+    }
+
+    // FIX: non-void function previously fell off the end (undefined
+    // behavior if the caller uses the result)
+    return 0;
+}
--- /dev/null
+/** \file
+ * Do nothing module.
+ *
+ * This file only exists to appease the kbuild gods.
+ */
+
v3_host_dev_t v3_host_dev_open(char *impl,
v3_bus_class_t bus,
- v3_guest_dev_t gdev)
+ v3_guest_dev_t gdev,
+ struct v3_vm_info *vm)
{
V3_ASSERT(host_dev_hooks != NULL);
V3_ASSERT(host_dev_hooks->open != NULL);
- return host_dev_hooks->open(impl,bus,gdev);
+ return host_dev_hooks->open(impl,bus,gdev,vm->host_priv_data);
}
int v3_host_dev_close(v3_host_dev_t hdev)
V3_ASSERT(host_dev_hooks != NULL);
V3_ASSERT(host_dev_hooks->read_mem != NULL);
- return host_dev_hooks->read_mem(hdev,gpa,dst,len);
+ return host_dev_hooks->read_mem(hdev,(void*)gpa,dst,len);
}
uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev,
V3_ASSERT(host_dev_hooks != NULL);
V3_ASSERT(host_dev_hooks->write_mem != NULL);
- return host_dev_hooks->write_mem(hdev,gpa,src,len);
+ return host_dev_hooks->write_mem(hdev,(void*)gpa,src,len);
}
uint64_t v3_host_dev_read_config(v3_host_dev_t hdev,
uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t hostdev,
v3_guest_dev_t guest_dev,
- addr_t gpa,
+ void * gpa,
void *dst,
uint64_t len)
{
if (!vm) {
return 0;
} else {
- return v3_read_gpa_memory(&(vm->cores[0]), gpa, len, dst);
+ return v3_read_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, dst);
}
}
}
uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t hostdev,
v3_guest_dev_t guest_dev,
- addr_t gpa,
+ void * gpa,
void *src,
uint64_t len)
{
if (!vm) {
return 0;
} else {
- return v3_write_gpa_memory(&(vm->cores[0]), gpa, len, src);
+ return v3_write_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, src);
}
}
}
vmm_binaries.o \
vmm_cpuid.o \
vmm_xml.o \
- vmm_muxer.o \
vmm_mem_hook.o \
vmm_mptable.o \
vmm_extensions.o \
vmm_multitree.o \
-obj-$(CONFIG_INSPECTOR) += vmm_inspector.o
obj-$(CONFIG_XED) += vmm_xed.o
vmx_io.o \
vmx_lowlevel.o \
vmx_msr.o \
+ vmx_hw_info.o \
vmcs.o \
vmx_ctrl_regs.o \
vmx_assist.o
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_lowlevel.h>
#include <palacios/vmm_sprintf.h>
-#include <palacios/vmm_muxer.h>
#include <palacios/vmm_xed.h>
#include <palacios/vmm_direct_paging.h>
int v3_init_vm(struct v3_vm_info * vm) {
v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
- if (v3_get_foreground_vm() == NULL) {
- v3_set_foreground_vm(vm);
- }
-#ifdef CONFIG_INSPECTOR
- v3_init_inspector(vm);
-#endif
#ifdef CONFIG_TELEMETRY
v3_init_telemetry(vm);
v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
struct v3_vm_info * vm = core->vm_info;
-#ifdef CONFIG_INSPECTOR
- v3_init_inspector_core(core);
-#endif
+
/*
* Initialize the subsystem data strutures
}
if (reg->flags.alloced == 0) {
- PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n",
- (void *)gpa);
- v3_print_mem_map(info->vm_info);
+ //PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n",
+ // (void *)gpa);
+ //v3_print_mem_map(info->vm_info);
return -1;
}
*hva = 0;
if (v3_gpa_to_hpa(guest_info, gpa, &hpa) != 0) {
- PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n",
- (void *)gpa);
+ // PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n",
+ // (void *)gpa);
return -1;
}
#ifdef __V3_32BIT__
print_vmcs_field(VMCS_IO_BITMAP_A_ADDR_HIGH);
#endif
+
print_vmcs_field(VMCS_IO_BITMAP_B_ADDR);
#ifdef __V3_32BIT__
print_vmcs_field(VMCS_IO_BITMAP_B_ADDR_HIGH);
/*
* Returns the field length in bytes
+ * It doesn't get much uglier than this... Thanks Intel
*/
int v3_vmcs_get_field_len(vmcs_field_t field) {
- switch(field) {
- /* 16 bit Control Fields */
- case VMCS_GUEST_ES_SELECTOR:
- case VMCS_GUEST_CS_SELECTOR:
- case VMCS_GUEST_SS_SELECTOR:
- case VMCS_GUEST_DS_SELECTOR:
- case VMCS_GUEST_FS_SELECTOR:
- case VMCS_GUEST_GS_SELECTOR:
- case VMCS_GUEST_LDTR_SELECTOR:
- case VMCS_GUEST_TR_SELECTOR:
- case VMCS_HOST_ES_SELECTOR:
- case VMCS_HOST_CS_SELECTOR:
- case VMCS_HOST_SS_SELECTOR:
- case VMCS_HOST_DS_SELECTOR:
- case VMCS_HOST_FS_SELECTOR:
- case VMCS_HOST_GS_SELECTOR:
- case VMCS_HOST_TR_SELECTOR:
- return 2;
-
- /* 32 bit Control Fields */
- case VMCS_PIN_CTRLS:
- case VMCS_PROC_CTRLS:
- case VMCS_SEC_PROC_CTRLS:
- case VMCS_EXCP_BITMAP:
- case VMCS_PG_FAULT_ERR_MASK:
- case VMCS_PG_FAULT_ERR_MATCH:
- case VMCS_CR3_TGT_CNT:
- case VMCS_EXIT_CTRLS:
- case VMCS_EXIT_MSR_STORE_CNT:
- case VMCS_EXIT_MSR_LOAD_CNT:
- case VMCS_ENTRY_CTRLS:
- case VMCS_ENTRY_MSR_LOAD_CNT:
- case VMCS_ENTRY_INT_INFO:
- case VMCS_ENTRY_EXCP_ERR:
- case VMCS_ENTRY_INSTR_LEN:
- case VMCS_TPR_THRESHOLD:
- case VMCS_INSTR_ERR:
- case VMCS_EXIT_REASON:
- case VMCS_EXIT_INT_INFO:
- case VMCS_EXIT_INT_ERR:
- case VMCS_IDT_VECTOR_INFO:
- case VMCS_IDT_VECTOR_ERR:
- case VMCS_EXIT_INSTR_LEN:
- case VMCS_EXIT_INSTR_INFO:
- case VMCS_GUEST_ES_LIMIT:
- case VMCS_GUEST_CS_LIMIT:
- case VMCS_GUEST_SS_LIMIT:
- case VMCS_GUEST_DS_LIMIT:
- case VMCS_GUEST_FS_LIMIT:
- case VMCS_GUEST_GS_LIMIT:
- case VMCS_GUEST_LDTR_LIMIT:
- case VMCS_GUEST_TR_LIMIT:
- case VMCS_GUEST_GDTR_LIMIT:
- case VMCS_GUEST_IDTR_LIMIT:
- case VMCS_GUEST_ES_ACCESS:
- case VMCS_GUEST_CS_ACCESS:
- case VMCS_GUEST_SS_ACCESS:
- case VMCS_GUEST_DS_ACCESS:
- case VMCS_GUEST_FS_ACCESS:
- case VMCS_GUEST_GS_ACCESS:
- case VMCS_GUEST_LDTR_ACCESS:
- case VMCS_GUEST_TR_ACCESS:
- case VMCS_GUEST_INT_STATE:
- case VMCS_GUEST_ACTIVITY_STATE:
- case VMCS_GUEST_SMBASE:
- case VMCS_GUEST_SYSENTER_CS:
- case VMCS_HOST_SYSENTER_CS:
- return 4;
+ struct vmcs_field_encoding * enc = (struct vmcs_field_encoding *)&field;
-
- /* high bits of variable width fields
- * We can probably just delete most of these....
- */
- case VMCS_IO_BITMAP_A_ADDR_HIGH:
- case VMCS_IO_BITMAP_B_ADDR_HIGH:
- case VMCS_MSR_BITMAP_HIGH:
- case VMCS_EXIT_MSR_STORE_ADDR_HIGH:
- case VMCS_EXIT_MSR_LOAD_ADDR_HIGH:
- case VMCS_ENTRY_MSR_LOAD_ADDR_HIGH:
- case VMCS_EXEC_PTR_HIGH:
- case VMCS_TSC_OFFSET_HIGH:
- case VMCS_VAPIC_ADDR_HIGH:
- case VMCS_APIC_ACCESS_ADDR_HIGH:
- case VMCS_LINK_PTR_HIGH:
- case VMCS_GUEST_DBG_CTL_HIGH:
- case VMCS_GUEST_PERF_GLOBAL_CTRL_HIGH:
- case VMCS_HOST_PERF_GLOBAL_CTRL_HIGH:
- case VMCS_GUEST_EFER_HIGH:
+ switch (enc->width) {
+ case 0:
+ return 2;
+ case 1: {
+ if (enc->access_type == 1) {
+ return 4;
+ } else {
+#ifdef __V3_64BIT__
+ return 8;
+#else
+ return 4;
+#endif
+ }
+ }
+ case 2:
return 4;
-
- /* Natural Width Control Fields */
- case VMCS_IO_BITMAP_A_ADDR:
- case VMCS_IO_BITMAP_B_ADDR:
- case VMCS_MSR_BITMAP:
- case VMCS_EXIT_MSR_STORE_ADDR:
- case VMCS_EXIT_MSR_LOAD_ADDR:
- case VMCS_ENTRY_MSR_LOAD_ADDR:
- case VMCS_EXEC_PTR:
- case VMCS_TSC_OFFSET:
- case VMCS_VAPIC_ADDR:
- case VMCS_APIC_ACCESS_ADDR:
- case VMCS_LINK_PTR:
- case VMCS_GUEST_DBG_CTL:
- case VMCS_GUEST_PERF_GLOBAL_CTRL:
- case VMCS_HOST_PERF_GLOBAL_CTRL:
- case VMCS_CR0_MASK:
- case VMCS_CR4_MASK:
- case VMCS_CR0_READ_SHDW:
- case VMCS_CR4_READ_SHDW:
- case VMCS_CR3_TGT_VAL_0:
- case VMCS_CR3_TGT_VAL_1:
- case VMCS_CR3_TGT_VAL_2:
- case VMCS_CR3_TGT_VAL_3:
- case VMCS_EXIT_QUAL:
- case VMCS_IO_RCX:
- case VMCS_IO_RSI:
- case VMCS_IO_RDI:
- case VMCS_IO_RIP:
- case VMCS_GUEST_LINEAR_ADDR:
- case VMCS_GUEST_CR0:
- case VMCS_GUEST_CR3:
- case VMCS_GUEST_CR4:
- case VMCS_GUEST_ES_BASE:
- case VMCS_GUEST_CS_BASE:
- case VMCS_GUEST_SS_BASE:
- case VMCS_GUEST_DS_BASE:
- case VMCS_GUEST_FS_BASE:
- case VMCS_GUEST_GS_BASE:
- case VMCS_GUEST_LDTR_BASE:
- case VMCS_GUEST_TR_BASE:
- case VMCS_GUEST_GDTR_BASE:
- case VMCS_GUEST_IDTR_BASE:
- case VMCS_GUEST_DR7:
- case VMCS_GUEST_RSP:
- case VMCS_GUEST_RIP:
- case VMCS_GUEST_RFLAGS:
- case VMCS_GUEST_PENDING_DBG_EXCP:
- case VMCS_GUEST_SYSENTER_ESP:
- case VMCS_GUEST_SYSENTER_EIP:
- case VMCS_HOST_CR0:
- case VMCS_HOST_CR3:
- case VMCS_HOST_CR4:
- case VMCS_HOST_FS_BASE:
- case VMCS_HOST_GS_BASE:
- case VMCS_HOST_TR_BASE:
- case VMCS_HOST_GDTR_BASE:
- case VMCS_HOST_IDTR_BASE:
- case VMCS_HOST_SYSENTER_ESP:
- case VMCS_HOST_SYSENTER_EIP:
- case VMCS_HOST_RSP:
- case VMCS_HOST_RIP:
- case VMCS_GUEST_EFER:
+ case 3:
return sizeof(addr_t);
-
default:
PrintError("Invalid VMCS field: 0x%x\n", field);
return -1;
struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
- V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
-
-
if (vm == NULL) {
PrintError("Could not configure guest\n");
return NULL;
}
+ V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
+
if (name == NULL) {
name = "[V3_VM]";
} else if (strlen(name) >= 128) {
break;
}
- V3_Print("Yielding\n");
-
v3_yield(NULL);
}
-/*
+/*
* This file is part of the Palacios Virtual Machine Monitor developed
* by the V3VEE Project with funding from the United States National
* Science Foundation and the Department of Energy.
* and the University of New Mexico. You can find out more at
* http://www.v3vee.org
*
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
* All rights reserved.
*
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
*/
-#ifndef __VMM_MUXER_H__
-#define __VMM_MUXER_H__
-#ifdef __V3VEE__
-
-
-struct v3_vm_info;
-
-
-
-struct v3_vm_info * v3_get_foreground_vm();
-void v3_set_foreground_vm(struct v3_vm_info * vm);
-
-
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm));
-
-
-#endif
-
-#endif
+#include <util/vmm_barrier.h>
info->core_run_state = CORE_STOPPED;
+ if (v3_init_core_extensions(info) == -1) {
+ PrintError("Error initializing extension core states\n");
+ return -1;
+ }
+
if (info->vm_info->vm_class == V3_PC_VM) {
if (post_config_pc_core(info, cfg) == -1) {
PrintError("PC Post configuration failure\n");
+
static int setup_memory_map(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
v3_cfg_tree_t * mem_region = v3_cfg_subtree(v3_cfg_subtree(cfg, "memmap"), "region");
}
- if (vm->num_cores > 1) {
+ if (vm->num_cores>1 && !v3_find_dev(vm,"apic")) {
+ PrintError("palacios: VM has more than one core, but no device named \"apic\"!\n");
+ return -1;
+ }
+
+ if (v3_find_dev(vm,"apic")) {
+ if (!v3_find_dev(vm,"ioapic")) {
+ PrintError("palacios: VM cores have apics, but there is no device named \"ioapic\"!\n");
+ }
if (v3_inject_mptable(vm) == -1) {
PrintError("Failed to inject mptable during configuration\n");
return -1;
}
}
-
+
return 0;
}
* and the University of New Mexico. You can find out more at
* http://www.v3vee.org
*
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
* All rights reserved.
*
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
*
* This is free software. You are permitted to use,
* redistribute, and modify it as specified in the file "V3VEE_LICENSE".
#include <palacios/vmm_lowlevel.h>
#include <palacios/vm_guest.h>
+struct masked_cpuid {
+ uint32_t rax_mask;
+ uint32_t rbx_mask;
+ uint32_t rcx_mask;
+ uint32_t rdx_mask;
+
+ uint32_t rax;
+ uint32_t rbx;
+ uint32_t rcx;
+ uint32_t rdx;
+};
+
void v3_init_cpuid_map(struct v3_vm_info * vm) {
vm->cpuid_map.map.rb_node = NULL;
+
+ // Setup default cpuid entries
+
+
+ // Disable XSAVE (cpuid 0x01, ECX bit 26)
+ v3_cpuid_add_fields(vm, 0x01, 0, 0, 0, 0, (1 << 26), 0, 0, 0);
+
}
+
+
+
int v3_deinit_cpuid_map(struct v3_vm_info * vm) {
struct rb_node * node = v3_rb_first(&(vm->cpuid_map.map));
struct v3_cpuid_hook * hook = NULL;
}
+
+static int mask_hook(struct guest_info * core, uint32_t cpuid,
+ uint32_t * eax, uint32_t * ebx,
+ uint32_t * ecx, uint32_t * edx,
+ void * priv_data) {
+ struct masked_cpuid * mask = (struct masked_cpuid *)priv_data;
+
+ v3_cpuid(cpuid, eax, ebx, ecx, edx);
+
+ *eax &= ~(mask->rax_mask);
+ *eax |= mask->rax;
+
+ *ebx &= ~(mask->rbx_mask);
+ *ebx |= mask->rbx;
+
+ *ecx &= ~(mask->rcx_mask);
+ *ecx |= mask->rcx;
+
+ *edx &= ~(mask->rdx_mask);
+ *edx |= mask->rdx;
+
+ return 0;
+}
+
+int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid,
+ uint32_t rax_mask, uint32_t rax,
+ uint32_t rbx_mask, uint32_t rbx,
+ uint32_t rcx_mask, uint32_t rcx,
+ uint32_t rdx_mask, uint32_t rdx) {
+ struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid);
+
+ if (hook == NULL) {
+ struct masked_cpuid * mask = V3_Malloc(sizeof(struct masked_cpuid));
+ memset(mask, 0, sizeof(struct masked_cpuid));
+
+ mask->rax_mask = rax_mask;
+ mask->rax = rax;
+ mask->rbx_mask = rbx_mask;
+ mask->rbx = rbx;
+ mask->rcx_mask = rcx_mask;
+ mask->rcx = rcx;
+ mask->rdx_mask = rdx_mask;
+ mask->rdx = rdx;
+
+ if (v3_hook_cpuid(vm, cpuid, mask_hook, mask) == -1) {
+ PrintError("Error hooking cpuid %d\n", cpuid);
+ return -1;
+ }
+ } else {
+ struct masked_cpuid * mask = NULL;
+ uint32_t tmp_val = 0;
+
+ if (hook->hook_fn != mask_hook) {
+ PrintError("trying to add fields to a fully hooked cpuid (%d)\n", cpuid);
+ return -1;
+ }
+
+ mask = (struct masked_cpuid *)(hook->private_data);
+
+ if ((mask->rax_mask & rax_mask) ||
+ (mask->rbx_mask & rbx_mask) ||
+ (mask->rcx_mask & rcx_mask) ||
+ (mask->rdx_mask & rdx_mask)) {
+ PrintError("Trying to add fields that have already been masked\n");
+ return -1;
+ }
+
+ if ((~rax_mask & rax) || (~rbx_mask & rbx) ||
+ (~rcx_mask & rcx) || (~rdx_mask & rdx)) {
+ PrintError("Invalid cpuid reg value (mask overrun)\n");
+ return -1;
+ }
+
+ mask->rax_mask |= rax_mask;
+ mask->rbx_mask |= rbx_mask;
+ mask->rcx_mask |= rcx_mask;
+ mask->rdx_mask |= rdx_mask;
+
+ mask->rax |= rax;
+ tmp_val = (~rax_mask | rax);
+ mask->rax &= tmp_val;
+
+ mask->rbx |= rbx;
+ tmp_val = (~rbx_mask | rbx);
+ mask->rbx &= tmp_val;
+
+ mask->rcx |= rcx;
+ tmp_val = (~rcx_mask | rcx);
+ mask->rcx &= tmp_val;
+
+ mask->rdx |= rdx;
+ tmp_val = (~rdx_mask | rdx);
+ mask->rdx &= tmp_val;
+
+ }
+
+ return 0;
+}
+
int v3_unhook_cpuid(struct v3_vm_info * vm, uint32_t cpuid) {
struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid);
return 0;
}
+
+
+
+
+
struct rflags * flags_reg = (struct rflags *)&(core->ctrl_regs.rflags);
- PrintError("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx);
+ PrintDebug("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx);
if (instr->op_type == V3_OP_MOVS) {
}
+
+
int V3_deinit_extensions() {
v3_free_htable(ext_table, 0, 0);
return 0;
return 0;
}
+
+int v3_deinit_ext_manager(struct v3_vm_info * vm) {
+
+ PrintError("I should really do something here... \n");
+ return -1;
+}
+
+
+
int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg) {
struct v3_extension_impl * impl = NULL;
struct v3_extension * ext = NULL;
return 0;
}
+
+int v3_init_core_extensions(struct guest_info * core) {
+ struct v3_extension * ext = NULL;
+
+ list_for_each_entry(ext, &(core->vm_info->extensions.extensions), node) {
+ if ((ext->impl) && (ext->impl->core_init)) {
+ if (ext->impl->core_init(core, ext->priv_data) == -1) {
+ PrintError("Error configuring per core extension %s on core %d\n",
+ ext->impl->name, core->cpu_id);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+
+
+void * v3_get_extension_state(struct v3_vm_info * vm, const char * name) {
+ struct v3_extension * ext = NULL;
+
+ list_for_each_entry(ext, &(vm->extensions.extensions), node) {
+ if (strncmp(ext->impl->name, name, strlen(ext->impl->name)) == 0) {
+ return ext->priv_data;
+ }
+ }
+
+ return NULL;
+}
#include <palacios/vmm.h>
#include <palacios/vmm_host_events.h>
#include <palacios/vm_guest.h>
-#include <palacios/vmm_muxer.h>
int v3_init_host_events(struct v3_vm_info * vm) {
struct v3_host_events * host_evts = &(vm->host_event_hooks);
struct v3_host_events * host_evts = NULL;
struct v3_host_event_hook * hook = NULL;
- if (vm == NULL) {
- vm = v3_get_foreground_vm();
- }
host_evts = &(vm->host_event_hooks);
struct v3_host_events * host_evts = NULL;
struct v3_host_event_hook * hook = NULL;
- if (vm == NULL) {
- vm = v3_get_foreground_vm();
- }
host_evts = &(vm->host_event_hooks);
struct v3_host_events * host_evts = NULL;
struct v3_host_event_hook * hook = NULL;
- if (vm == NULL) {
- vm = v3_get_foreground_vm();
- }
host_evts = &(vm->host_event_hooks);
struct v3_host_events * host_evts = NULL;
struct v3_host_event_hook * hook = NULL;
- if (vm == NULL) {
- vm = v3_get_foreground_vm();
- }
host_evts = &(vm->host_event_hooks);
struct v3_host_events * host_evts = NULL;
struct v3_host_event_hook * hook = NULL;
- if (vm == NULL) {
- vm = v3_get_foreground_vm();
- }
host_evts = &(vm->host_event_hooks);
struct v3_host_events * host_evts = NULL;
struct v3_host_event_hook * hook = NULL;
- if (vm == NULL) {
- vm = v3_get_foreground_vm();
- }
host_evts = &(vm->host_event_hooks);
+++ /dev/null
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico. You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
- * All rights reserved.
- *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
- *
- * This is free software. You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm.h>
-#include <palacios/vmm_muxer.h>
-#include <palacios/vmm_list.h>
-
-
-
-static struct v3_vm_info * foreground_vm = NULL;
-
-// list of notification callbacks
-static LIST_HEAD(cb_list);
-
-
-struct mux_callback {
- struct list_head cb_node;
-
- int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm);
-};
-
-
-struct v3_vm_info * v3_get_foreground_vm() {
- return foreground_vm;
-}
-
-
-void v3_set_foreground_vm(struct v3_vm_info * vm) {
- struct mux_callback * tmp_cb;
-
- list_for_each_entry(tmp_cb, &(cb_list), cb_node) {
- tmp_cb->focus_change(foreground_vm, vm);
- }
-
- foreground_vm = vm;
-}
-
-
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm,
- struct v3_vm_info * new_vm)) {
-
- struct mux_callback * cb = (struct mux_callback *)V3_Malloc(sizeof(struct mux_callback));
-
- cb->focus_change = focus_change;
-
- list_add(&(cb->cb_node), &cb_list);
-
- return 0;
-}
#include <palacios/vmm_queue.h>
-void v3_init_queue(struct gen_queue * queue) {
+void v3_init_queue(struct v3_queue * queue) {
queue->num_entries = 0;
INIT_LIST_HEAD(&(queue->entries));
v3_lock_init(&queue->lock);
}
-struct gen_queue * v3_create_queue() {
- struct gen_queue * tmp_queue = V3_Malloc(sizeof(struct gen_queue));
+struct v3_queue * v3_create_queue() {
+ struct v3_queue * tmp_queue = V3_Malloc(sizeof(struct v3_queue));
v3_init_queue(tmp_queue);
return tmp_queue;
}
-void v3_enqueue(struct gen_queue * queue, addr_t entry) {
- struct queue_entry * q_entry = V3_Malloc(sizeof(struct queue_entry));
+void v3_enqueue(struct v3_queue * queue, addr_t entry) {
+ struct v3_queue_entry * q_entry = V3_Malloc(sizeof(struct v3_queue_entry));
v3_lock(queue->lock);
q_entry->entry = entry;
}
-addr_t v3_dequeue(struct gen_queue * queue) {
+addr_t v3_dequeue(struct v3_queue * queue) {
addr_t entry_val = 0;
v3_lock(queue->lock);
if (!list_empty(&(queue->entries))) {
struct list_head * q_entry = queue->entries.next;
- struct queue_entry * tmp_entry = list_entry(q_entry, struct queue_entry, entry_list);
+ struct v3_queue_entry * tmp_entry = list_entry(q_entry, struct v3_queue_entry, entry_list);
entry_val = tmp_entry->entry;
list_del(q_entry);
#define PrintDebug(fmt, args...)
#endif
+int v3_net_debug = 0;
+
struct eth_hdr {
uint8_t dst_mac[ETH_ALEN];
uint8_t src_mac[ETH_ALEN];
struct v3_vnet_dev_ops dev_ops;
void * private_data;
- int active;
-
- uint64_t bytes_tx, bytes_rx;
- uint32_t pkts_tx, pkt_rx;
-
struct list_head node;
} __attribute__((packed));
uint8_t type;
- int active;
void * private_data;
} __attribute__((packed));
} __attribute__((packed));
+struct queue_entry{
+ uint8_t use;
+ struct v3_vnet_pkt pkt;
+ uint8_t data[ETHERNET_PACKET_LEN];
+};
+
+#define VNET_QUEUE_SIZE 10240
+struct vnet_queue {
+ struct queue_entry buf[VNET_QUEUE_SIZE];
+ int head, tail;
+ int count;
+ v3_lock_t lock;
+};
+
static struct {
struct list_head routes;
struct list_head devs;
v3_lock_t lock;
struct vnet_stat stats;
- struct hashtable * route_cache;
-} vnet_state;
+ void * pkt_flush_thread;
+ struct vnet_queue pkt_q;
+ struct hashtable * route_cache;
+} vnet_state;
+
#ifdef CONFIG_DEBUG_VNET
static inline void mac_to_string(uint8_t * mac, char * buf) {
return 0;
}
-static int look_into_cache(const struct v3_vnet_pkt * pkt, struct route_list ** routes) {
+static int look_into_cache(const struct v3_vnet_pkt * pkt,
+ struct route_list ** routes) {
*routes = (struct route_list *)v3_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf));
return 0;
int max_rank = 0;
struct list_head match_list;
struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
-// uint8_t src_type = pkt->src_type;
- // uint32_t src_link = pkt->src_id;
+ // uint8_t src_type = pkt->src_type;
+ // uint32_t src_link = pkt->src_id;
#ifdef CONFIG_DEBUG_VNET
{
}
-int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
+int vnet_tx_one_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
struct route_list * matched_routes = NULL;
unsigned long flags;
int i;
-#ifdef CONFIG_DEBUG_VNET
- {
- int cpu = V3_Get_CPU();
- PrintDebug("VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
+ int cpu = V3_Get_CPU();
+ V3_Net_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
cpu, pkt->size, pkt->src_id,
pkt->src_type, pkt->dst_id, pkt->dst_type);
- }
-#endif
+ if(v3_net_debug >= 4){
+ v3_hexdump(pkt->data, pkt->size, NULL, 0);
+ }
flags = v3_lock_irqsave(vnet_state.lock);
for (i = 0; i < matched_routes->num_routes; i++) {
struct vnet_route_info * route = matched_routes->routes[i];
- if (route->route_def.dst_type == LINK_EDGE) {
- struct vnet_brg_dev *bridge = vnet_state.bridge;
- pkt->dst_type = LINK_EDGE;
- pkt->dst_id = route->route_def.dst_id;
+ if (route->route_def.dst_type == LINK_EDGE) {
+ struct vnet_brg_dev * bridge = vnet_state.bridge;
+ pkt->dst_type = LINK_EDGE;
+ pkt->dst_id = route->route_def.dst_id;
- if (bridge == NULL || (bridge->active == 0)) {
- PrintDebug("VNET/P Core: No active bridge to sent data to\n");
+ if (bridge == NULL) {
+ V3_Net_Print(2, "VNET/P Core: No active bridge to send data to\n");
continue;
}
if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){
- PrintDebug("VNET/P Core: Packet not sent properly to bridge\n");
+ V3_Net_Print(2, "VNET/P Core: Packet not sent properly to bridge\n");
continue;
}
vnet_state.stats.tx_bytes += pkt->size;
vnet_state.stats.tx_pkts ++;
} else if (route->route_def.dst_type == LINK_INTERFACE) {
- if (route->dst_dev == NULL || route->dst_dev->active == 0){
- PrintDebug("VNET/P Core: No active device to sent data to\n");
+ if (route->dst_dev == NULL){
+ V3_Net_Print(2, "VNET/P Core: No active device to send data to\n");
continue;
}
if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) {
- PrintDebug("VNET/P Core: Packet not sent properly\n");
+ V3_Net_Print(2, "VNET/P Core: Packet not sent properly\n");
continue;
}
vnet_state.stats.tx_bytes += pkt->size;
return 0;
}
+
+static int vnet_pkt_enqueue(struct v3_vnet_pkt * pkt){
+ unsigned long flags;
+ struct queue_entry * entry;
+ struct vnet_queue * q = &(vnet_state.pkt_q);
+
+ flags = v3_lock_irqsave(q->lock);
+
+ if (q->count >= VNET_QUEUE_SIZE){
+ V3_Net_Print(1, "VNET Queue overflow!\n");
+ v3_unlock_irqrestore(q->lock, flags);
+ return -1;
+ }
+
+ q->count ++;
+ entry = &(q->buf[q->tail++]);
+ q->tail %= VNET_QUEUE_SIZE;
+
+ v3_unlock_irqrestore(q->lock, flags);
+
+ /* this busy-wait is ugly, but it should happen only very rarely */
+ while(entry->use);
+
+ entry->pkt.data = entry->data;
+ memcpy(&(entry->pkt), pkt, sizeof(struct v3_vnet_pkt));
+ memcpy(entry->data, pkt->data, pkt->size);
+
+ entry->use = 1;
+
+ return 0;
+}
+
+
+int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize) {
+ if(synchronize){
+ vnet_tx_one_pkt(pkt, NULL);
+ }else {
+ vnet_pkt_enqueue(pkt);
+ V3_Net_Print(2, "VNET/P Core: Put pkt into Queue: pkt size %d\n", pkt->size);
+ }
+
+ return 0;
+}
+
int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
struct v3_vnet_dev_ops *ops,
void * priv_data){
memcpy(new_dev->mac_addr, mac, 6);
new_dev->dev_ops.input = ops->input;
- new_dev->dev_ops.poll = ops->poll;
new_dev->private_data = priv_data;
new_dev->vm = vm;
new_dev->dev_id = 0;
- new_dev->active = 1;
flags = v3_lock_irqsave(vnet_state.lock);
}
-
int v3_vnet_del_dev(int dev_id){
struct vnet_dev * dev = NULL;
unsigned long flags;
return 0;
}
+
int v3_vnet_stat(struct vnet_stat * stats){
stats->rx_bytes = vnet_state.stats.rx_bytes;
struct vnet_brg_dev * tmp_bridge = NULL;
flags = v3_lock_irqsave(vnet_state.lock);
-
if (vnet_state.bridge == NULL) {
bridge_free = 1;
vnet_state.bridge = (void *)1;
}
-
v3_unlock_irqrestore(vnet_state.lock, flags);
if (bridge_free == 0) {
tmp_bridge->brg_ops.input = ops->input;
tmp_bridge->brg_ops.poll = ops->poll;
tmp_bridge->private_data = priv_data;
- tmp_bridge->active = 1;
tmp_bridge->type = type;
/* make this atomic to avoid possible race conditions */
}
-void v3_vnet_do_poll(struct v3_vm_info * vm){
- struct vnet_dev * dev = NULL;
+static int vnet_tx_flush(void *args){
+ unsigned long flags;
+ struct queue_entry * entry;
+ struct vnet_queue * q = &(vnet_state.pkt_q);
- /* TODO: run this on separate threads
- * round-robin schedule, with maximal budget for each poll
- */
- list_for_each_entry(dev, &(vnet_state.devs), node) {
- if(dev->dev_ops.poll != NULL){
- dev->dev_ops.poll(vm, -1, dev->private_data);
- }
+ V3_Print("VNET/P Handling Pkt Thread Starting ....\n");
+
+ //V3_THREAD_SLEEP();
+ /* we need thread sleep/wakeup in Palacios */
+ while(1){
+ flags = v3_lock_irqsave(q->lock);
+
+ if (q->count <= 0){
+ v3_unlock_irqrestore(q->lock, flags);
+ v3_yield(NULL);
+ //V3_THREAD_SLEEP();
+ }else {
+ q->count --;
+ entry = &(q->buf[q->head++]);
+ q->head %= VNET_QUEUE_SIZE;
+
+ v3_unlock_irqrestore(q->lock, flags);
+
+ /* this busy-wait is ugly, but it should happen only very rarely */
+ while(!entry->use);
+ vnet_tx_one_pkt(&(entry->pkt), NULL);
+ entry->use = 0;
+
+ V3_Net_Print(2, "vnet_tx_flush: pkt (size %d)\n", entry->pkt.size);
+ }
}
}
-
int v3_init_vnet() {
memset(&vnet_state, 0, sizeof(vnet_state));
}
vnet_state.route_cache = v3_create_htable(0, &hash_fn, &hash_eq);
-
if (vnet_state.route_cache == NULL) {
PrintError("VNET/P Core: Fails to initiate route cache\n");
return -1;
}
+ v3_lock_init(&(vnet_state.pkt_q.lock));
+
+ vnet_state.pkt_flush_thread = V3_CREATE_THREAD(vnet_tx_flush, NULL, "VNET_Pkts");
+
PrintDebug("VNET/P Core is initiated\n");
return 0;
}
}
- V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
+// V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
if (xed_operand_read(op)) {
}
}
- V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
+// V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
if (xed_operand_read(op)) {
v3_op->read = 1;
#include <palacios/vmx_io.h>
#include <palacios/vmx_msr.h>
+#include <palacios/vmx_hw_info.h>
#ifndef CONFIG_DEBUG_VMX
#undef PrintDebug
#endif
-static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+/* These fields contain the hardware feature sets supported by the local CPU */
+static struct vmx_hw_info hw_info;
+
+
static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
int ret = 0;
- ret = vmcs_write(field,val);
+ ret = vmcs_write(field, val);
if (ret != VMX_SUCCESS) {
PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
static addr_t allocate_vmcs() {
- reg_ex_t msr;
struct vmcs_data * vmcs_page = NULL;
PrintDebug("Allocating page\n");
vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
memset(vmcs_page, 0, 4096);
- v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
-
- vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
- PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
+ vmcs_page->revision = hw_info.basic_info.revision;
+ PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
return (addr_t)V3_PAddr((void *)vmcs_page);
}
// reenable global interrupts for vm state initialization now
// that the vm state is initialized. If another VM kicks us off,
// it'll update our vmx state so that we know to reload ourself
- v3_disable_ints();
+ v3_enable_ints();
return 0;
}
// disable global interrupts for vm state transition
v3_disable_ints();
+
+ if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
+ vmcs_load(vmx_info->vmcs_ptr_phys);
+ active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+ }
+
+
v3_vmx_restore_vmcs(info);
check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
- if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
- vmcs_load(vmx_info->vmcs_ptr_phys);
- active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
- }
if (vmx_info->state == VMX_UNLAUNCHED) {
vmx_info->state = VMX_LAUNCHED;
update_irq_exit_state(info);
#endif
- // Handle any exits needed still in the atomic section
- if (v3_handle_atomic_vmx_exit(info, &exit_info) == -1) {
- PrintError("Error in atomic VMX exit handler\n");
- return -1;
+ if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
+ // This is a special case whose only job is to inject an interrupt
+ vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
+ vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
+ vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
+
+#ifdef CONFIG_DEBUG_INTERRUPTS
+ PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
+#endif
}
// reenable global interrupts after vm exit
}
+
+
+#define VMX_FEATURE_CONTROL_MSR 0x0000003a
+#define CPUID_VMX_FEATURES 0x00000005 /* LOCK and VMXON */
+#define CPUID_1_ECX_VTXFLAG 0x00000020
+
int v3_is_vmx_capable() {
v3_msr_t feature_msr;
uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
- if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
+ if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
PrintDebug("VMX is locked -- enable in the BIOS\n");
return 0;
}
return 1;
}
-static int has_vmx_nested_paging() {
-    return 0;
-}
-void v3_init_vmx_cpu(int cpu_id) {
-    extern v3_cpu_arch_t v3_cpu_types[];
-    struct v3_msr tmp_msr;
-    uint64_t ret = 0;
-    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
-#ifdef __V3_64BIT__
-    __asm__ __volatile__ (
-        "movq %%cr4, %%rbx;"
-        "orq  $0x00002000, %%rbx;"
-        "movq %%rbx, %0;"
-        : "=m"(ret) 
-        :
-        : "%rbx"
-    );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-            "movq %0, %%cr4;"
-            :
-            : "q"(ret)
-        );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
-    }
+/* Per-core VMX bring-up.  Core 0 additionally probes the VMX capability MSRs
+ * into the global hw_info before any core attempts VMXON.  The old inline
+ * CR0/CR4 fixup asm is replaced by enable_vmx(). */
+void v3_init_vmx_cpu(int cpu_id) {
+    extern v3_cpu_arch_t v3_cpu_types[];
-    __asm__ __volatile__ (
-        "movq %%cr0, %%rbx; "
-        "orq  $0x00000020,%%rbx; "
-        "movq %%rbx, %%cr0;"
-        :
-        :
-        : "%rbx"
-    );
-#elif __V3_32BIT__
-    __asm__ __volatile__ (
-        "movl %%cr4, %%ecx;"
-        "orl  $0x00002000, %%ecx;"
-        "movl %%ecx, %0;"
-        : "=m"(ret) 
-        :
-        : "%ecx"
-    );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-            "movl %0, %%cr4;"
-            :
-            : "q"(ret)
-        );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
+    if (cpu_id == 0) {
+        if (v3_init_vmx_hw(&hw_info) == -1) {
+            PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
+            return;
+        }
+    }
-    __asm__ __volatile__ (
-        "movl %%cr0, %%ecx; "
-        "orl  $0x00000020,%%ecx; "
-        "movl %%ecx, %%cr0;"
-        :
-        :
-        : "%ecx"
-    );
-#endif
-
-    //
-    // Should check and return Error here.... 
+    // NOTE(review): enable_vmx() presumably sets CR4.VMXE and the fixed
+    // CR0/CR4 bits; its return value is not checked here -- confirm it
+    // cannot fail.
+    enable_vmx();
    // Setup VMXON Region
    PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
-    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
+    if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
        PrintDebug("VMX Enabled\n");
    } else {
        PrintError("VMX initialization failure\n");
    }
-    if (has_vmx_nested_paging() == 1) {
-        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
-    } else {
-        v3_cpu_types[cpu_id] = V3_VMX_CPU;
-    }
+    // NOTE(review): EPT detection (has_vmx_nested_paging) was dropped; every
+    // VMX-capable core is now registered as V3_VMX_CPU.
+    v3_cpu_types[cpu_id] = V3_VMX_CPU;
+
}
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
#endif
/* At this point the GPRs are already copied into the guest_info state */
-int v3_handle_atomic_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
-    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
-
-    switch (exit_info->exit_reason) {
-        case VMEXIT_INTR_WINDOW:
-            // This is here because we touch the VMCS
-            vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
-            vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
-            vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
-
-#ifdef CONFIG_DEBUG_INTERRUPTS
-            PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
-#endif
-            break;
-    }
-    return 0;
-}
-
-/* At this point the GPRs are already copied into the guest_info state */
+// NOTE(review): v3_handle_atomic_vmx_exit (which cleared the interrupt-window
+// exiting bit in VMCS_PROC_CTRLS while still in the atomic exit path) is
+// removed here -- confirm its VMEXIT_INTR_WINDOW handling is folded into
+// v3_handle_vmx_exit elsewhere in this patch.
int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
/*
PrintError("Handling VMEXIT: %s (%u), %lu (0x%lx)\n",
--- /dev/null
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico. You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software. You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vmx_hw_info.h>
+#include <palacios/vmm_msr.h>
+
+// Intel VMX Feature MSRs
+
+
+
+/*
+ * Decode one of the pin/proc/exit/entry execution-control capability MSR
+ * pairs into 'field'.  In each capability MSR, the low 32 bits are the
+ * allowed-0 settings (bit set => control must be 1) and the high 32 bits
+ * the allowed-1 settings (bit clear => control must be 0).
+ */
+static int get_ex_ctrl_caps(struct vmx_hw_info * hw_info, struct vmx_ctrl_field * field,
+                            uint32_t old_msr, uint32_t true_msr) {
+    uint32_t allowed0_old = 0;  /* bit set   => MB1 */
+    uint32_t allowed1_old = 0;  /* bit clear => MBZ */
+    uint32_t allowed0 = 0;      /* bit set   => MB1 */
+    uint32_t allowed1 = 0;      /* bit clear => MBZ */
+
+    v3_get_msr(old_msr, &allowed1_old, &allowed0_old);
+    field->def_val = allowed0_old;
+
+    /* The TRUE_* MSR is only defined when the BASIC MSR advertises that
+       default1 controls may be cleared; otherwise fall back to the classic
+       capability MSR. */
+    if (hw_info->basic_info.def1_maybe_0) {
+        v3_get_msr(true_msr, &allowed1, &allowed0);
+    } else {
+        allowed0 = allowed0_old;
+        allowed1 = allowed1_old;
+    }
+
+    field->req_val  = allowed0;
+    field->req_mask = ~(allowed1 ^ allowed0);
+
+    return 0;
+}
+
+
+/*
+ * Decode a classic (non-TRUE) VMX control capability MSR into 'field'.
+ * Low 32 bits: allowed-0 settings (bit set => control must be 1).
+ * High 32 bits: allowed-1 settings (bit clear => control must be 0).
+ */
+static int get_ctrl_caps(struct vmx_ctrl_field * field, uint32_t msr) {
+    uint32_t allowed1 = 0;  /* bit clear => MBZ */
+    uint32_t allowed0 = 0;  /* bit set   => MB1 */
+
+    v3_get_msr(msr, &allowed1, &allowed0);
+
+    field->def_val  = allowed0;
+    field->req_val  = allowed0;
+    field->req_mask = ~(allowed1 ^ allowed0);
+
+    return 0;
+}
+
+
+
+/*
+ * Derive the fixed-bit constraints for a control register from its
+ * FIXED0/FIXED1 capability MSR pair:
+ *   fixed_0_msr: bits that are 1 here must be 1 in the CR (MB1);
+ *   fixed_1_msr: bits that are 0 here must be 0 in the CR (MBZ).
+ */
+static int get_cr_fields(struct vmx_cr_field * field, uint32_t fixed_1_msr, uint32_t fixed_0_msr) {
+    struct v3_msr mbz; /* Bit is 0 => MBZ */
+    struct v3_msr mb1; /* Bit is 1 => MB1 */
+
+    v3_get_msr(fixed_1_msr, &(mbz.hi), &(mbz.lo));
+    v3_get_msr(fixed_0_msr, &(mb1.hi), &(mb1.lo));
+
+    field->def_val = mb1.value;
+    field->req_val = mb1.value;
+    // Bits that differ between the two MSRs are flexible; all others are fixed.
+    field->req_mask = ~(mbz.value ^ mb1.value);
+
+    return 0;
+}
+
+
+
+
+
+/*
+ * Probe the VMX capability MSRs and fill in 'hw_info' with the basic/misc/EPT
+ * feature words, the pin/proc/exit/entry control constraints, and the CR0/CR4
+ * fixed-bit constraints.  Must run before the first VMXON.  Returns 0.
+ */
+int v3_init_vmx_hw(struct vmx_hw_info * hw_info) {
+    //  extern v3_cpu_arch_t v3_cpu_types[];
+
+    memset(hw_info, 0, sizeof(struct vmx_hw_info));
+
+    v3_get_msr(VMX_BASIC_MSR, &(hw_info->basic_info.hi), &(hw_info->basic_info.lo));
+    v3_get_msr(VMX_MISC_MSR, &(hw_info->misc_info.hi), &(hw_info->misc_info.lo));
+    v3_get_msr(VMX_EPT_VPID_CAP_MSR, &(hw_info->ept_info.hi), &(hw_info->ept_info.lo));
+
+    // NOTE(review): informational message logged via PrintError -- consider
+    // PrintDebug to keep the error log clean.
+    PrintError("BASIC_MSR: Lo: %x, Hi: %x\n", hw_info->basic_info.lo, hw_info->basic_info.hi);
+
+    get_ex_ctrl_caps(hw_info, &(hw_info->pin_ctrls), VMX_PINBASED_CTLS_MSR, VMX_TRUE_PINBASED_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->proc_ctrls), VMX_PROCBASED_CTLS_MSR, VMX_TRUE_PROCBASED_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->exit_ctrls), VMX_EXIT_CTLS_MSR, VMX_TRUE_EXIT_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->entry_ctrls), VMX_ENTRY_CTLS_MSR, VMX_TRUE_ENTRY_CTLS_MSR);
+
+    /* Get secondary PROCBASED controls if secondary controls are available (optional or required) */
+    /* Intel Manual 3B. Sect. G.3.3 */
+    /* Bit 31 of the proc controls ("activate secondary controls") is present
+     * when it is either flexible (not in req_mask) or required to be 1.
+     * BUGFIX: the required-to-1 test previously compared the masked value
+     * against 1, which can never match (the mask yields 0 or 0x80000000). */
+    if ( ((hw_info->proc_ctrls.req_mask & 0x80000000) == 0) ||
+	 ((hw_info->proc_ctrls.req_val & 0x80000000) == 0x80000000) ) {
+        get_ctrl_caps(&(hw_info->proc_ctrls_2), VMX_PROCBASED_CTLS2_MSR);
+    }
+
+    get_cr_fields(&(hw_info->cr0), VMX_CR0_FIXED1_MSR, VMX_CR0_FIXED0_MSR);
+    get_cr_fields(&(hw_info->cr4), VMX_CR4_FIXED1_MSR, VMX_CR4_FIXED0_MSR);
+
+    return 0;
+}