Merge branch 'devel' of /home-remote/palacios/palacios into devel

diff --git a/Kconfig b/Kconfig

index d312c0b..4241627 100644 (file)
--- a/Kconfig
+++ b/Kconfig
@@ -128,7 +128,7 @@ config MAX_CPUS
 endmenu
 
 source "palacios/src/interfaces/Kconfig"
-
+source "palacios/src/extensions/Kconfig"
 
 config TELEMETRY
        bool "Enable VMM telemetry support"
diff --git a/Makefile b/Makefile

index 46227ae..ed13298 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -435,6 +435,7 @@ core-y          := palacios/src/palacios/
 libs-y         := palacios/lib/$(ARCH)/
 devices-y       := palacios/src/devices/
 interfaces-y    := palacios/src/interfaces/
+extensions-y    := palacios/src/extensions/
 modules-y       := modules/
 
 
@@ -529,7 +530,7 @@ export      INSTALL_PATH ?= /build
 
 
 palacios-dirs  := $(patsubst %/,%,$(filter %/,  \
-                    $(core-y) $(devices-y) $(interfaces-y) $(libs-y)) $(modules-y))
+                    $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y)) $(modules-y))
 
 
 
@@ -540,13 +541,14 @@ palacios-dirs     := $(patsubst %/,%,$(filter %/,  \
 
 palacios-cleandirs := $(sort $(palacios-dirs) $(patsubst %/,%,$(filter %/, \
                        $(core-n) $(core-) $(devices-n) $(devices-) \
-                       $(interfaces-n) $(interfaces-) $(modules-n) $(modules-))))
+                       $(interfaces-n) $(interfaces-) $(extensions-n) $(extensions-) $(modules-n) $(modules-))))
 
 
 
 core-y         := $(patsubst %/, %/built-in.o, $(core-y))
 devices-y      := $(patsubst %/, %/built-in.o, $(devices-y))
 interfaces-y    := $(patsubst %/, %/built-in.o, $(interfaces-y))
+extensions-y    := $(patsubst %/, %/built-in.o, $(extensions-y))
 libs-y         := $(patsubst %/, %/built-in.o, $(libs-y))
 modules-y       := $(patsubst %/, %/built-in.o, $(modules-y))
 #lnxmod-y        := $(patsubst %/, %/built-in.o, $(lnxmod-y))
@@ -573,7 +575,7 @@ modules-y       := $(patsubst %/, %/built-in.o, $(modules-y))
 
 
 
-palacios := $(core-y) $(devices-y) $(interfaces-y) $(libs-y) $(modules-y)
+palacios := $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y) $(modules-y)
 
 
 # Rule to link palacios - also used during CONFIG_CONFIGKALLSYMS
diff --git a/linux_module/palacios-debugfs.c b/linux_module/palacios-debugfs.c

new file mode 100644 (file)

index 0000000..b35120e
--- /dev/null
+++ b/linux_module/palacios-debugfs.c
@@ -0,0 +1,79 @@
+/* 
+ * DebugFS interface
+ * (c) Jack Lange, 2011
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <interfaces/inspector.h>
+
+#include "palacios.h"
+
+struct dentry * v3_dir = NULL;
+
+
+/*
+ * Create the top-level "v3vee" debugfs directory that dfs_register_vm()
+ * uses as the parent for exported inspection trees.
+ *
+ * Returns 0 on success, -1 if the directory could not be created.
+ */
+int palacios_init_debugfs( void ) {
+
+    v3_dir = debugfs_create_dir("v3vee", NULL);
+
+    /* Depending on the kernel, failure is reported as NULL or as an ERR_PTR value */
+    if ((v3_dir == NULL) || IS_ERR(v3_dir)) {
+       printk("Error creating v3vee debugfs directory\n");
+       v3_dir = NULL;  /* do not leave an error pointer behind for later users */
+       return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Tear down the "v3vee" debugfs directory.
+ *
+ * The directory is populated by dfs_register_vm(), so it has to be removed
+ * recursively; a plain debugfs_remove() does not clean up a directory that
+ * still has entries.
+ *
+ * Always returns 0.
+ */
+int palacios_deinit_debugfs( void ) {
+    debugfs_remove_recursive(v3_dir);
+    v3_dir = NULL;  /* the dentry is gone; make sure nobody reuses it */
+    return 0;
+}
+
+
+
+/*
+ * Recursively mirror the children of an inspection node into debugfs.
+ *
+ * dir  - debugfs directory the children of 'root' are created in
+ * root - inspection node whose children are exported
+ *
+ * A child whose value size is 0 becomes a subdirectory and is descended
+ * into; sizes 1, 2, 4 and 8 are exported as u8/u16/u32/u64 files.  Any
+ * other size is skipped (buffer values are not exported yet).
+ *
+ * Registration is best effort: after a failed directory the remaining
+ * siblings are still processed.
+ *
+ * Returns 0 if every directory was created, -1 if a subtree was skipped.
+ */
+static int dfs_register_tree(struct dentry * dir, v3_inspect_node_t * root) {
+    v3_inspect_node_t * tmp_node = v3_inspection_first_child(root);
+    struct v3_inspection_value tmp_value;
+    int ret = 0;
+
+    while (tmp_node) {
+       tmp_value = v3_inspection_value(tmp_node);
+
+       if (tmp_value.size == 0) {
+           struct dentry * new_dir = debugfs_create_dir(tmp_value.name, dir);
+
+           /*
+            * Never descend with a failed directory: a NULL parent would place
+            * the children in the debugfs root, and an ERR_PTR value is not a
+            * usable dentry.
+            */
+           if ((new_dir == NULL) || IS_ERR(new_dir)) {
+               printk("Error creating debugfs directory %s\n", tmp_value.name);
+               ret = -1;
+           } else if (dfs_register_tree(new_dir, tmp_node) != 0) {
+               ret = -1;
+           }
+       } else if (tmp_value.size == 1) {
+           debugfs_create_u8(tmp_value.name, 0644, dir, (u8 *)tmp_value.value);
+       } else if (tmp_value.size == 2) {
+           debugfs_create_u16(tmp_value.name, 0644, dir, (u16 *)tmp_value.value);
+       } else if (tmp_value.size == 4) {
+           debugfs_create_u32(tmp_value.name, 0644, dir, (u32 *)tmp_value.value);
+       } else if (tmp_value.size == 8) {
+           debugfs_create_u64(tmp_value.name, 0644, dir, (u64 *)tmp_value.value);
+       } else {
+
+           // buffer
+       }
+
+       tmp_node = v3_inspection_node_next(tmp_node);
+
+    }
+
+    return ret;
+}
+
+
+/*
+ * Export the inspection tree of a guest under the "v3vee" debugfs directory.
+ *
+ * guest - guest whose v3_ctx is queried for its inspection root
+ *
+ * Returns 0 on success, -1 if the debugfs directory is not available, the
+ * guest has no inspection root, or dfs_register_tree() reports a failure.
+ */
+int dfs_register_vm(struct v3_guest * guest) {
+    v3_inspect_node_t * root = NULL;
+
+    /* Without a valid parent the entries would not end up under v3vee */
+    if ((v3_dir == NULL) || IS_ERR(v3_dir)) {
+       printk("v3vee debugfs directory not initialized\n");
+       return -1;
+    }
+
+    root = v3_get_inspection_root(guest->v3_ctx);
+
+    if (root == NULL) {
+       printk("No inspection root found\n");
+       return -1;
+    }
+
+    return dfs_register_tree(v3_dir, root);
+}
diff --git a/linux_module/palacios-debugfs.h b/linux_module/palacios-debugfs.h

new file mode 100644 (file)

index 0000000..1caad52
--- /dev/null
+++ b/linux_module/palacios-debugfs.h
@@ -0,0 +1,14 @@
+/* 
+ * DebugFS interface
+ * (c) Jack Lange, 2011
+ */
+
+#ifndef __PALACIOS_DEBUGFS_H__
+#define __PALACIOS_DEBUGFS_H__
+
+#include "palacios.h"
+
+/* Create / remove the top-level "v3vee" debugfs directory; 0 on success, -1 on error */
+int palacios_init_debugfs( void );
+int palacios_deinit_debugfs( void );
+
+
+/* Export the inspection tree of a guest through debugfs; 0 on success, -1 on error */
+int dfs_register_vm(struct v3_guest * guest);
+
+#endif
+
diff --git a/palacios/include/palacios/vmm_inspector.h b/palacios/include/interfaces/inspector.h

similarity index 97%

rename from palacios/include/palacios/vmm_inspector.h

rename to palacios/include/interfaces/inspector.h

index ee0f70d..396e490 100644 (file)
--- a/palacios/include/palacios/vmm_inspector.h
+++ b/palacios/include/interfaces/inspector.h
@@ -36,12 +36,6 @@ typedef void v3_inspect_node_t;
 #define READ_ONLY 2
 #define HOOKED 4
 
-struct v3_inspector_state {
-    struct v3_mtree state_tree;
-
-};
-
-
 
 int v3_init_inspector(struct v3_vm_info * vm);
 int v3_init_inspector_core(struct guest_info * core);
diff --git a/palacios/include/interfaces/vmm_host_dev.h b/palacios/include/interfaces/vmm_host_dev.h

index 138839f..2b893b5 100644 (file)
--- a/palacios/include/interfaces/vmm_host_dev.h
+++ b/palacios/include/interfaces/vmm_host_dev.h
@@ -23,7 +23,6 @@
 
 #include <palacios/vmm.h>
 
-
 /*
 
   The purpose of this interface is to make it possible to implement
@@ -78,9 +77,12 @@ typedef enum { V3_BUS_CLASS_DIRECT, V3_BUS_CLASS_PCI } v3_bus_class_t;
 
 #ifdef __V3VEE__
 
+struct v3_vm_info;
+
 v3_host_dev_t v3_host_dev_open(char *impl, 
                               v3_bus_class_t bus,
-                              v3_guest_dev_t gdev); 
+                              v3_guest_dev_t gdev,
+                              struct v3_vm_info *vm); 
 
 int v3_host_dev_close(v3_host_dev_t hdev);
     
@@ -106,13 +108,13 @@ uint64_t v3_host_dev_write_mem(v3_host_dev_t hostdev,
 
 int v3_host_dev_ack_irq(v3_host_dev_t hostdev, uint8_t irq);
 
-uint64_t v3_host_dev_config_read(v3_host_dev_t hostdev, 
+uint64_t v3_host_dev_read_config(v3_host_dev_t hostdev, 
                                 uint64_t      offset,
                                 void          *dest,
                                 uint64_t      len);
 
-uint64_t v3_host_dev_config_write(v3_host_dev_t hostdev, 
-                                uint64_t      offset,
+uint64_t v3_host_dev_write_config(v3_host_dev_t hostdev, 
+                                 uint64_t      offset,
                                  void          *src,
                                  uint64_t      len);
  
@@ -124,10 +126,12 @@ struct v3_host_dev_hooks {
     // this device is attached to and an opaque pointer back to the
     // guest device.  It returns an opaque representation of 
     // the host device it has attached to, with zero indicating
-    // failure
+    // failure.  The host_priv_data argument supplies to the 
+    // host the pointer that the VM was originally registered with
     v3_host_dev_t (*open)(char *impl, 
                          v3_bus_class_t bus,
-                         v3_guest_dev_t gdev);
+                         v3_guest_dev_t gdev,
+                         void *host_priv_data);
 
     int (*close)(v3_host_dev_t hdev);
     
@@ -150,12 +154,12 @@ struct v3_host_dev_hooks {
     // fail, returning != len
     // Callee gets the host dev id, and the guest physical address
     uint64_t (*read_mem)(v3_host_dev_t hostdev, 
-                        addr_t        gpa,
+                        void *        gpa,
                         void          *dest,
                         uint64_t      len);
     
     uint64_t (*write_mem)(v3_host_dev_t hostdev, 
-                         addr_t        gpa,
+                         void *        gpa,
                          void          *src,
                          uint64_t      len);
     
@@ -202,19 +206,16 @@ int v3_host_dev_raise_irq(v3_host_dev_t hostdev,
 
 /* These functions allow the host to read and write the guest
    memory by physical address, for example to implement DMA 
-
-   These functions are incremental - that is, they can return
-   a smaller amount than requested
 */
 uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t  hostdev,
                                    v3_guest_dev_t guest_dev,
-                                   addr_t         gpa,
+                                   void *         gpa,
                                    void           *dest,
                                    uint64_t       len);
 
 uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t  hostdev,
                                     v3_guest_dev_t guest_dev,
-                                    addr_t         gpa,
+                                    void *         gpa,
                                     void           *src,
                                     uint64_t       len);
                              
diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h

index 4b2728f..5d4527f 100644 (file)
--- a/palacios/include/palacios/vm_guest.h
+++ b/palacios/include/palacios/vm_guest.h
@@ -50,9 +50,6 @@
 struct v3_sym_core_state;
 #endif
 
-#ifdef CONFIG_INSPECTOR
-#include  <palacios/vmm_inspector.h>
-#endif
 
 
 #include <palacios/vmm_config.h>
@@ -187,9 +184,6 @@ struct v3_vm_info {
     struct v3_telemetry_state telemetry;
 #endif
 
-#ifdef CONFIG_INSPECTOR
-    struct v3_inspector_state inspector;
-#endif
 
     uint64_t yield_cycle_period;  
 
diff --git a/palacios/include/palacios/vmcs.h b/palacios/include/palacios/vmcs.h

index 9129d1b..c80a23c 100644 (file)
--- a/palacios/include/palacios/vmcs.h
+++ b/palacios/include/palacios/vmcs.h
@@ -39,6 +39,15 @@
 
 
 
+struct vmcs_field_encoding { /* bit-field view of a VMCS field encoding; the widths below add up to 32 bits */
+    uint8_t access_type    : 1; /*  0 = full, 1 = high, (for accessing 64 bit fields on 32bit CPU) */
+    uint16_t index         : 9;
+    uint8_t type           : 2; /* 0=ctrl, 1=read-only, 2 = guest state, 3 = host state */
+    uint8_t rsvd1          : 1; /* MBZ */
+    uint8_t width          : 2; /* 0 = 16bit, 1 = 64bit, 2 = 32bit, 3 = natural width */
+    uint32_t rsvd2         : 17; /* NOTE(review): presumably must-be-zero like rsvd1 - confirm */
+} __attribute__((packed));
+
 
 typedef enum {
     VMCS_GUEST_ES_SELECTOR       = 0x00000800,
diff --git a/palacios/include/palacios/vmm.h b/palacios/include/palacios/vmm.h

index ae4421c..5cb1db1 100644 (file)
--- a/palacios/include/palacios/vmm.h
+++ b/palacios/include/palacios/vmm.h
@@ -184,13 +184,33 @@ struct guest_info;
 
 #ifdef CONFIG_MULTITHREAD_OS
 
-#define V3_CREATE_THREAD(fn, arg, name)                                \
-    do {                                                       \
+#define V3_CREATE_THREAD(fn, arg, name)        ({                      \
+       void * v3_created_thread_ = NULL; /* unusual name: must not shadow identifiers used in fn/arg/name */ \
        extern struct v3_os_hooks * os_hooks;                   \
        if ((os_hooks) && (os_hooks)->start_kernel_thread) {    \
-           (os_hooks)->start_kernel_thread(fn, arg, name);     \
+           v3_created_thread_ = (os_hooks)->start_kernel_thread(fn, arg, name);        \
        }                                                       \
-    } while (0)
+       v3_created_thread_; /* value of the macro: thread handle, or NULL if no hook is set */ \
+    })
+
+
+#define V3_THREAD_SLEEP()   /* call the host's kernel_thread_sleep hook; does nothing if os_hooks or the hook is unset */ \
+    do{                                                        \
+       extern struct v3_os_hooks * os_hooks;                   \
+       if ((os_hooks) && (os_hooks)->kernel_thread_sleep) {    \
+           (os_hooks)->kernel_thread_sleep();  \
+       }                                                       \
+    }while(0)
+
+
+#define V3_THREAD_WAKEUP(thread)   /* pass 'thread' to the host's kernel_thread_wakeup hook; does nothing if os_hooks or the hook is unset */ \
+    do{                                                        \
+       extern struct v3_os_hooks * os_hooks;                   \
+       if ((os_hooks) && (os_hooks)->kernel_thread_wakeup) {   \
+           (os_hooks)->kernel_thread_wakeup(thread);   \
+       }                                                       \
+    }while(0)
+
 
 
 #define V3_Call_On_CPU(cpu, fn, arg)                   \
@@ -298,7 +318,9 @@ struct v3_os_hooks {
 
 
 
-    void (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name); 
+    void * (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name); 
+    void (*kernel_thread_sleep)(void);
+    void (*kernel_thread_wakeup)(void * thread);
     void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector);
     void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg);
     void * (*start_thread_on_cpu)(int cpu_id, int (*fn)(void * arg), void * arg, char * thread_name);
diff --git a/palacios/include/palacios/vmm_muxer.h b/palacios/include/palacios/vmm_barrier.h

similarity index 55%

copy from palacios/include/palacios/vmm_muxer.h

copy to palacios/include/palacios/vmm_barrier.h

index 1c50789..4513c09 100644 (file)
--- a/palacios/include/palacios/vmm_muxer.h
+++ b/palacios/include/palacios/vmm_barrier.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the Palacios Virtual Machine Monitor developed
  * by the V3VEE Project with funding from the United States National 
  * Science Foundation and the Department of Energy.  
@@ -7,31 +7,34 @@
  * and the University of New Mexico.  You can find out more at 
  * http://www.v3vee.org
  *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
  * All rights reserved.
  *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  */
 
-#ifndef __VMM_MUXER_H__
-#define __VMM_MUXER_H__
+#ifndef __VMM_BARRIER_H__
+#define __VMM_BARRIER_H__
 
 #ifdef __V3VEE__
 
 
-struct v3_vm_info;
+#include <util/vmm_lock.h>
 
+struct v3_barrier {
+    
 
+    int active;     // If 1, barrier is active, everyone must wait 
+                    // If 0, barrier is clear, can proceed
 
-struct v3_vm_info * v3_get_foreground_vm();
-void v3_set_foreground_vm(struct v3_vm_info * vm);
+    v3_lock_t lock;
+};
 
 
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm));
 
 
 #endif
diff --git a/palacios/include/palacios/vmm_cpuid.h b/palacios/include/palacios/vmm_cpuid.h

index 30467fd..88d48bd 100644 (file)
--- a/palacios/include/palacios/vmm_cpuid.h
+++ b/palacios/include/palacios/vmm_cpuid.h
@@ -54,6 +54,12 @@ struct v3_cpuid_map {
 
 void v3_print_cpuid_map(struct v3_vm_info * vm);
 
+int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid, 
+                       uint32_t rax_mask, uint32_t rax,
+                       uint32_t rbx_mask, uint32_t rbx, 
+                       uint32_t rcx_mask, uint32_t rcx, 
+                       uint32_t rdx_mask, uint32_t rdx);
+
 int v3_hook_cpuid(struct v3_vm_info * vm, uint32_t cpuid, 
                  int (*hook_fn)(struct guest_info * info, uint32_t cpuid, \
                                 uint32_t * eax, uint32_t * ebx, \
diff --git a/palacios/include/palacios/vmm_dev_mgr.h b/palacios/include/palacios/vmm_dev_mgr.h

index e789207..c9999bd 100644 (file)
--- a/palacios/include/palacios/vmm_dev_mgr.h
+++ b/palacios/include/palacios/vmm_dev_mgr.h
@@ -179,11 +179,10 @@ struct v3_dev_blk_ops {
 
 struct v3_dev_net_ops {
     /* Backend implemented functions */
-    int (*send)(uint8_t * buf, uint32_t count, void * private_data);
+    int (*send)(uint8_t * buf, uint32_t len, int synchronize, void * private_data);
 
     /* Frontend implemented functions */
-    int (*recv)(uint8_t * buf, uint32_t count, void * frnt_data);
-    void (*poll)(struct v3_vm_info * vm, int budget, void * frnt_data);
+    int (*recv)(uint8_t * buf, uint32_t len, void * frnt_data);
 
     /* This is ugly... */
     void * frontend_data; 
diff --git a/palacios/include/palacios/vmm_ethernet.h b/palacios/include/palacios/vmm_ethernet.h

index 3794d77..2b9319b 100644 (file)
--- a/palacios/include/palacios/vmm_ethernet.h
+++ b/palacios/include/palacios/vmm_ethernet.h
@@ -25,21 +25,40 @@
 #define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_MTU)
 #define ETH_ALEN 6
 
+#define MIN_MTU 68
+//#define MAX_MTU 65535
+#define MAX_MTU 9000
+
+#define MAX_PACKET_LEN (ETHERNET_HEADER_LEN + MAX_MTU)
+
+
+extern int v3_net_debug;
 
 #ifdef __V3VEE__
 
 #include <palacios/vmm.h>
 
+/*
+ * Print a network debug message through the host's print hook.
+ * The message is emitted only when 'level' is <= v3_net_debug and a print
+ * hook is registered.  'level' is parenthesized so that expression
+ * arguments (e.g. a ?: expression) compare as a whole.
+ */
+#define V3_Net_Print(level, fmt, args...)                                      \
+    do {                                                               \
+       if ((level) <= v3_net_debug) {                                  \
+           extern struct v3_os_hooks * os_hooks;                       \
+           if ((os_hooks) && (os_hooks)->print) {                      \
+               (os_hooks)->print((fmt), ##args);                       \
+           }                                                   \
+       }                                                       \
+    } while (0)
+
 struct nic_statistics {
-    uint32_t tx_pkts;
+    uint64_t tx_pkts;
     uint64_t tx_bytes;
-    uint32_t tx_dropped;
+    uint64_t tx_dropped;
        
-    uint32_t rx_pkts;
+    uint64_t rx_pkts;
     uint64_t rx_bytes;
-    uint32_t rx_dropped;
+    uint64_t rx_dropped;
 
-    uint32_t interrupts;
+    uint32_t tx_interrupts;
+    uint32_t rx_interrupts;
 };
     
 static inline int is_multicast_ethaddr(const uint8_t * addr)
diff --git a/palacios/include/palacios/vmm_extensions.h b/palacios/include/palacios/vmm_extensions.h

index 0135f88..fdddb69 100644 (file)
--- a/palacios/include/palacios/vmm_extensions.h
+++ b/palacios/include/palacios/vmm_extensions.h
@@ -23,8 +23,8 @@
 #ifdef __V3VEE__
 
 #include <palacios/vmm.h>
-#include <palacios/vmm_list.h>
 #include <palacios/vmm_config.h>
+#include <palacios/vmm_list.h>
 
 
 struct v3_vm_info;
@@ -41,10 +41,10 @@ struct v3_extension_impl {
     char * name;
     int (*init)(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data);
     int (*deinit)(struct v3_vm_info * vm, void * priv_data);
-    int (*core_init)(struct guest_info * core);
-    int (*core_deinit)(struct guest_info * core);
-    int (*on_entry)(struct guest_info * core);
-    int (*on_exit)(struct guest_info * core);
+    int (*core_init)(struct guest_info * core, void * priv_data);
+    int (*core_deinit)(struct guest_info * core, void * priv_data);
+    int (*on_entry)(struct guest_info * core, void * priv_data);
+    int (*on_exit)(struct guest_info * core, void * priv_data);
 };
 
 struct v3_extension {
@@ -64,6 +64,9 @@ int V3_deinit_extensions();
 
 int v3_init_ext_manager(struct v3_vm_info * vm);
 int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg);
+int v3_init_core_extensions(struct guest_info * core);
+
+void * v3_get_extension_state(struct v3_vm_info * vm, const char * name);
 
 
 #define register_extension(ext)                                        \
diff --git a/palacios/include/palacios/vmm_instr_emulator.h b/palacios/include/palacios/vmm_instr_emulator.h

index 84b07a4..7559f05 100644 (file)
--- a/palacios/include/palacios/vmm_instr_emulator.h
+++ b/palacios/include/palacios/vmm_instr_emulator.h
@@ -23,7 +23,7 @@
 
 #define MAKE_1OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -42,7 +42,7 @@
 
 #define MAKE_1OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -61,7 +61,7 @@
 
 #define MAKE_1OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -80,7 +80,7 @@
 
 #define MAKE_1OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq; "                                        \
@@ -134,7 +134,7 @@
 
 #define MAKE_2OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq\r\n"                                      \
@@ -156,7 +156,7 @@
 
 #define MAKE_2OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -175,7 +175,7 @@
 
 #define MAKE_2OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -193,7 +193,7 @@
 
 #define MAKE_2OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -217,7 +217,7 @@
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq; "                                        \
@@ -239,7 +239,7 @@
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -260,7 +260,7 @@
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -283,7 +283,7 @@
                                                              addr_t * src, \
                                                              addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -307,7 +307,7 @@
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq; "                                        \
@@ -330,7 +330,7 @@
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -351,7 +351,7 @@
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -374,7 +374,7 @@
                                                              addr_t * src, \
                                                              addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
diff --git a/palacios/include/palacios/vmm_queue.h b/palacios/include/palacios/vmm_queue.h

index e88329f..811f19d 100644 (file)
--- a/palacios/include/palacios/vmm_queue.h
+++ b/palacios/include/palacios/vmm_queue.h
@@ -28,30 +28,26 @@
 #include <palacios/vmm_lock.h>
 
 
-/* IMPORTANT:
- * This implementation currently does no locking, and as such is not 
- * SMP/thread/interrupt safe
- */
 
 
-struct queue_entry {
+struct v3_queue_entry {
     addr_t entry;
     struct list_head entry_list;
 };
 
 
-struct gen_queue {
+struct v3_queue {
     uint_t num_entries;
     struct list_head entries;
     v3_lock_t lock;
 };
 
 
-struct gen_queue * v3_create_queue();
-void v3_init_queue(struct gen_queue * queue);
+struct v3_queue * v3_create_queue();
+void v3_init_queue(struct v3_queue * queue);
 
-void v3_enqueue(struct gen_queue * queue, addr_t entry);
-addr_t v3_dequeue(struct gen_queue * queue);
+void v3_enqueue(struct v3_queue * queue, addr_t entry);
+addr_t v3_dequeue(struct v3_queue * queue);
 
 
 
diff --git a/palacios/include/palacios/vmm_vnet.h b/palacios/include/palacios/vmm_vnet.h

index 1750fff..0f8c793 100644 (file)
--- a/palacios/include/palacios/vmm_vnet.h
+++ b/palacios/include/palacios/vmm_vnet.h
@@ -19,8 +19,8 @@
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  */
 
-#ifndef __VNET_H__
-#define __VNET_H__
+#ifndef __VNET_CORE_H__
+#define __VNET_CORE_H__
 
 #include <palacios/vmm.h>
 #include <palacios/vmm_ethernet.h>
@@ -38,7 +38,8 @@
 
 #define VNET_HASH_SIZE         17
 
-//routing table entry
+extern int v3_vnet_debug;
+
 struct v3_vnet_route {
     uint8_t src_mac[ETH_ALEN];
     uint8_t dst_mac[ETH_ALEN];
@@ -100,7 +101,7 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
                uint8_t type,
                void * priv_data);
 int v3_vnet_add_route(struct v3_vnet_route route);
-int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data);
+int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize);
 int v3_vnet_find_dev(uint8_t  * mac);
 int v3_vnet_stat(struct vnet_stat * stats);
 
@@ -110,19 +111,17 @@ struct v3_vnet_dev_ops {
     int (*input)(struct v3_vm_info * vm, 
                struct v3_vnet_pkt * pkt, 
                void * dev_data);
-    void (*poll) (struct v3_vm_info * vm, int budget, void * dev_data);
 };
 
 int v3_init_vnet(void);        
 void v3_deinit_vnet(void);
 
-void v3_vnet_do_poll(struct v3_vm_info * vm);
-
 int v3_vnet_add_dev(struct v3_vm_info * info, uint8_t * mac, 
                    struct v3_vnet_dev_ops * ops,
                    void * priv_data);
 int v3_vnet_del_dev(int dev_id);
 
+
 #endif
 
 #endif
diff --git a/palacios/include/palacios/vmx.h b/palacios/include/palacios/vmx.h

index fd5e6ce..7a0a039 100644 (file)
--- a/palacios/include/palacios/vmx.h
+++ b/palacios/include/palacios/vmx.h
@@ -30,31 +30,14 @@
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 
-// Intel VMX Specific MSRs
-#define VMX_FEATURE_CONTROL_MSR     0x0000003a
-#define VMX_BASIC_MSR               0x00000480
-#define VMX_PINBASED_CTLS_MSR       0x00000481
-#define VMX_PROCBASED_CTLS_MSR      0x00000482
-#define VMX_EXIT_CTLS_MSR           0x00000483
-#define VMX_ENTRY_CTLS_MSR          0x00000484
-#define VMX_MISC_MSR                0x00000485
-#define VMX_CR0_FIXED0_MSR          0x00000486
-#define VMX_CR0_FIXED1_MSR          0x00000487
-#define VMX_CR4_FIXED0_MSR          0x00000488
-#define VMX_CR4_FIXED1_MSR          0x00000489
-#define VMX_VMCS_ENUM_MSR           0x0000048A
 
 #define VMX_SUCCESS        0
 #define VMX_FAIL_INVALID   1
 #define VMX_FAIL_VALID     2
 #define VMM_ERROR          3
 
-#define FEATURE_CONTROL_LOCK  0x00000001
-#define FEATURE_CONTROL_VMXON 0x00000004
-#define FEATURE_CONTROL_VALID ( FEATURE_CONTROL_LOCK | FEATURE_CONTROL_VMXON )
 
 
-#define CPUID_1_ECX_VTXFLAG 0x00000020
 
 
 struct vmx_pin_ctrls {
@@ -168,15 +151,6 @@ struct vmx_entry_ctrls {
     } __attribute__((packed));
 } __attribute__((packed));
 
-struct vmx_basic_msr {
-    uint32_t revision;
-    uint_t regionSize   : 13;
-    uint_t rsvd1        : 4; // Always 0
-    uint_t physWidth    : 1;
-    uint_t smm          : 1; // Always 1
-    uint_t memType      : 4;
-    uint_t rsvd2        : 10; // Always 0
-}  __attribute__((packed));
 
 typedef enum { 
     VMXASSIST_DISABLED,
diff --git a/palacios/include/palacios/vmx_ept.h b/palacios/include/palacios/vmx_ept.h

new file mode 100644 (file)

index 0000000..55cb363
--- /dev/null
+++ b/palacios/include/palacios/vmx_ept.h
@@ -0,0 +1,124 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMX_EPT_H__
+#define __VMX_EPT_H__
+
+
+#ifdef __V3VEE__
+
+/* The actual format of these data structures is specified as being machine 
+   dependent. Thus the lengths of the base address fields are defined as variable. 
+   To be safe we assume the maximum(?) size fields 
+*/
+
+
+typedef struct vmx_eptp {  /* 64-bit EPT pointer; bit ranges below assume LSB-first bit-field allocation (GCC on x86) */
+    uint8_t psmt            : 3;   /* bits 2:0  - presumably the paging-structure memory type */
+    uint8_t pwl1            : 3;   /* bits 5:3  - presumably the page-walk length minus 1 */
+    uint8_t rsvd1           : 6;   /* bits 11:6 */
+    uint64_t pml_base_addr  : 39;  /* bits 50:12 - top-level table base (address bits 50:12) */
+    uint16_t rsvd2          : 13;  /* bits 63:51 */
+} __attribute__((packed)) vmx_eptp_t;
+
+
+typedef struct vmx_pml4 {  /* EPT PML4 entry (64 bits); bit ranges assume LSB-first bit-field allocation (GCC on x86) */
+    uint8_t read            : 1;   /* bit 0 */
+    uint8_t write           : 1;   /* bit 1 */
+    uint8_t exec            : 1;   /* bit 2 */
+    uint8_t rsvd1           : 5;   /* bits 7:3 */
+    uint8_t ignore1         : 4;   /* bits 11:8 */
+    uint64_t pdp_base_addr  : 39;  /* bits 50:12 - base of the next-level (PDP) table */
+    uint8_t rsvd2           : 1;   /* bit 51 */
+    uint32_t ignore2        : 12;  /* bits 63:52 */
+} __attribute__((packed)) vmx_pml4_t;
+
+
+typedef struct vmx_pdp_1GB {  /* EPT PDP entry mapping a 1GB page; bit ranges assume LSB-first bit-field allocation */
+    uint8_t read            : 1;   /* bit 0 */
+    uint8_t write           : 1;   /* bit 1 */
+    uint8_t exec            : 1;   /* bit 2 */
+    uint8_t mt              : 3;   /* bits 5:3 - presumably the EPT memory type */
+    uint8_t ipat            : 1;   /* bit 6 */
+    uint8_t large_page      : 1;   /* bit 7 */
+    uint8_t ignore1         : 4;   /* bits 11:8 */
+    uint32_t rsvd1          : 18;  /* bits 29:12 - below the 1GB alignment boundary */
+    uint32_t page_base_addr : 21;  /* bits 50:30 - 1GB-aligned page base */
+    uint8_t rsvd2           : 1;   /* bit 51 */
+    uint32_t ignore2        : 12;  /* bits 63:52 */
+} __attribute__((packed)) vmx_pdp_1GB_t;
+
+typedef struct vmx_pdp {  /* EPT PDP entry referencing a next-level table; bit ranges assume LSB-first allocation */
+    uint8_t read            : 1;   /* bit 0 */
+    uint8_t write           : 1;   /* bit 1 */
+    uint8_t exec            : 1;   /* bit 2 */
+    uint8_t rsvd1           : 4;   /* bits 6:3 */
+    uint8_t large_page      : 1;   /* bit 7 */
+    uint8_t ignore1         : 4;   /* bits 11:8 */
+    uint64_t page_base_addr : 39;  /* bits 50:12; needs a 64-bit base type, a 39-bit field cannot be declared on uint32_t */
+    uint8_t rsvd2           : 1;   /* bit 51 */
+    uint32_t ignore2        : 12;  /* bits 63:52 */
+} __attribute__((packed)) vmx_pdp_t;
+
+
+typedef struct vmx_pde_2MB {  /* EPT page-directory entry mapping a 2MB page; bit ranges assume LSB-first allocation */
+    uint8_t read            : 1;   /* bit 0 */
+    uint8_t write           : 1;   /* bit 1 */
+    uint8_t exec            : 1;   /* bit 2 */
+    uint8_t mt              : 3;   /* bits 5:3 - presumably the EPT memory type */
+    uint8_t ipat            : 1;   /* bit 6 */
+    uint8_t large_page      : 1;   /* bit 7 */
+    uint8_t ignore1         : 4;   /* bits 11:8 */
+    uint32_t rsvd1          : 9;   /* bits 20:12 - below the 2MB alignment boundary */
+    uint32_t page_base_addr : 30;  /* bits 50:21 - 2MB-aligned page base */
+    uint8_t rsvd2           : 1;   /* bit 51 */
+    uint32_t ignore2        : 12;  /* bits 63:52 */
+} __attribute__((packed)) vmx_pde_2MB_t;
+
+
+typedef struct vmx_pde {  /* EPT page-directory entry referencing a page table; bit ranges assume LSB-first allocation */
+    uint8_t read            : 1;   /* bit 0 */
+    uint8_t write           : 1;   /* bit 1 */
+    uint8_t exec            : 1;   /* bit 2 */
+    uint8_t rsvd1           : 4;   /* bits 6:3 */
+    uint8_t large_page      : 1;   /* bit 7 */
+    uint8_t ignore1         : 4;   /* bits 11:8 */
+    uint64_t page_base_addr : 39;  /* bits 50:12; needs a 64-bit base type, a 39-bit field cannot be declared on uint32_t */
+    uint8_t rsvd2           : 1;   /* bit 51 */
+    uint32_t ignore2        : 12;  /* bits 63:52 */
+} __attribute__((packed)) vmx_pde_t;
+
+
+
+typedef struct vmx_pte {  /* EPT leaf entry mapping a 4KB page; bit ranges assume LSB-first bit-field allocation */
+    uint8_t read            : 1;   /* bit 0 */
+    uint8_t write           : 1;   /* bit 1 */
+    uint8_t exec            : 1;   /* bit 2 */
+    uint8_t mt              : 3;   /* bits 5:3 - presumably the EPT memory type */
+    uint8_t ipat            : 1;   /* bit 6 */
+    uint8_t ignore1         : 5;   /* bits 11:7 */
+    uint64_t page_base_addr : 39;  /* bits 50:12; needs a 64-bit base type, a 39-bit field cannot be declared on uint32_t */
+    uint8_t rsvd2           : 1;   /* bit 51 */
+    uint32_t ignore2        : 12;  /* bits 63:52 */
+} __attribute__((packed)) vmx_pte_t;
+
+#endif 
+
+#endif
+
diff --git a/palacios/include/palacios/vmx_hw_info.h b/palacios/include/palacios/vmx_hw_info.h

new file mode 100644 (file)

index 0000000..e130545
--- /dev/null
+++ b/palacios/include/palacios/vmx_hw_info.h
@@ -0,0 +1,169 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMX_HW_INFO_H__
+#define __VMX_HW_INFO_H__
+
+#ifdef __V3VEE__
+
+
+
+#define VMX_BASIC_MSR               0x00000480  /* first of the VMX capability MSR indices (0x480-0x490) */
+#define VMX_PINBASED_CTLS_MSR       0x00000481
+#define VMX_PROCBASED_CTLS_MSR      0x00000482
+#define VMX_EXIT_CTLS_MSR           0x00000483
+#define VMX_ENTRY_CTLS_MSR          0x00000484
+#define VMX_MISC_MSR                0x00000485
+#define VMX_CR0_FIXED0_MSR          0x00000486
+#define VMX_CR0_FIXED1_MSR          0x00000487
+#define VMX_CR4_FIXED0_MSR          0x00000488
+#define VMX_CR4_FIXED1_MSR          0x00000489
+#define VMX_VMCS_ENUM_MSR           0x0000048A
+#define VMX_PROCBASED_CTLS2_MSR     0x0000048B  /* secondary processor-based controls */
+#define VMX_EPT_VPID_CAP_MSR        0x0000048C  /* EPT / VPID capabilities */
+#define VMX_TRUE_PINBASED_CTLS_MSR  0x0000048D  /* "TRUE" variants of the four control MSRs above */
+#define VMX_TRUE_PROCBASED_CTLS_MSR 0x0000048E
+#define VMX_TRUE_EXIT_CTLS_MSR      0x0000048F
+#define VMX_TRUE_ENTRY_CTLS_MSR     0x00000490
+
+
+
+struct vmx_basic_msr {  /* 64-bit value viewed either as raw halves or as decoded fields (presumably read from VMX_BASIC_MSR) */
+    union {
+       struct {
+           uint32_t lo;  /* raw low 32 bits */
+           uint32_t hi;  /* raw high 32 bits */
+       } __attribute__((packed));
+
+       struct {    uint32_t revision;  /* bits 31:0 */
+           uint32_t regionSize   : 13;  /* bits 44:32 */
+           uint8_t rsvd1         : 3; /* Always 0 */
+           uint8_t physWidth     : 1; /* VMCS address field widths 
+                                         (1=32bits, 0=natural width) */
+           uint8_t smm           : 1;  /* bit 49 */
+           uint8_t memType       : 4; /* 0 = UC, 6 = WriteBack */
+           uint8_t io_str_info   : 1;  /* bit 54 */
+           uint8_t def1_maybe_0  : 1; /* 1="Any VMX ctrls that default to 1 may be cleared to 0" */
+           uint32_t rsvd2        : 8; /* Always 0 */
+       }  __attribute__((packed));
+    }  __attribute__((packed));
+}  __attribute__((packed));
+
+
+struct vmx_misc_msr {  /* 64-bit value viewed either as raw halves or as decoded fields (presumably read from VMX_MISC_MSR) */
+    union {
+       struct {
+           uint32_t lo;  /* raw low 32 bits */
+           uint32_t hi;  /* raw high 32 bits */
+       } __attribute__((packed));
+
+       struct {
+           uint8_t tsc_multiple       : 5; /* Bit position in TSC field that drives vmx timer step */
+           uint8_t exits_store_LMA    : 1;
+           uint8_t can_halt           : 1;
+           uint8_t can_shtdown        : 1;  /* NOTE(review): "shtdown" looks like a misspelling of "shutdown"; renaming would affect users of the field */
+           uint8_t can_wait_for_sipi  : 1;
+           uint8_t rsvd1              : 7;
+           uint16_t num_cr3_targets   : 9;
+           uint8_t max_msr_cache_size : 3; /* (512 * (max_msr_cache_size + 1)) == max msr load/store list size */
+           uint8_t SMM_ctrl_avail     : 1;
+           uint8_t rsvd2              : 3; 
+           uint32_t MSEG_rev_id;  /* high 32 bits of the decoded view */
+       }  __attribute__((packed));
+    }  __attribute__((packed));
+} __attribute__((packed));
+
+
+struct vmx_ept_msr {  /* 64-bit value viewed either as raw halves or as decoded fields (presumably read from VMX_EPT_VPID_CAP_MSR) */
+    union {
+       struct {
+           uint32_t lo;  /* raw low 32 bits */
+           uint32_t hi;  /* raw high 32 bits */
+       } __attribute__((packed));
+
+       struct {
+           uint8_t exec_only_ok             : 1;  /* bit 0 */
+           uint8_t rsvd1                    : 5;
+           uint8_t pg_walk_len4             : 1; /* support for a page walk of length 4 */
+           uint8_t rsvd2                    : 1;
+           uint8_t ept_uc_ok                : 1; /* EPT page tables can be uncacheable */
+           uint8_t rsvd3                    : 5;
+           uint8_t ept_wb_ok                : 1; /* EPT page tables can be writeback */
+           uint8_t rsvd4                    : 1;
+           uint8_t ept_2MB_ok               : 1; /* 2MB EPT pages supported */
+           uint8_t ept_1GB_ok               : 1; /* 1GB EPT pages supported */
+           uint8_t rsvd5                    : 2;
+           uint8_t INVEPT_avail             : 1; /* INVEPT instruction is available */
+           uint8_t rsvd6                    : 4;
+           uint8_t INVEPT_single_ctx_avail  : 1;
+           uint8_t INVEPT_all_ctx_avail     : 1;
+           uint8_t rsvd7                    : 5;  /* pads the low word to 32 bits */
+           uint8_t INVVPID_avail            : 1;  /* bit 32 - first field of the high word */
+           uint8_t rsvd8                    : 7;
+           uint8_t INVVPID_1addr_avail      : 1;
+           uint8_t INVVPID_single_ctx_avail : 1;
+           uint8_t INVVPID_all_ctx_avail    : 1;
+           uint8_t INVVPID_single_ctx_w_glbls_avail : 1;
+           uint32_t rsvd9                   : 20;  /* bits 63:44 */
+       }  __attribute__((packed));
+    }  __attribute__((packed));
+}__attribute__((packed));
+
+
+struct vmx_ctrl_field {  /* constraints on one 32-bit VMX control word */
+    uint32_t def_val;  /* presumably the default value to start from */
+    uint32_t req_val;  /* Required values: (field_val & req_mask) == req_val */ 
+    uint32_t req_mask; /* If a mask bit is set, its value is restricted (i.e. the VMM cannot change it) */
+};
+
+
+struct vmx_cr_field {  /* 64-bit counterpart of vmx_ctrl_field, used for control-register constraints */
+    uint64_t def_val;  /* presumably the default value to start from */
+    uint64_t req_val;  /* Required values: (field_val & req_mask) == req_val */ 
+    uint64_t req_mask; /* If a mask bit is set, its value is restricted (i.e. the VMM cannot change it) */
+};
+
+
+
+
+struct vmx_hw_info {  /* aggregate of the decoded VMX capability data declared in this header */
+    struct vmx_basic_msr basic_info;
+    struct vmx_misc_msr misc_info;
+    struct vmx_ept_msr ept_info;
+
+    struct vmx_ctrl_field pin_ctrls;    /* one constraint record per 32-bit control word */
+    struct vmx_ctrl_field proc_ctrls;
+    struct vmx_ctrl_field exit_ctrls;
+    struct vmx_ctrl_field entry_ctrls;
+    struct vmx_ctrl_field proc_ctrls_2;
+
+    struct vmx_cr_field cr0;  /* 64-bit constraint records for CR0 and CR4 */
+    struct vmx_cr_field cr4;
+};
+
+
+int v3_init_vmx_hw(struct vmx_hw_info * hw_info);  /* presumably fills *hw_info; return convention is not visible in this header - TODO document */
+
+
+
+
+#endif
+
+#endif
diff --git a/palacios/include/palacios/vmx_lowlevel.h b/palacios/include/palacios/vmx_lowlevel.h

index 6db9f17..ce6a440 100644 (file)
--- a/palacios/include/palacios/vmx_lowlevel.h
+++ b/palacios/include/palacios/vmx_lowlevel.h
@@ -57,24 +57,7 @@
 
 
 
-static inline int v3_enable_vmx(addr_t vmxon_ptr) {
-    uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr;
-    uint8_t ret_invalid = 0;
 
-    __asm__ __volatile__ (
-                VMXON_OPCODE
-                EAX_06_MODRM
-                "setnaeb %0;" // fail invalid (CF=1)
-                : "=q"(ret_invalid)
-                : "a"(&vmxon_ptr_64),"0"(ret_invalid)
-                : "memory");
-
-    if (ret_invalid) {
-        return VMX_FAIL_INVALID;
-    } else {
-        return VMX_SUCCESS;
-    }
-}
 
 static inline int vmcs_clear(addr_t vmcs_ptr) {
     uint64_t vmcs_ptr_64 __attribute__ ((aligned(8))) = (uint64_t)vmcs_ptr;
@@ -181,6 +164,26 @@ static inline int vmcs_write(vmcs_field_t vmcs_field, addr_t value) {
     return VMX_SUCCESS;
 }
 
+
+static inline int vmx_on(addr_t vmxon_ptr) {  /* issues the VMXON_OPCODE instruction bytes with a pointer to vmxon_ptr */
+    uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr;  /* 8-byte aligned 64-bit copy; its address is passed in EAX/RAX */
+    uint8_t ret_invalid = 0;
+
+    __asm__ __volatile__ (
+                VMXON_OPCODE
+                EAX_06_MODRM
+                "setnaeb %0;" // fail invalid (CF=1)
+                : "=q"(ret_invalid)
+                : "a"(&vmxon_ptr_64),"0"(ret_invalid)
+                : "memory");
+
+    if (ret_invalid) {  /* NOTE(review): only CF is inspected here; vmx_off() also tracks a separate "valid" failure flag */
+        return VMX_FAIL_INVALID;
+    } else {
+        return VMX_SUCCESS;
+    }
+}
+
 static inline int vmx_off() {
     uint8_t ret_valid = 0;
     uint8_t ret_invalid = 0;
@@ -198,6 +201,57 @@ static inline int vmx_off() {
     return VMX_SUCCESS;
 }
 
+
+static inline int enable_vmx() {  /* ORs 0x2000 into CR4 and 0x20 into CR0 on the current CPU; always returns 0 */
+#ifdef __V3_64BIT__
+    __asm__ __volatile__ (
+                         "movq %%cr4, %%rbx;"
+                         "orq  $0x00002000, %%rbx;"  /* bit 13 (CR4.VMXE per the Intel SDM) */
+                         "movq %%rbx, %%cr4;"
+                         : 
+                         :
+                         : "%rbx"
+                         );
+
+
+    __asm__ __volatile__ (
+                         "movq %%cr0, %%rbx; "
+                         "orq  $0x00000020,%%rbx; "  /* bit 5 (CR0.NE per the Intel SDM) */
+                         "movq %%rbx, %%cr0;"
+                         :
+                         :
+                         : "%rbx"
+                         );
+#elif __V3_32BIT__  /* NOTE(review): tests the macro's value, not whether it is defined; if neither branch is taken the function is a no-op */
+    __asm__ __volatile__ (
+                         "movl %%cr4, %%ecx;"
+                         "orl  $0x00002000, %%ecx;"  /* same CR4 bit as the 64-bit branch */
+                         "movl %%ecx, %%cr4;"
+                         : 
+                         :
+                         : "%ecx"
+                         );
+
+
+
+    __asm__ __volatile__ (
+                         "movl %%cr0, %%ecx; "
+                         "orl  $0x00000020,%%ecx; "  /* same CR0 bit as the 64-bit branch */
+                         "movl %%ecx, %%cr0;"
+                         :
+                         :
+                         : "%ecx"
+                         );
+    
+#endif
+
+    return 0;
+}
+
+
+
+
+
 #endif
 
 #endif
diff --git a/palacios/src/devices/Kconfig b/palacios/src/devices/Kconfig

index eb15aca..35b8523 100644 (file)
--- a/palacios/src/devices/Kconfig
+++ b/palacios/src/devices/Kconfig
@@ -43,7 +43,16 @@ config GENERIC
        bool "Generic Device"
        default y
        help 
-         Includes the Virtual Generic device
+         Includes the virtual generic device.  This device allows you
+          to see guest I/O port and memory region interaction with a physical
+          device on the underlying hardware, as well as to ignore such
+          interaction.  The generic device also serves as a front-end
+          device for non-PCI host-based virtual device implementations.  If
+          you want to handle either host-based virtual or physical devices
+          that are not PCI devices, this is what you want.  If you want
+          to handle a host-based virtual device that is a PCI device, you  
+          want to use the PCI front-end device.  If you want to handle
+          a physical PCI device, you want the passthrough PCI device.  
 
 config DEBUG_GENERIC
        bool "Generic device Debugging"
@@ -156,7 +165,7 @@ config LINUX_VIRTIO_VNET
         default n
         depends on PCI && EXPERIMENTAL && VNET
         help
-          Enable the Virtio VNET interface
+          Enable the Virtio VNET interface for Control VM
 
 config DEBUG_LINUX_VIRTIO_VNET
         bool "Virtio VNET Interface Debugging"
@@ -167,11 +176,11 @@ config DEBUG_LINUX_VIRTIO_VNET
 
 
 config VNET_NIC
-        bool "Enable VNET VIrtio NIC Device"
+        bool "Enable VNET Backend Device"
         default n
        depends on PCI && EXPERIMENTAL && VNET
         help
-          Enable the VNET Virtio backend device
+          Enable the VNET backend device
 
 config DEBUG_VNET_NIC
         bool "VNET NIC Device Debugging"
@@ -267,6 +276,7 @@ config PASSTHROUGH_PCI
        help 
          Enables hardware devices to be passed through to the VM
 
+
 config DEBUG_PCI
        bool "PCI debugging"
        depends on PCI && DEBUG_ON
@@ -274,6 +284,26 @@ config DEBUG_PCI
          Enable debugging for the PCI  
 
 
+config PCI_FRONT
+       bool "PCI front-end device"
+       default y 
+       depends on PCI && HOST_DEVICE
+       help 
+         PCI front-end device for a host-based PCI device implementation.
+          This device allows you to project a host-based *virtual* device 
+          into the guest as a PCI device.   If you want to project a 
+          physical PCI device, use Passthrough PCI instead.  If you want
+          to project a non-PCI virtual or physical device, 
+          use the generic device.
+          
+
+config DEBUG_PCI_FRONT
+       bool "PCI front-end debugging"
+       depends on PCI_FRONT && DEBUG_ON
+       help 
+         Enable debugging for the PCI front-end device 
+          
+
 
 config PIC
        bool "8259A PIC"
diff --git a/palacios/src/devices/Makefile b/palacios/src/devices/Makefile

index f5b40be..51b43e9 100644 (file)
--- a/palacios/src/devices/Makefile
+++ b/palacios/src/devices/Makefile
@@ -45,3 +45,5 @@ obj-$(CONFIG_MCHECK) += mcheck.o
 
 obj-$(CONFIG_VGA) += vga.o
 
+obj-$(CONFIG_PCI_FRONT) += pci_front.o
+
diff --git a/palacios/src/devices/generic.c b/palacios/src/devices/generic.c

index b7d1a18..73b778a 100644 (file)
--- a/palacios/src/devices/generic.c
+++ b/palacios/src/devices/generic.c
@@ -24,12 +24,19 @@
 #include <palacios/vmm_list.h>
 #include <palacios/vmm_io.h>
 #include <palacios/vmm_dev_mgr.h>
+#include <palacios/vm_guest_mem.h>
+
+#ifdef CONFIG_HOST_DEVICE
+#include <interfaces/vmm_host_dev.h>
+#endif
 
 #ifndef CONFIG_DEBUG_GENERIC
 #undef PrintDebug
 #define PrintDebug(fmt, args...)
 #endif
 
+#define MAX_NAME      32
+#define MAX_MEM_HOOKS 16
 
 typedef enum {GENERIC_IGNORE, 
              GENERIC_PASSTHROUGH, 
@@ -37,32 +44,64 @@ typedef enum {GENERIC_IGNORE,
              GENERIC_PRINT_AND_IGNORE} generic_mode_t;
 
 struct generic_internal {
+    enum {GENERIC_PHYSICAL, GENERIC_HOST} forward_type;  /* where passthrough accesses go: physical hardware or a host device */
+#ifdef CONFIG_HOST_DEVICE
+    v3_host_dev_t                         host_dev;  /* handle used for GENERIC_HOST forwarding; tested against 0 before use */
+#endif
+    struct vm_device                      *dev; // me
+
+    char                                  name[MAX_NAME];  /* printed in every log message of this device */
+    
+    uint32_t                              num_mem_hooks;  /* number of valid entries in mem_hook[] */
+    addr_t                                mem_hook[MAX_MEM_HOOKS];  /* start addresses handed to v3_unhook_mem() on teardown */
 };
 
 
 
 
-static int generic_write_port_passthrough(struct guest_info * core, uint16_t port, void * src, 
-                                         uint_t length, void * priv_data) {
+static int generic_write_port_passthrough(struct guest_info * core, 
+                                         uint16_t port, 
+                                         void * src, 
+                                         uint_t length, 
+                                         void * priv_data) 
+{  /* forwards a guest port write according to state->forward_type; priv_data is the generic_internal itself */
+    struct generic_internal *state = (struct generic_internal *) priv_data;
     uint_t i;
 
-    switch (length) {
-       case 1:
-           v3_outb(port, ((uint8_t *)src)[0]);
-           break;
-       case 2:
-           v3_outw(port, ((uint16_t *)src)[0]);
+    switch (state->forward_type) { 
+       case GENERIC_PHYSICAL:  /* replay the write on the same port number with v3_out* */
+           switch (length) {
+               case 1:
+                   v3_outb(port, ((uint8_t *)src)[0]);
+                   break;
+               case 2:
+                   v3_outw(port, ((uint16_t *)src)[0]);
+                   break;
+               case 4:
+                   v3_outdw(port, ((uint32_t *)src)[0]);
+                   break;
+               default:  /* any other length: emit the buffer one byte at a time to the same port */
+                   for (i = 0; i < length; i++) { 
+                       v3_outb(port, ((uint8_t *)src)[i]);
+                   }
+                   break;
+           }
+           return length;
             break;
-       case 4:
-           v3_outdw(port, ((uint32_t *)src)[0]);
+#ifdef CONFIG_HOST_DEVICE
+       case GENERIC_HOST:  /* delegate to the host device; -1 when none is attached */
+           if (state->host_dev) { 
+               return v3_host_dev_write_io(state->host_dev,port,src,length);
+           } else {
+               return -1;
+           }
             break;
+#endif
        default:
-           for (i = 0; i < length; i++) { 
-               v3_outb(port, ((uint8_t *)src)[i]);
-           }
+           PrintError("generic (%s): unknown forwarding type\n", state->name);
+           return -1;
+           break;
     }
-
-    return length;
 }
 
 static int generic_write_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src, 
@@ -70,7 +109,16 @@ static int generic_write_port_print_and_passthrough(struct guest_info * core, ui
     uint_t i;
     int rc;
 
-    PrintDebug("generic: writing 0x");
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+    PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ...", state->name,
+              length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+    PrintDebug("generic (%s): writing 0x", state->name);
 
     for (i = 0; i < length; i++) { 
        PrintDebug("%x", ((uint8_t *)src)[i]);
@@ -85,35 +133,63 @@ static int generic_write_port_print_and_passthrough(struct guest_info * core, ui
     return rc;
 }
 
-static int generic_read_port_passthrough(struct guest_info * core, uint16_t port, void * src, 
-                                        uint_t length, void * priv_data) {
+static int generic_read_port_passthrough(struct guest_info * core, 
+                                        uint16_t port, 
+                                        void * dst, 
+                                        uint_t length, 
+                                        void * priv_data) 
+{  /* services a guest port read according to state->forward_type; priv_data is the generic_internal itself */
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+
     uint_t i;
 
-    switch (length) {
-       case 1:
-           ((uint8_t *)src)[0] = v3_inb(port);
-           break;
-       case 2:
-           ((uint16_t *)src)[0] = v3_inw(port);
+    switch (state->forward_type) { 
+       case GENERIC_PHYSICAL:  /* read the same port number with v3_in* into dst */
+           switch (length) {
+               case 1:
+                   ((uint8_t *)dst)[0] = v3_inb(port);
+                   break;
+               case 2:
+                   ((uint16_t *)dst)[0] = v3_inw(port);
+                   break;
+               case 4:
+                   ((uint32_t *)dst)[0] = v3_indw(port);
+                   break;
+               default:  /* any other length: fill dst one byte at a time from the same port */
+                   for (i = 0; i < length; i++) { 
+                       ((uint8_t *)dst)[i] = v3_inb(port);
+                   }
+           }
+           return length;
             break;
-       case 4:
-           ((uint32_t *)src)[0] = v3_indw(port);
+#ifdef CONFIG_HOST_DEVICE
+       case GENERIC_HOST:  /* delegate to the host device; with no device attached control reaches the final return -1 */
+           if (state->host_dev) { 
+               return v3_host_dev_read_io(state->host_dev,port,dst,length);
+           }
             break;
+#endif
        default:
-           for (i = 0; i < length; i++) { 
-               ((uint8_t *)src)[i] = v3_inb(port);
-           }
+           PrintError("generic (%s): unknown forwarding type\n", state->name);
+           return -1;
+           break;
     }
 
-    return length;
+    return -1;
 }
 
 static int generic_read_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src, 
                                                   uint_t length, void * priv_data) {
     uint_t i;
     int rc;
-    
-    PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port);
+
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+    PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
 
 
     rc=generic_read_port_passthrough(core,port,src,length,priv_data);
@@ -141,7 +217,14 @@ static int generic_read_port_ignore(struct guest_info * core, uint16_t port, voi
 static int generic_read_port_print_and_ignore(struct guest_info * core, uint16_t port, void * src, 
                                              uint_t length, void * priv_data) {
    
-    PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port);
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+    PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
 
     memset((uint8_t *)src, 0, length);
     PrintDebug(" ignored (return zeroed buffer)\n");
@@ -159,8 +242,14 @@ static int generic_write_port_print_and_ignore(struct guest_info * core, uint16_
                                              uint_t length, void * priv_data) {
     int i;
 
-    PrintDebug("generic: writing 0x%x bytes to port 0x%x ", length, port);
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
 
+    PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ", state->name, length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+    
     memset((uint8_t *)src, 0, length);
     PrintDebug(" ignored - data was: 0x");
 
@@ -175,10 +264,197 @@ static int generic_write_port_print_and_ignore(struct guest_info * core, uint16_
 
 
 
+static int generic_write_mem_passthrough(struct guest_info * core, 
+                                        addr_t              gpa,
+                                        void              * src,
+                                        uint_t              len,
+                                        void              * priv)
+{
+    /* Forward a guest memory write; the hook's priv pointer is the vm_device. */
+    struct generic_internal *gen = (struct generic_internal *) ((struct vm_device *) priv)->private_data;
+
+    if (gen->forward_type == GENERIC_PHYSICAL) {
+       /* Physical: copy the data to V3_VAddr(gpa). */
+       memcpy(V3_VAddr((void*)gpa),src,len);
+       return len;
+    }
+
+#ifdef CONFIG_HOST_DEVICE
+    if (gen->forward_type == GENERIC_HOST) {
+       /* Host: delegate to the host device; fail when none is attached. */
+       if (!gen->host_dev) {
+           return -1;
+       }
+       return v3_host_dev_write_mem(gen->host_dev,gpa,src,len);
+    }
+#endif
+
+    /* Anything else, including GENERIC_HOST when host-device support is compiled out. */
+    PrintError("generic (%s): unknown forwarding type\n", gen->name);
+    return -1;
+}
 
-static int generic_free(struct generic_internal * state) {
-    PrintDebug("generic: deinit_device\n");
+static int generic_write_mem_print_and_passthrough(struct guest_info * core, 
+                                                  addr_t              gpa,
+                                                  void              * src,
+                                                  uint_t              len,
+                                                  void              * priv)
+{
+    int result;
+#ifdef CONFIG_DEBUG_GENERIC
+    /* Only the debug trace needs the device state. */
+    struct generic_internal *gen = (struct generic_internal *) ((struct vm_device *) priv)->private_data;
+#endif
+
+    PrintDebug("generic (%s): writing %u bytes to GPA 0x%p via %s ... ", gen->name,
+              len,(void*)gpa,
+              gen->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              gen->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+    result = generic_write_mem_passthrough(core,gpa,src,len,priv);
+    PrintDebug("done\n");
+
+    return result;
+}
 
+static int generic_write_mem_ignore(struct guest_info * core, 
+                                   addr_t              gpa,
+                                   void              * src,
+                                   uint_t              len,
+                                   void              * priv)
+{
+    return len;  /* drop the write silently but report every byte as consumed */
+}
+
+static int generic_write_mem_print_and_ignore(struct guest_info * core, 
+                                             addr_t              gpa,
+                                             void              * src,
+                                             uint_t              len,
+                                             void              * priv)
+{
+#ifdef CONFIG_DEBUG_GENERIC
+    struct vm_device *dev = (struct vm_device *) priv;  /* the hook's priv pointer is the vm_device */
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+    /* Trace the dropped write; the newline ends the record so the next message starts on its own line. */
+    PrintDebug("generic (%s): ignoring write of %u bytes to GPA 0x%p via %s\n", state->name,
+              len,(void*)gpa,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+    
+    return len;  /* report every byte as consumed */
+}
+
+static int generic_read_mem_passthrough(struct guest_info * core, 
+                                       addr_t              gpa,
+                                       void              * dst,
+                                       uint_t              len,
+                                       void              * priv)
+{
+    /* Service a guest memory read; the hook's priv pointer is the vm_device. */
+    struct generic_internal *gen = (struct generic_internal *) ((struct vm_device *) priv)->private_data;
+
+    if (gen->forward_type == GENERIC_PHYSICAL) {
+       /* Physical: copy the data from V3_VAddr(gpa). */
+       memcpy(dst,V3_VAddr((void*)gpa),len);
+       return len;
+    }
+
+#ifdef CONFIG_HOST_DEVICE
+    if (gen->forward_type == GENERIC_HOST) {
+       /* Host: delegate to the host device; fail when none is attached. */
+       if (!gen->host_dev) {
+           return -1;
+       }
+       return v3_host_dev_read_mem(gen->host_dev,gpa,dst,len);
+    }
+#endif
+
+    /* Anything else, including GENERIC_HOST when host-device support is compiled out. */
+    PrintError("generic (%s): unknown forwarding type\n", gen->name);
+
+    return -1;
+}
+
+static int generic_read_mem_print_and_passthrough(struct guest_info * core, 
+                                                 addr_t              gpa,
+                                                 void              * dst,
+                                                 uint_t              len,
+                                                 void              * priv)
+{
+    int result;
+#ifdef CONFIG_DEBUG_GENERIC
+    /* Only the debug trace needs the device state. */
+    struct generic_internal *gen = (struct generic_internal *) ((struct vm_device *) priv)->private_data;
+#endif
+
+    PrintDebug("generic (%s): attempting to read %u bytes from GPA 0x%p via %s ... ", gen->name,
+              len,(void*)gpa,
+              gen->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              gen->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+    result = generic_read_mem_passthrough(core,gpa,dst,len,priv);
+    PrintDebug("done - read %d bytes\n", result);
+
+    return result;
+}
+
+static int generic_read_mem_ignore(struct guest_info * core, 
+                                  addr_t              gpa,
+                                  void              * dst,
+                                  uint_t              len,
+                                  void              * priv)
+{
+    memset((uint8_t *)dst, 0, len);  /* silent variant: hand back zeros without tracing */
+    return len;
+}
+
+
+static int generic_read_mem_print_and_ignore(struct guest_info * core, 
+                                            addr_t              gpa,
+                                            void              * dst,
+                                            uint_t              len,
+                                            void              * priv)
+{
+#ifdef CONFIG_DEBUG_GENERIC
+    struct vm_device *dev = (struct vm_device *) priv;  /* the hook's priv pointer is the vm_device */
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+    PrintDebug("generic (%s): ignoring attempt to read %u bytes from GPA 0x%p via %s ... ", state->name,
+              len,(void*)gpa,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+    memset((uint8_t *)dst, 0, len);  /* the guest sees an all-zero buffer */
+
+    PrintDebug("returning zeros\n");
+
+    return len;  /* report every byte as read */
+}
+
+
+static int generic_free(struct generic_internal * state) {  /* closes the host device (if any), unhooks memory regions, then frees state */
+    int i;  /* NOTE(review): signed index compared against the uint32_t num_mem_hooks below */
+    
+    PrintDebug("generic (%s): deinit_device\n", state->name);
+    
+#ifdef CONFIG_HOST_DEVICE
+    if (state->host_dev) { 
+       v3_host_dev_close(state->host_dev);
+       state->host_dev=0;  /* clear the handle after closing it */
+    }
+#endif
+    
+    // Note that the device manager handles unhooking the I/O ports
+    // We need to handle unhooking memory regions    
+    for (i=0;i<state->num_mem_hooks;i++) {
+       if (v3_unhook_mem(state->dev->vm,V3_MEM_CORE_ANY,state->mem_hook[i])<0) { 
+           PrintError("generic (%s): unable to unhook memory starting at 0x%p\n", state->name,(void*)(state->mem_hook[i]));
+           return -1;  /* NOTE(review): this early return skips V3_Free(state) and leaves any remaining regions hooked */
+       }
+    }
+            
     V3_Free(state);
     return 0;
 }
@@ -197,73 +473,223 @@ static struct v3_device_ops dev_ops = {
 static int add_port_range(struct vm_device * dev, uint_t start, uint_t end, generic_mode_t mode) {
     uint_t i = 0;
 
-    PrintDebug("generic: Adding Port Range: 0x%x to 0x%x as %s\n", 
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+    PrintDebug("generic (%s): adding port range 0x%x to 0x%x as %s\n", state->name,
               start, end, 
-              (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : "print-and-ignore");
-    
+              (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : 
+              (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" :
+              (mode == GENERIC_PASSTHROUGH) ? "passthrough" :
+              (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN");
+       
     for (i = start; i <= end; i++) { 
-       if (mode == GENERIC_PRINT_AND_PASSTHROUGH) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_print_and_passthrough, 
-                               &generic_write_port_print_and_passthrough) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+       switch (mode) { 
+           case GENERIC_PRINT_AND_PASSTHROUGH:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_print_and_passthrough, 
+                                  &generic_write_port_print_and_passthrough) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+               
+           case GENERIC_PRINT_AND_IGNORE:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_print_and_ignore, 
+                                  &generic_write_port_print_and_ignore) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+           case GENERIC_PASSTHROUGH:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_passthrough, 
+                                  &generic_write_port_passthrough) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+           case  GENERIC_IGNORE:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_ignore, 
+                                  &generic_write_port_ignore) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+           default:
+               PrintError("generic (%s): huh?\n", state->name);
+               break;
+       }
+    }
+    
+    return 0;
+}
+
+
+static int add_mem_range(struct vm_device * dev, addr_t start, addr_t end, generic_mode_t mode) {
+
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+    PrintDebug("generic (%s): adding memory range 0x%p to 0x%p as %s\n", state->name,
+              (void*)start, (void*)end, 
+              (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : 
+              (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" :
+              (mode == GENERIC_PASSTHROUGH) ? "passthrough" :
+              (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN");
+       
+    switch (mode) { 
+       case GENERIC_PRINT_AND_PASSTHROUGH:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_print_and_passthrough, 
+                                &generic_write_mem_print_and_passthrough, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } else if (mode == GENERIC_PRINT_AND_IGNORE) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_print_and_ignore, 
-                               &generic_write_port_print_and_ignore) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+           break;
+           
+       case GENERIC_PRINT_AND_IGNORE:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_print_and_ignore, 
+                                &generic_write_mem_print_and_ignore, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } else if (mode == GENERIC_PASSTHROUGH) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_passthrough, 
-                               &generic_write_port_passthrough) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+           break;
+
+       case GENERIC_PASSTHROUGH:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_passthrough, 
+                                &generic_write_mem_passthrough, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } else if (mode == GENERIC_IGNORE) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_ignore, 
-                               &generic_write_port_ignore) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+           break;
+
+       case  GENERIC_IGNORE:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_ignore, 
+                                &generic_write_mem_ignore, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } 
+           break;
+       default:
+           PrintError("generic (%s): huh?\n",state->name);
+           break;
     }
-    
+
     return 0;
 }
 
 
 
+/*
+   The device can be used to forward to the underlying physical device 
+   or to a host device that has a given url.   Both memory and ports can be forwarded as
+
+        GENERIC_PASSTHROUGH => send writes and reads to physical device or host
+        GENERIC_PRINT_AND_PASSTHROUGH => also print what it's doing
+
+        GENERIC_IGNORE => ignore writes and reads
+        GENERIC_PRINT_AND_IGNORE => also print what it's doing
+
+
+       The purpose of the "PRINT" variants is to make it easy to spy on
+       device interactions (although you will not see DMA or interrupts)
+
 
+   <device class="generic" id="my_id" 
+         empty | forward="physical_device" or forward="host_device" hostdev="url">
+
+  (empty implies forward="physical_device")
+
+     <ports>
+         <start>portno1</start>
+         <end>portno2</end>   => portno1 through portno2 (inclusive)
+         <mode>PRINT_AND_PASSTHROUGH</mode>  (as above)
+     </ports>
+
+     <memory>
+         <start>gpa1</start>
+         <end>gpa2</end>     => memory addresses gpa1 through gpa2 (inclusive); page granularity
+         <mode> ... as above </mode>
+     </memory>
+
+*/
 
 static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     struct generic_internal * state = NULL;
     char * dev_id = v3_cfg_val(cfg, "ID");
+    char * forward = v3_cfg_val(cfg, "forward");
+#ifdef CONFIG_HOST_DEVICE
+    char * host_dev = v3_cfg_val(cfg, "hostdev");
+#endif
     v3_cfg_tree_t * port_cfg = v3_cfg_subtree(cfg, "ports");
+    v3_cfg_tree_t * mem_cfg = v3_cfg_subtree(cfg, "memory");
 
 
     state = (struct generic_internal *)V3_Malloc(sizeof(struct generic_internal));
 
     if (state == NULL) {
-       PrintError("Could not allocate generic state\n");
+       PrintError("generic (%s): could not allocate generic state\n",dev_id);
        return -1;
     }
     
     memset(state, 0, sizeof(struct generic_internal));
+    strncpy(state->name,dev_id,MAX_NAME);
+
+    if (!forward) { 
+       state->forward_type=GENERIC_PHYSICAL;
+    } else {
+       if (!strcasecmp(forward,"physical_device")) { 
+           state->forward_type=GENERIC_PHYSICAL;
+       } else if (!strcasecmp(forward,"host_device")) { 
+#ifdef CONFIG_HOST_DEVICE
+           state->forward_type=GENERIC_HOST;
+#else
+           PrintError("generic (%s): cannot configure host device since host device support is not built in\n", state->name);
+           V3_Free(state);
+           return -1;
+#endif
+       } else {
+           PrintError("generic (%s): unknown forwarding type \"%s\"\n", state->name, forward);
+           V3_Free(state);
+           return -1;
+       }
+    }
     
     struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, state);
 
     if (dev == NULL) {
-       PrintError("Could not attach device %s\n", dev_id);
+       PrintError("generic: could not attach device %s\n", state->name);
        V3_Free(state);
        return -1;
     }
 
-    PrintDebug("generic: init_device\n");
+    state->dev=dev;
+
+
+#ifdef CONFIG_HOST_DEVICE
+    if (state->forward_type==GENERIC_HOST) { 
+       if (!host_dev) { 
+           PrintError("generic (%s): host forwarding requested, but no host device given\n", state->name);
+           v3_remove_device(dev);
+           return -1;
+       } else {
+           state->host_dev = v3_host_dev_open(host_dev,V3_BUS_CLASS_DIRECT,dev,vm);
+           if (!(state->host_dev)) { 
+               PrintError("generic (%s): unable to open host device \"%s\"\n", state->name,host_dev);
+               v3_remove_device(dev);
+               return -1;
+           } else {
+               PrintDebug("generic (%s): successfully attached host device \"%s\"\n", state->name,host_dev);
+           }
+       }
+    }
+#endif
+
+    PrintDebug("generic (%s): init_device\n", state->name);
 
     // scan port list....
     while (port_cfg) {
@@ -271,7 +697,6 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        uint16_t end = atox(v3_cfg_val(port_cfg, "end"));
        char * mode_str = v3_cfg_val(port_cfg, "mode");
        generic_mode_t mode = GENERIC_IGNORE;
-
        if (strcasecmp(mode_str, "print_and_ignore") == 0) {
            mode = GENERIC_PRINT_AND_IGNORE;
        } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) {
@@ -281,13 +706,14 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        } else if (strcasecmp(mode_str, "ignore") == 0) {
            mode = GENERIC_IGNORE;
        } else {
-           PrintError("Invalid Mode %s\n", mode_str);
+           PrintError("generic (%s): invalid mode %s in adding ports\n", state->name, mode_str);
            v3_remove_device(dev);
            return -1;
        }
        
+       
        if (add_port_range(dev, start, end, mode) == -1) {
-           PrintError("Could not add port range %d-%d\n", start, end);
+           PrintError("generic (%s): could not add port range 0x%x to 0x%x\n", state->name, start, end);
            v3_remove_device(dev);
            return -1;
        }
@@ -295,6 +721,46 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        port_cfg = v3_cfg_next_branch(port_cfg);
     }
 
+    // scan memory list....
+    while (mem_cfg) {
+       addr_t  start = atox(v3_cfg_val(mem_cfg, "start"));
+       addr_t end = atox(v3_cfg_val(mem_cfg, "end"));
+       char * mode_str = v3_cfg_val(mem_cfg, "mode");
+       generic_mode_t mode = GENERIC_IGNORE;
+
+       if (strcasecmp(mode_str, "print_and_ignore") == 0) {
+           mode = GENERIC_PRINT_AND_IGNORE;
+       } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) {
+           mode = GENERIC_PRINT_AND_PASSTHROUGH;
+       } else if (strcasecmp(mode_str, "passthrough") == 0) {
+           mode = GENERIC_PASSTHROUGH;
+       } else if (strcasecmp(mode_str, "ignore") == 0) {
+           mode = GENERIC_IGNORE;
+       } else {
+           PrintError("generic (%s): invalid mode %s for adding memory\n", state->name, mode_str);
+           v3_remove_device(dev);
+           return -1;
+       }
+
+       if (state->num_mem_hooks>=MAX_MEM_HOOKS) { 
+           PrintError("generic (%s): cannot add another memory hook (increase MAX_MEM_HOOKS)\n", state->name);
+           v3_remove_device(dev);
+           return -1;
+       }
+       
+       if (add_mem_range(dev, start, end, mode) == -1) {
+           PrintError("generic (%s): could not add memory range 0x%p to 0x%p\n", state->name, (void*)start, (void*)end);
+           v3_remove_device(dev);
+           return -1;
+       }
+       
+       state->mem_hook[state->num_mem_hooks] = start;
+       state->num_mem_hooks++;
+
+       mem_cfg = v3_cfg_next_branch(mem_cfg);
+    }
+    
+    PrintDebug("generic (%s): initialization complete\n", state->name);
 
     return 0;
 }
diff --git a/palacios/src/devices/lnx_virtio_nic.c b/palacios/src/devices/lnx_virtio_nic.c

index bb13a69..07c7d7b 100644 (file)
--- a/palacios/src/devices/lnx_virtio_nic.c
+++ b/palacios/src/devices/lnx_virtio_nic.c
@@ -38,38 +38,62 @@
 #define PrintDebug(fmt, args...)
 #endif
 
+#define TX_QUEUE_SIZE 4096
+#define RX_QUEUE_SIZE 4096
+#define CTRL_QUEUE_SIZE 64
+
+/* The feature bitmap for virtio nic
+  * from Linux */
+#define VIRTIO_NET_F_CSUM       0       /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1       /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC        5       /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO        6       /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 7       /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8       /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN  9       /* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO  10      /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4  11      /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6  12      /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN   13      /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO   14      /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF  15      /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS     16      /* virtio_net_config.status available */
+
+/* Port to get virtio config */
+#define VIRTIO_NET_CONFIG 20  
+
 #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
 
+/* for gso_type in virtio_net_hdr */
+#define VIRTIO_NET_HDR_GSO_NONE         0      
+#define VIRTIO_NET_HDR_GSO_TCPV4        1     /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP          3       /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6        4       /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN          0x80    /* TCP has ECN set */  
 
-struct virtio_net_hdr {
-       uint8_t flags;
-       
-       uint8_t gso_type;
-       uint16_t hdr_len;               /* Ethernet + IP + tcp/udp hdrs */
-       uint16_t gso_size;              /* Bytes to append to hdr_len per frame */
-       uint16_t csum_start;            /* Position to start checksumming from */
-       uint16_t csum_offset;           /* Offset after that to place checksum */
-}__attribute__((packed));
 
+/* for flags in virtio_net_hdr */
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM     1       /* Use csum_start, csum_offset */
 
-struct virtio_net_hdr_mrg_rxbuf {
-       struct virtio_net_hdr hdr;
-       uint16_t num_buffers;   /* Number of merged rx buffers */
-};
 
-       
-#define TX_QUEUE_SIZE 256
-#define RX_QUEUE_SIZE 4096
-#define CTRL_QUEUE_SIZE 64
+/* First element of the scatter-gather list, used with GSO or CSUM features */
+struct virtio_net_hdr
+{
+    uint8_t flags;
+    uint8_t gso_type;
+    uint16_t hdr_len;          /* Ethernet + IP + tcp/udp hdrs */
+    uint16_t gso_size;         /* Bytes to append to hdr_len per frame */
+    uint16_t csum_start;       /* Position to start checksumming from */
+    uint16_t csum_offset;      /* Offset after that to place checksum */
+}__attribute__((packed));
 
-#define VIRTIO_NET_F_MRG_RXBUF 15      /* Host can merge receive buffers. */
-#define VIRTIO_NET_F_MAC       5       /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO       6       /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_HOST_TSO4 11      /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_UFO  14      /* Host can handle UFO in. */
 
-/* Port to get virtio config */
-#define VIRTIO_NET_CONFIG 20  
+/* The header to use when the MRG_RXBUF 
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+    struct virtio_net_hdr hdr;
+    uint16_t num_buffers;      /* Number of merged rx buffers */
+};
 
 struct virtio_net_config
 {
@@ -89,6 +113,7 @@ struct virtio_net_state {
     struct virtio_net_config net_cfg;
     struct virtio_config virtio_cfg;
 
+    struct v3_vm_info * vm;
     struct vm_device * dev;
     struct pci_device * pci_dev; 
     int io_range_size;
@@ -98,21 +123,23 @@ struct virtio_net_state {
     struct virtio_queue ctrl_vq;       /* idx 2*/
 
     struct v3_timer * timer;
+    void * poll_thread;
 
-    struct nic_statistics statistics;
+    struct nic_statistics stats;
 
     struct v3_dev_net_ops * net_ops;
     v3_lock_t rx_lock, tx_lock;
 
     uint8_t tx_notify, rx_notify;
     uint32_t tx_pkts, rx_pkts;
-    uint64_t past_ms;
+    uint64_t past_us;
 
     void * backend_data;
     struct virtio_dev_state * virtio_dev;
     struct list_head dev_link;
 };
 
+
 static int virtio_init_state(struct virtio_net_state * virtio) 
 {
     virtio->rx_vq.queue_size = RX_QUEUE_SIZE;
@@ -139,9 +166,10 @@ static int virtio_init_state(struct virtio_net_state * virtio)
 
     virtio->virtio_cfg.pci_isr = 0;
        
-    virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC) | 
-                                                               (1 << VIRTIO_NET_F_HOST_UFO) | 
-                                                               (1 << VIRTIO_NET_F_HOST_TSO4);
+    virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC);
+       //                                 (1 << VIRTIO_NET_F_GSO) | 
+       //                                 (1 << VIRTIO_NET_F_HOST_UFO) | 
+               //                         (1 << VIRTIO_NET_F_HOST_TSO4);
 
     if ((v3_lock_init(&(virtio->rx_lock)) == -1) ||
        (v3_lock_init(&(virtio->tx_lock)) == -1)){
@@ -152,32 +180,36 @@ static int virtio_init_state(struct virtio_net_state * virtio)
 }
 
 static int tx_one_pkt(struct guest_info * core, 
-       struct virtio_net_state * virtio, 
-       struct vring_desc * buf_desc) 
+                     struct virtio_net_state * virtio, 
+                     struct vring_desc * buf_desc) 
 {
     uint8_t * buf = NULL;
     uint32_t len = buf_desc->length;
+    int synchronize = 1; // (virtio->tx_notify == 1)?1:0;
 
     if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
        PrintDebug("Could not translate buffer address\n");
        return -1;
     }
 
-    if(virtio->net_ops->send(buf, len, virtio->backend_data) >= 0){
-       virtio->statistics.tx_pkts ++;
-       virtio->statistics.tx_bytes += len;
+    V3_Net_Print(2, "Virtio-NIC: virtio_tx: size: %d\n", len);
+    if(v3_net_debug >= 4){
+       v3_hexdump(buf, len, NULL, 0);
+    }
 
-       return 0;
+    if(virtio->net_ops->send(buf, len, synchronize, virtio->backend_data) < 0){
+       virtio->stats.tx_dropped ++;
+       return -1;
     }
 
-    virtio->statistics.tx_dropped ++;
+    virtio->stats.tx_pkts ++;
+    virtio->stats.tx_bytes += len;
 
-    return -1;
+    return 0;
 }
 
 
-static int 
-copy_data_to_desc(struct guest_info * core, 
+static inline int copy_data_to_desc(struct guest_info * core, 
                  struct virtio_net_state * virtio_state, 
                  struct vring_desc * desc, 
                  uchar_t * buf, 
@@ -188,7 +220,7 @@ copy_data_to_desc(struct guest_info * core,
     uint8_t * desc_buf = NULL;
 
     if (v3_gpa_to_hva(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) {
-       PrintError("Could not translate buffer address\n");
+       PrintDebug("Could not translate buffer address\n");
        return -1;
     }
     len = (desc->length < buf_len)?(desc->length - offset):buf_len;
@@ -198,7 +230,7 @@ copy_data_to_desc(struct guest_info * core,
 }
 
 
-static int get_desc_count(struct virtio_queue * q, int index) {
+static inline int get_desc_count(struct virtio_queue * q, int index) {
     struct vring_desc * tmp_desc = &(q->desc[index]);
     int cnt = 1;
     
@@ -218,32 +250,10 @@ static inline void disable_cb(struct virtio_queue *queue) {
     queue->used->flags |= VRING_NO_NOTIFY_FLAG;
 }
 
-
-/* interrupt the guest, so the guest core get EXIT to Palacios */
-static inline void notify_guest(struct virtio_net_state * virtio){
-    v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
-}
-
-
-/* guest free some pkts for rx queue */
-static int handle_rx_queue_kick(struct guest_info * core, 
-                         struct virtio_net_state * virtio) 
-{
-    return 0;
-}
-
-
-static int handle_ctrl(struct guest_info * core, 
-                      struct virtio_net_state * virtio) {
-       
-    return 0;
-}
-
 static int handle_pkt_tx(struct guest_info * core, 
                         struct virtio_net_state * virtio_state) 
 {
     struct virtio_queue *q = &(virtio_state->tx_vq);
-    struct virtio_net_hdr *hdr = NULL;
     int txed = 0;
     unsigned long flags;
 
@@ -253,12 +263,16 @@ static int handle_pkt_tx(struct guest_info * core,
 
     flags = v3_lock_irqsave(virtio_state->tx_lock);
     while (q->cur_avail_idx != q->avail->index) {
+       struct virtio_net_hdr *hdr = NULL;
        struct vring_desc * hdr_desc = NULL;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
        int desc_cnt = get_desc_count(q, desc_idx);
-       uint32_t req_len = 0;
-       int i = 0;
+
+       if(desc_cnt > 2){
+           PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt);
+           goto exit_error;
+       }
 
        hdr_desc = &(q->desc[desc_idx]);
        if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
@@ -269,25 +283,15 @@ static int handle_pkt_tx(struct guest_info * core,
        hdr = (struct virtio_net_hdr *)hdr_addr;
        desc_idx = hdr_desc->next;
 
-       if(desc_cnt > 2){
-           PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt);
+       /* here we assumed that one ethernet pkt is not splitted into multiple buffer */        
+       struct vring_desc * buf_desc = &(q->desc[desc_idx]);
+       if (tx_one_pkt(core, virtio_state, buf_desc) == -1) {
+           PrintError("Virtio NIC: Error handling nic operation\n");
            goto exit_error;
        }
-
-       /* here we assumed that one ethernet pkt is not splitted into multiple virtio buffer */
-       for (i = 0; i < desc_cnt - 1; i++) {    
-           struct vring_desc * buf_desc = &(q->desc[desc_idx]);
-           if (tx_one_pkt(core, virtio_state, buf_desc) == -1) {
-               PrintError("Error handling nic operation\n");
-               goto exit_error;
-           }
-
-           req_len += buf_desc->length;
-           desc_idx = buf_desc->next;
-       }
-
+           
        q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
-       q->used->ring[q->used->index % q->queue_size].length = req_len; /* What do we set this to???? */
+       q->used->ring[q->used->index % q->queue_size].length = buf_desc->length; /* What do we set this to???? */
        q->used->index ++;
        
        q->cur_avail_idx ++;
@@ -296,12 +300,17 @@ static int handle_pkt_tx(struct guest_info * core,
     }
 
     v3_unlock_irqrestore(virtio_state->tx_lock, flags);
-       
+
+    //virtio_state->virtio_cfg.pci_isr == 0 && 
     if (txed && !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        v3_pci_raise_irq(virtio_state->virtio_dev->pci_bus, 0, virtio_state->pci_dev);
        virtio_state->virtio_cfg.pci_isr = 0x1;
 
-       virtio_state->statistics.interrupts ++;
+       virtio_state->stats.rx_interrupts ++;
+    }
+
+    if(txed > 0) {
+       V3_Net_Print(2, "Virtio Handle TX: txed pkts: %d\n", txed);
     }
 
     return 0;
@@ -386,6 +395,10 @@ static int virtio_io_write(struct guest_info *core,
                    break;
                case 1:
                    virtio_setup_queue(core, virtio, &virtio->tx_vq, pfn, page_addr);
+                   if(virtio->tx_notify == 0){
+                       disable_cb(&virtio->tx_vq);
+                       V3_THREAD_WAKEUP(virtio->poll_thread);
+                   }
                    break;
                case 2:
                    virtio_setup_queue(core, virtio, &virtio->ctrl_vq, pfn, page_addr);
@@ -408,20 +421,16 @@ static int virtio_io_write(struct guest_info *core,
            {
                uint16_t queue_idx = *(uint16_t *)src;                  
                if (queue_idx == 0){
-                   if(handle_rx_queue_kick(core, virtio) == -1){
-                       PrintError("Could not handle Virtio NIC rx kick\n");
-                       return -1;
-                   }
+                   /* receive queue refill */
+                   virtio->stats.tx_interrupts ++;
                } else if (queue_idx == 1){
                    if (handle_pkt_tx(core, virtio) == -1) {
                        PrintError("Could not handle Virtio NIC tx kick\n");
                        return -1;
                    }
+                   virtio->stats.tx_interrupts ++;
                } else if (queue_idx == 2){
-                   if (handle_ctrl(core, virtio) == -1) {
-                       PrintError("Could not handle Virtio NIC ctrl kick\n");
-                       return -1;
-                   }
+                   /* ctrl */
                } else {
                    PrintError("Wrong queue index %d\n", queue_idx);
                }       
@@ -462,7 +471,7 @@ static int virtio_io_read(struct guest_info *core,
        case HOST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
-               return -1;
+               //return -1;
            }
            *(uint32_t *)dst = virtio->virtio_cfg.host_features;
            break;
@@ -541,13 +550,13 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) {
     struct virtio_net_hdr_mrg_rxbuf hdr;
     uint32_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
     uint32_t data_len;
-    uint32_t offset = 0;
+    //uint32_t offset = 0;
     unsigned long flags;
 
-#ifdef CONFIG_DEBUG_VIRTIO_NET
-    PrintDebug("Virtio-NIC: virtio_rx: size: %d\n", size);     
-    v3_hexdump(buf, size, NULL, 0);
-#endif
+    V3_Net_Print(2, "Virtio-NIC: virtio_rx: size: %d\n", size);
+    if(v3_net_debug >= 4){
+       v3_hexdump(buf, size, NULL, 0);
+    }
 
     flags = v3_lock_irqsave(virtio->rx_lock);
 
@@ -555,29 +564,28 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) {
     memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
 
     if (q->ring_avail_addr == 0) {
-       PrintDebug("Queue is not set\n");
+       V3_Net_Print(2, "Virtio NIC: RX Queue not set\n");
+       virtio->stats.rx_dropped ++;
        goto err_exit;
     }
 
     if (q->cur_avail_idx != q->avail->index){
        addr_t hdr_addr = 0;
-       uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
        uint16_t buf_idx = 0;
+       uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
        struct vring_desc * hdr_desc = NULL;
+       struct vring_desc * buf_desc = NULL;
+       uint32_t len;
 
        hdr_desc = &(q->desc[hdr_idx]);
        if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
-           PrintDebug("Could not translate receive buffer address\n");
+           V3_Net_Print(2, "Virtio NIC: Could not translate receive buffer address\n");
+           virtio->stats.rx_dropped ++;
            goto err_exit;
        }
-       hdr.num_buffers = 1;
-       memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
-       if (offset >= data_len) {
-           hdr_desc->flags &= ~VIRTIO_NEXT_FLAG;
-       }
 
-       struct vring_desc * buf_desc = NULL;
-       for (buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) {
+#if 0 /* merged buffer */
+       for(buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) {
            uint32_t len = 0;
            buf_desc = &(q->desc[buf_idx]);
 
@@ -587,33 +595,65 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) {
                buf_desc->flags = VIRTIO_NEXT_FLAG;             
            }
            buf_desc->length = len;
+           hdr.num_buffers ++;
        }
        buf_desc->flags &= ~VIRTIO_NEXT_FLAG;
-       
+       memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+#endif
+
+       hdr.num_buffers = 1;
+       memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+       if (data_len == 0) {
+           hdr_desc->flags &= ~VIRTIO_NEXT_FLAG;
+       }
+
+       buf_idx = hdr_desc->next;
+       buf_desc = &(q->desc[buf_idx]);
+       len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf, data_len, 0);           
+       if (len < data_len) {
+           V3_Net_Print(2, "Virtio NIC: ring buffer len less than pkt size, merged buffer not supported\n");
+           virtio->stats.rx_dropped ++;
+               
+           goto err_exit;
+       }
+       buf_desc->flags &= ~VIRTIO_NEXT_FLAG;
+
        q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
        q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */
        q->used->index++;
        q->cur_avail_idx++;
 
-       virtio->statistics.rx_pkts ++;
-       virtio->statistics.rx_bytes += size;
+       virtio->stats.rx_pkts ++;
+       virtio->stats.rx_bytes += size;
     } else {
-       virtio->statistics.rx_dropped ++;
+       V3_Net_Print(2, "Virtio NIC: Guest RX queue is full\n");
+       virtio->stats.rx_dropped ++;
+
+       /* kick guest to refill the queue */
+       virtio->virtio_cfg.pci_isr = 0x1;       
+       v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev);
+       v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
+       virtio->stats.rx_interrupts ++;
        
        goto err_exit;
     }
 
+    V3_Net_Print(2, "pci_isr %d, virtio flags %d\n",  virtio->virtio_cfg.pci_isr, q->avail->flags);
+    //virtio->virtio_cfg.pci_isr == 0 && 
+
     if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
-       PrintDebug("Raising IRQ %d\n",  virtio->pci_dev->config_header.intr_line);
-       
+       V3_Net_Print(2, "Raising IRQ %d\n",  virtio->pci_dev->config_header.intr_line);
+
+       virtio->virtio_cfg.pci_isr = 0x1;       
        v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev);
-       virtio->virtio_cfg.pci_isr = 0x1;
-       virtio->statistics.interrupts ++;
+
+       virtio->stats.rx_interrupts ++;
     }
 
     v3_unlock_irqrestore(virtio->rx_lock, flags);
 
-    /* notify guest if guest is running */
+    /* notify guest if it is in guest mode */
+    /* ISSUE: What is gonna happen if guest thread is running on the same core as this thread? */
     if(virtio->rx_notify == 1){
        v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
     }
@@ -650,12 +690,21 @@ static struct v3_device_ops dev_ops = {
 };
 
 
-static void virtio_nic_poll(struct v3_vm_info * vm, int budget, void * data){
-    struct virtio_net_state * virtio = (struct virtio_net_state *)data;
+static int virtio_tx_flush(void * args){
+    struct virtio_net_state *virtio  = (struct virtio_net_state *)args;
+
+    V3_Print("Virtio TX Poll Thread Starting for %s\n", virtio->vm->name);
 
-    if(virtio->tx_notify == 0){
-       handle_pkt_tx(&(vm->cores[0]), virtio);
+    while(1){
+       if(virtio->tx_notify == 0){
+           handle_pkt_tx(&(virtio->vm->cores[0]), virtio);
+           v3_yield(NULL);
+       }else {
+           V3_THREAD_SLEEP();
+       }
     }
+
+    return 0;
 }
 
 static int register_dev(struct virtio_dev_state * virtio, 
@@ -732,31 +781,33 @@ static int register_dev(struct virtio_dev_state * virtio,
 
 #define RATE_UPPER_THRESHOLD 10  /* 10000 pkts per second, around 100Mbits */
 #define RATE_LOWER_THRESHOLD 1
-#define PROFILE_PERIOD 50 /*50ms*/
+#define PROFILE_PERIOD 10000 /*us*/
 
-/* Timer Functions */
 static void virtio_nic_timer(struct guest_info * core, 
                             uint64_t cpu_cycles, uint64_t cpu_freq, 
                             void * priv_data) {
     struct virtio_net_state * net_state = (struct virtio_net_state *)priv_data;
-    uint64_t period_ms;
+    uint64_t period_us;
+    static int profile_ms = 0;
 
-    period_ms = cpu_cycles/cpu_freq;
-    net_state->past_ms += period_ms;
+    period_us = (1000*cpu_cycles)/cpu_freq;
+    net_state->past_us += period_us;
 
-    if(net_state->past_ms >  PROFILE_PERIOD){ 
+#if 0
+    if(net_state->past_us > PROFILE_PERIOD){ 
        uint32_t tx_rate, rx_rate;
        
-       tx_rate = (net_state->statistics.tx_pkts - net_state->tx_pkts)/net_state->past_ms; /* pkts/per ms */
-       rx_rate = (net_state->statistics.rx_pkts - net_state->rx_pkts)/net_state->past_ms;
+       tx_rate = (net_state->stats.tx_pkts - net_state->tx_pkts)/(net_state->past_us/1000); /* pkts/per ms */
+       rx_rate = (net_state->stats.rx_pkts - net_state->rx_pkts)/(net_state->past_us/1000);
 
-       net_state->tx_pkts = net_state->statistics.tx_pkts;
-       net_state->rx_pkts = net_state->statistics.rx_pkts;
+       net_state->tx_pkts = net_state->stats.tx_pkts;
+       net_state->rx_pkts = net_state->stats.rx_pkts;
 
        if(tx_rate > RATE_UPPER_THRESHOLD && net_state->tx_notify == 1){
            V3_Print("Virtio NIC: Switch TX to VMM driven mode\n");
            disable_cb(&(net_state->tx_vq));
            net_state->tx_notify = 0;
+           V3_THREAD_WAKEUP(net_state->poll_thread);
        }
 
        if(tx_rate < RATE_LOWER_THRESHOLD && net_state->tx_notify == 0){
@@ -766,19 +817,30 @@ static void virtio_nic_timer(struct guest_info * core,
        }
 
        if(rx_rate > RATE_UPPER_THRESHOLD && net_state->rx_notify == 1){
-           PrintDebug("Virtio NIC: Switch RX to VMM None notify mode\n");
+           V3_Print("Virtio NIC: Switch RX to VMM None notify mode\n");
            net_state->rx_notify = 0;
        }
 
        if(rx_rate < RATE_LOWER_THRESHOLD && net_state->rx_notify == 0){
-           PrintDebug("Virtio NIC: Switch RX to VMM notify mode\n");
+           V3_Print("Virtio NIC: Switch RX to VMM notify mode\n");
            net_state->rx_notify = 1;
        }
 
-       net_state->past_ms = 0;
+       net_state->past_us = 0;
     }
-}
+#endif
 
+    profile_ms += period_us/1000;
+    if(profile_ms > 20000){
+       V3_Net_Print(1, "Virtio NIC: TX: Pkt: %lld, Bytes: %lld\n\t\tRX Pkt: %lld. Bytes: %lld\n\t\tDropped: tx %lld, rx %lld\nInterrupts: tx %d, rx %d\nTotal Exit: %lld\n",
+               net_state->stats.tx_pkts, net_state->stats.tx_bytes,
+               net_state->stats.rx_pkts, net_state->stats.rx_bytes,
+               net_state->stats.tx_dropped, net_state->stats.rx_dropped,
+               net_state->stats.tx_interrupts, net_state->stats.rx_interrupts,
+               net_state->vm->cores[0].num_exits);
+       profile_ms = 0;
+    }
+}
 
 static struct v3_timer_ops timer_ops = {
     .update_timer = virtio_nic_timer,
@@ -796,19 +858,21 @@ static int connect_fn(struct v3_vm_info * info,
     memset(net_state, 0, sizeof(struct virtio_net_state));
     register_dev(virtio, net_state);
 
+    net_state->vm = info;
     net_state->net_ops = ops;
     net_state->backend_data = private_data;
     net_state->virtio_dev = virtio;
-    net_state->tx_notify = 1;
-    net_state->rx_notify = 1;
-
+    net_state->tx_notify = 0;
+    net_state->rx_notify = 0;
+       
     net_state->timer = v3_add_timer(&(info->cores[0]),&timer_ops,net_state);
 
     ops->recv = virtio_rx;
-    ops->poll = virtio_nic_poll;
     ops->frontend_data = net_state;
     memcpy(ops->fnt_mac, virtio->mac, ETH_ALEN);
 
+    net_state->poll_thread = V3_CREATE_THREAD(virtio_tx_flush, (void *)net_state, "Virtio_Poll");
+
     return 0;
 }
 
@@ -834,14 +898,7 @@ static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
 
     if (macstr != NULL && !str2mac(macstr, virtio_state->mac)) {
        PrintDebug("Virtio NIC: Mac specified %s\n", macstr);
-       PrintDebug("MAC: %x:%x:%x:%x:%x:%x\n", virtio_state->mac[0],
-                               virtio_state->mac[1],
-                               virtio_state->mac[2],
-                               virtio_state->mac[3],
-                               virtio_state->mac[4],
-                               virtio_state->mac[5]);
     }else {
-       PrintDebug("Virtio NIC: MAC not specified\n");
        random_ethaddr(virtio_state->mac);
     }
 
diff --git a/palacios/src/devices/lnx_virtio_vnet.c b/palacios/src/devices/lnx_virtio_vnet.c

index b4b7342..87f158d 100644 (file)
--- a/palacios/src/devices/lnx_virtio_vnet.c
+++ b/palacios/src/devices/lnx_virtio_vnet.c
@@ -303,7 +303,7 @@ static int do_tx_pkts(struct guest_info * core,
        memcpy(pkt.header, virtio_pkt->pkt, ETHERNET_HEADER_LEN);
        pkt.data = virtio_pkt->pkt;
 
-       v3_vnet_send_pkt(&pkt, NULL);
+       v3_vnet_send_pkt(&pkt, NULL, 1);
        
        q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
        q->used->ring[q->used->index % q->queue_size].length = pkt_desc->length; // What do we set this to????
diff --git a/palacios/src/devices/nic_bridge.c b/palacios/src/devices/nic_bridge.c

index 9bc28d2..5eec5e6 100644 (file)
--- a/palacios/src/devices/nic_bridge.c
+++ b/palacios/src/devices/nic_bridge.c
@@ -36,6 +36,7 @@ struct nic_bridge_state {
 };
 
 static int bridge_send(uint8_t * buf, uint32_t len, 
+                      int synchronize,
                       void * private_data) {
 
 #ifdef CONFIG_DEBUG_NIC_BRIDGE
diff --git a/palacios/src/devices/nvram.c b/palacios/src/devices/nvram.c

index f2c45a3..785d38b 100644 (file)
--- a/palacios/src/devices/nvram.c
+++ b/palacios/src/devices/nvram.c
@@ -472,7 +472,8 @@ static void nvram_update_timer(struct guest_info *vm,
     uint64_t period_us;
 
     
-    period_us = (1000000*cpu_cycles/cpu_freq);
+    // cpu freq in khz
+    period_us = (1000*cpu_cycles/cpu_freq);
 
     update_time(nvram_state,period_us);
 
diff --git a/palacios/src/devices/pci_front.c b/palacios/src/devices/pci_front.c

new file mode 100644 (file)

index 0000000..487ab5c
--- /dev/null
+++ b/palacios/src/devices/pci_front.c
@@ -0,0 +1,824 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Authors: 
+ *    Peter Dinda <pdinda@northwestern.edu>    (PCI front device forwarding to host dev interface)
+ *    Jack Lange <jarusl@cs.northwestern.edu>  (original PCI passthrough to physical hardware)
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+/* 
+  This is a front-end PCI device intended to be used together with the
+  host device interface and a *virtual* PCI device implementation in
+  the host OS.  It makes it possible to project such a virtual device
+  into the guest as a PCI device.  It's based on the PCI passthrough
+  device, which projects *physical* PCI devices into the guest.
+
+  If you need to project a non-PCI host-based virtual or physical
+  device into the guest, you should use the generic device.
+
+*/
+
+/* 
+ * The basic idea is that we do not change the hardware PCI configuration
+ * Instead we modify the guest environment to map onto the physical configuration
+ * 
+ * The pci subsystem handles most of the configuration space, except for the bar registers.
+ * We handle them here, by either letting them go directly to hardware or remapping through virtual hooks
+ * 
+ * Memory Bars are always remapped via the shadow map, 
+ * IO Bars are selectively remapped through hooks if the guest changes them 
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_dev_mgr.h>
+#include <palacios/vmm_sprintf.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vm_guest.h> 
+#include <palacios/vmm_symspy.h>
+
+#include <devices/pci.h>
+#include <devices/pci_types.h>
+
+#include <interfaces/vmm_host_dev.h>
+
+
+#ifndef CONFIG_DEBUG_PCI_FRONT
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+// Our own address in PCI-land.
+// The same 32 bits can be accessed whole (value) or through the bit-fields
+// below, whose widths add up to 32: 2 + 6 + 3 + 5 + 8 + 7 + 1.
+union pci_addr_reg {
+    uint32_t value;
+    struct {
+       uint_t rsvd1   : 2;
+       uint_t reg     : 6;   // register index in 32 bit words (set from bar number or reg_num >> 2)
+       uint_t func    : 3;
+       uint_t dev     : 5;
+       uint_t bus     : 8;
+       uint_t rsvd2   : 7;
+       uint_t enable  : 1;
+    } __attribute__((packed));
+} __attribute__((packed));
+
+
+// identical to PCI passthrough device
+// Classification of a BAR as tracked in struct pt_bar.type
+typedef enum { PT_BAR_NONE,         // empty bar, writes to it are ignored
+              PT_BAR_IO,            // I/O port range, one io hook per port
+              PT_BAR_MEM32,         // 32 bit memory region, full memory hook
+              PT_BAR_MEM24,         // 24 bit memory region, full memory hook
+              PT_BAR_MEM64_LO,      // low half of a 64 bit memory bar (no hook of its own)
+              PT_BAR_MEM64_HI,      // high half of a 64 bit memory bar (carries the hook)
+              PT_EXP_ROM } pt_bar_type_t;  // not used in this file (expansion roms unsupported)
+
+// identical to PCI passthrough device
+// Per-BAR bookkeeping used to hook and unhook the guest-visible region
+struct pt_bar {
+    uint32_t size;         // number of ports (I/O bar) or length of the hooked region (memory bar)
+    pt_bar_type_t type;    // set by pci_bar_init, consulted by pci_bar_write and unhook_all_mem
+
+    /*  We store 64 bit memory bar addresses in the high BAR
+     *  because they are the last to be updated
+     *  This means that the addr field must be 64 bits
+     */
+    uint64_t addr; 
+
+    uint32_t val;          // last raw bar value recorded by pci_bar_init / pci_bar_write
+};
+
+
+
+
+// Per-device state of a PCI front device; allocated and zeroed in
+// pci_front_init and released in pci_front_free.
+struct pci_front_internal {
+    // this is our local cache of what the host device has
+    // (filled once by pull_config during pci_front_init)
+    union {
+       uint8_t config_space[256];
+       struct pci_config_header real_hdr;
+    } __attribute__((packed));
+    
+    // We do need a representation of the bars
+    // since we need to be made aware when they are written
+    // so that we can change the hooks.
+    //
+    // We assume here that the PCI subsystem, on a bar write
+    // will first send us a config_update, which we forward to
+    // the host dev.   Then it will send us a bar update
+    // which we will use to rehook the device
+    //
+    struct pt_bar bars[6];      // our bars (for update purposes)
+    //
+    // Currently unsupported
+    //
+    //struct pt_bar exp_rom;      // and exp rom areas of the config space, above
+     
+    struct vm_device  *pci_bus;  // what bus we are attached to
+    struct pci_device *pci_dev;  // our representation as a registered PCI device
+
+    // NOTE(review): nothing in this file assigns pci_addr after the memset
+    // in pci_front_init, so it appears to stay all-zero - confirm intended
+    union pci_addr_reg pci_addr; // our pci address
+
+    char name[32];               // copy of the "ID" config value, used in log messages
+
+    v3_host_dev_t     host_dev;  // the actual implementation
+};
+
+
+
+/*
+static int push_config(struct pci_front_internal *state, uint8_t *config)
+{
+    if (v3_host_dev_config_write(state->host_dev, 0, config, 256) != 256) { 
+       return -1;
+    } else {
+       return 0;
+    }
+}
+*/
+
+// Read the whole 256 byte configuration space of the host device, starting
+// at offset 0, into config.
+// Returns 0 if exactly 256 bytes were read and -1 otherwise.
+static int pull_config(struct pci_front_internal *state, uint8_t *config)
+{
+    return (v3_host_dev_read_config(state->host_dev, 0, config, 256) == 256) ? 0 : -1;
+}
+
+
+// Memory hook read handler: forwards a guest read of len bytes at gpa to
+// the host device.  priv is the vm_device whose private data is our state.
+// Returns whatever v3_host_dev_read_mem returned.
+static int pci_front_read_mem(struct guest_info * core,
+                             addr_t gpa, void * dst, uint_t len,
+                             void * priv)
+{
+    struct vm_device * vmdev = (struct vm_device *)priv;
+    struct pci_front_internal * front = (struct pci_front_internal *)vmdev->private_data;
+    int nread;
+    int idx;
+
+    PrintDebug("pci_front (%s): reading 0x%x bytes from gpa 0x%p from host dev 0x%p ...",
+              front->name, len, (void*)gpa, front->host_dev);
+
+    nread = v3_host_dev_read_mem(front->host_dev, gpa, dst, len);
+
+    PrintDebug(" done ... read %d bytes: 0x", nread);
+
+    // dump the bytes that came back (debug builds only)
+    idx = 0;
+    while (idx < nread) {
+       PrintDebug("%x", ((uint8_t *)dst)[idx]);
+       idx++;
+    }
+
+    PrintDebug("\n");
+
+    return nread;
+}
+
+// Memory hook write handler: forwards a guest write of len bytes at gpa to
+// the host device.  priv is the vm_device whose private data is our state.
+// Returns whatever v3_host_dev_write_mem returned.
+static int pci_front_write_mem(struct guest_info * core,
+                              addr_t gpa, void * src, uint_t len,
+                              void * priv)
+{
+    struct vm_device * vmdev = (struct vm_device *)priv;
+    struct pci_front_internal * front = (struct pci_front_internal *)vmdev->private_data;
+    int nwritten;
+    int idx;
+
+    PrintDebug("pci_front (%s): writing 0x%x bytes to gpa 0x%p to host dev 0x%p bytes=0x",
+              front->name, len, (void*)gpa, front->host_dev);
+
+    // dump the bytes about to be written (debug builds only)
+    idx = 0;
+    while (idx < len) {
+       PrintDebug("%x", ((uint8_t *)src)[idx]);
+       idx++;
+    }
+
+    nwritten = v3_host_dev_write_mem(front->host_dev, gpa, src, len);
+
+    PrintDebug(" %d bytes written\n",nwritten);
+
+    return nwritten;
+}
+
+
+// I/O hook read handler: forwards a guest read of len bytes from port to
+// the host device.  Returns whatever v3_host_dev_read_io returned.
+//
+// NOTE(review): priv_data is used directly as the device state here, while
+// the memory handlers receive the vm_device - confirm this matches what
+// v3_dev_hook_io hands to its callbacks.
+static int pci_front_read_port(struct guest_info * core,
+                              uint16_t port, void * dst, uint_t len,
+                              void * priv_data)
+{
+    struct pci_front_internal * front = (struct pci_front_internal *)priv_data;
+    int nread;
+    int idx;
+
+    PrintDebug("pci_front (%s): reading 0x%x bytes from port 0x%x from host dev 0x%p ...",
+              front->name, len, port, front->host_dev);
+
+    nread = v3_host_dev_read_io(front->host_dev, port, dst, len);
+
+    PrintDebug(" done ... read %d bytes: 0x", nread);
+
+    // dump the bytes that came back (debug builds only)
+    idx = 0;
+    while (idx < nread) {
+       PrintDebug("%x", ((uint8_t *)dst)[idx]);
+       idx++;
+    }
+
+    PrintDebug("\n");
+
+    return nread;
+}
+
+// I/O hook write handler: forwards a guest write of len bytes to port to
+// the host device.  Returns whatever v3_host_dev_write_io returned.
+//
+// NOTE(review): priv_data is used directly as the device state here, while
+// the memory handlers receive the vm_device - confirm this matches what
+// v3_dev_hook_io hands to its callbacks.
+static int pci_front_write_port(struct guest_info * core,
+                               uint16_t port, void * src, uint_t len,
+                               void * priv_data)
+{
+    struct pci_front_internal * front = (struct pci_front_internal *)priv_data;
+    int nwritten;
+    int idx;
+
+    PrintDebug("pci_front (%s): writing 0x%x bytes to port 0x%x to host dev 0x%p bytes=0x",
+              front->name, len, port, front->host_dev);
+
+    // dump the bytes about to be written (debug builds only)
+    idx = 0;
+    while (idx < len) {
+       PrintDebug("%x", ((uint8_t *)src)[idx]);
+       idx++;
+    }
+
+    nwritten = v3_host_dev_write_io(front->host_dev, port, src, len);
+
+    PrintDebug(" %d bytes written\n",nwritten);
+
+    return nwritten;
+}
+
+
+
+//
+// This is called at registration time for the device, once per bar
+// (it is installed as the bar_init callback in setup_virt_pci_dev).
+// 
+// We assume that someone has called pull_config to get a local
+// copy of the config data from the host device by this point
+//
+// bar_num      - index of the bar being initialized; indexes state->bars[]
+// dst          - receives the bar value taken from the cached config space
+// private_data - the vm_device whose private data is our state
+//
+// Classifies the bar from its cached value, records type/addr/size in
+// state->bars[bar_num] and installs the matching io or memory hooks.
+//
+// Returns 0 on success and -1 if a hook cannot be installed or the
+// memory bar type bits are invalid.
+//
+static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) {
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)(dev->private_data);
+
+
+    const uint32_t bar_base_reg = 4;   // offset in 32bit words to skip to the first bar
+
+    // only used for the debug message below
+    union pci_addr_reg pci_addr = {state->pci_addr.value};  // my address
+
+    uint32_t bar_val = 0;
+    uint32_t max_val = 0;
+
+    struct pt_bar * pbar = &(state->bars[bar_num]);
+
+    pci_addr.reg = bar_base_reg + bar_num;
+
+    PrintDebug("pci_front (%s): pci_bar_init: PCI Address = 0x%x\n", state->name, pci_addr.value);
+
+    // This assumes that pull_config() has been previously called and 
+    // we have a local copy of the host device's configuration space
+    bar_val = *((uint32_t*)(&(state->config_space[(bar_base_reg+bar_num)*4])));
+
+    // Now let's set our copy of the relevant bar accordingly
+    pbar->val = bar_val; 
+    
+    // Now we will configure the hooks relevant to this bar
+    //
+    // NOTE(review): max_val is computed from the cached bar value and the
+    // PCI_*_MASK macros instead of being probed from a device, so the
+    // sizes derived from it depend entirely on those macro definitions -
+    // confirm they produce the intended region sizes.
+
+    // We preset this type when we encounter a MEM64 Low BAR
+    // This is a 64 bit memory region that we turn into a memory hook
+    if (pbar->type == PT_BAR_MEM64_HI) {
+       struct pt_bar * lo_pbar = &(state->bars[bar_num - 1]);
+
+       // max_val is assigned here but not read again in this branch
+       max_val = PCI_MEM64_MASK_HI;
+
+       pbar->size += lo_pbar->size;
+
+       // NOTE(review): this function never assigns pbar->addr for the high
+       // bar (only the low bar's addr is set in the MEM64_LO branch below),
+       // so the hook below uses whatever addr already holds - confirm
+
+       PrintDebug("pci_front (%s): pci_bar_init: Adding 64 bit PCI mem region: start=0x%p, end=0x%p as a full hook\n",
+                  state->name, 
+                  (void *)(addr_t)pbar->addr, 
+                  (void *)(addr_t)(pbar->addr + pbar->size));
+
+       if (v3_hook_full_mem(dev->vm,
+                            V3_MEM_CORE_ANY,
+                            pbar->addr,
+                            pbar->addr+pbar->size-1,
+                            pci_front_read_mem,
+                            pci_front_write_mem,
+                            dev)<0) { 
+           
+           PrintError("pci_front (%s): pci_bar_init: failed to hook 64 bit region (0x%p, 0x%p)\n",
+                      state->name, 
+                      (void *)(addr_t)pbar->addr,
+                      (void *)(addr_t)(pbar->addr + pbar->size - 1));
+           return -1;
+       }
+
+    } else if ((bar_val & 0x3) == 0x1) {
+       // This is an I/O port region which we will turn into a range of hooks
+
+       int i = 0;
+
+       pbar->type = PT_BAR_IO;
+       pbar->addr = PCI_IO_BASE(bar_val);
+
+       max_val = bar_val | PCI_IO_MASK;
+
+       pbar->size = (uint16_t)~PCI_IO_BASE(max_val) + 1;
+
+       
+       PrintDebug("pci_front (%s): pci_bar_init: hooking ports 0x%x through 0x%x\n",
+                  state->name, (uint32_t)pbar->addr, (uint32_t)pbar->addr + pbar->size - 1);
+
+       // one io hook per port in the range
+       for (i = 0; i < pbar->size; i++) {
+           if (v3_dev_hook_io(dev,
+                              pbar->addr + i, 
+                              pci_front_read_port,
+                              pci_front_write_port)<0) {
+               PrintError("pci_front (%s): pci_bar_init: unabled to hook I/O port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
+               return -1;
+           }
+       }
+
+    } else {
+
+       // might be a 32 bit memory region or an empty bar
+
+       max_val = bar_val | PCI_MEM_MASK;
+
+       // NOTE(review): max_val can only be 0 here if PCI_MEM_MASK is 0 -
+       // confirm that empty bars are actually detected by this test
+       if (max_val == 0) {
+           // nothing, so just ignore it
+           pbar->type = PT_BAR_NONE;
+       } else {
+
+           // memory region - hook it
+
+           if ((bar_val & 0x6) == 0x0) {
+               // 32 bit memory region
+
+               pbar->type = PT_BAR_MEM32;
+               pbar->addr = PCI_MEM32_BASE(bar_val);
+               pbar->size = ~PCI_MEM32_BASE(max_val) + 1;
+
+               PrintDebug("pci_front (%s): pci_init_bar: adding 32 bit PCI mem region: start=0x%p, end=0x%p\n",
+                          state->name, 
+                          (void *)(addr_t)pbar->addr, 
+                          (void *)(addr_t)(pbar->addr + pbar->size));
+
+               if (v3_hook_full_mem(dev->vm, 
+                                    V3_MEM_CORE_ANY,
+                                    pbar->addr,
+                                    pbar->addr+pbar->size-1,
+                                    pci_front_read_mem,
+                                    pci_front_write_mem,
+                                    dev) < 0 ) { 
+                   PrintError("pci_front (%s): pci_init_bar: unable to hook 32 bit memory region 0x%p to 0x%p\n",
+                              state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
+                   return -1;
+               }
+
+           } else if ((bar_val & 0x6) == 0x2) {
+
+               // 24 bit memory region
+               // (pci_bar_write has no case for this type, see there)
+
+               pbar->type = PT_BAR_MEM24;
+               pbar->addr = PCI_MEM24_BASE(bar_val);
+               pbar->size = ~PCI_MEM24_BASE(max_val) + 1;
+
+
+               if (v3_hook_full_mem(dev->vm, 
+                                    V3_MEM_CORE_ANY,
+                                    pbar->addr,
+                                    pbar->addr+pbar->size-1,
+                                    pci_front_read_mem,
+                                    pci_front_write_mem,
+                                    dev) < 0 ) { 
+                   PrintError("pci_front (%s): pci_init_bar: unable to hook 24 bit memory region 0x%p to 0x%p\n",
+                              state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
+                   return -1;
+               }
+
+           } else if ((bar_val & 0x6) == 0x4) {
+               
+               // partial update of a 64 bit region, no hook done yet
+
+               // NOTE(review): bar_num + 1 is not range checked against the
+               // 6 entries of state->bars - confirm bar 5 can never be a
+               // 64 bit low bar
+               struct pt_bar * hi_pbar = &(state->bars[bar_num + 1]);
+
+               pbar->type = PT_BAR_MEM64_LO;
+               hi_pbar->type = PT_BAR_MEM64_HI;
+
+               // Set the low bits, only for temporary storage until we calculate the high BAR
+               pbar->addr = PCI_MEM64_BASE_LO(bar_val);
+               pbar->size = ~PCI_MEM64_BASE_LO(max_val) + 1;
+
+               PrintDebug("pci_front (%s): pci_bar_init: partial 64 bit update\n",state->name);
+
+           } else {
+               PrintError("pci_front (%s): pci_bar_init: invalid memory bar type\n",state->name);
+               return -1;
+           }
+
+       }
+    }
+
+
+
+    // Update the pci subsystem versions
+    *dst = bar_val;
+
+    return 0;
+}
+
+
+//
+// If the guest modifies a BAR, we expect that pci.c will do the following,
+// in this order
+//
+//    1. notify us via the config_update callback, which we will feed back
+//       to the host device
+//    2. notify us of the bar change via the following callback 
+//
+// This callback will unhook as needed for the old bar value and rehook
+// as needed for the new bar value
+//
+// bar_num      - index of the bar being written; indexes state->bars[]
+// src          - in: the value written by the guest; out: the "cooked"
+//                value (size-aligned, reserved bits restored from the
+//                previous bar value)
+// private_data - the vm_device whose private data is our state
+//
+// Returns 0 on success (including writes to empty bars, which are
+// ignored) and -1 if unhooking or rehooking fails or the bar type is
+// not handled.
+//
+// Note that there is no case for PT_BAR_MEM24 below, so a write to a
+// 24 bit memory bar ends up in the final "unhandled PCI bar type" branch.
+//
+static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) {
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+    
+    struct pt_bar * pbar = &(state->bars[bar_num]);
+
+    PrintDebug("pci_front (%s): bar update: bar_num=%d, src=0x%x\n", state->name, bar_num, *src);
+    PrintDebug("pci_front (%s): the current bar has size=%u, type=%d, addr=%p, val=0x%x\n",
+              state->name, pbar->size, pbar->type, (void *)(addr_t)pbar->addr, pbar->val);
+
+
+
+    if (pbar->type == PT_BAR_NONE) {
+       PrintDebug("pci_front (%s): bar update is to empty bar - ignored\n",state->name);
+       return 0;
+    } else if (pbar->type == PT_BAR_IO) {
+       int i = 0;
+
+       // unhook old ports
+       PrintDebug("pci_front (%s): unhooking I/O ports 0x%x through 0x%x\n", 
+                  state->name, 
+                  (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
+       for (i = 0; i < pbar->size; i++) {
+           if (v3_dev_unhook_io(dev, pbar->addr + i) == -1) {
+               PrintError("pci_front (%s): could not unhook previously hooked port.... 0x%x\n", 
+                          state->name, 
+                          (uint32_t)pbar->addr + i);
+               return -1;
+           }
+       }
+
+       PrintDebug("pci_front (%s): setting I/O Port range size=%d\n", state->name, pbar->size);
+
+       // 
+       // Not clear if this cooking is needed... why not trust
+       // the write?  Who cares if it wants to suddenly hook more ports?
+       // 
+
+       // clear the low bits to match the size
+       *src &= ~(pbar->size - 1);
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_IO_MASK);
+
+       pbar->addr = PCI_IO_BASE(*src); 
+
+       PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src);
+
+       PrintDebug("pci_front (%s): rehooking I/O ports 0x%x through 0x%x\n",
+                  state->name, (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
+
+       // hook the same number of ports at the new base
+       for (i = 0; i < pbar->size; i++) {
+           if (v3_dev_hook_io(dev,
+                              pbar->addr + i, 
+                              pci_front_read_port, 
+                              pci_front_write_port)<0) { 
+               PrintError("pci_front (%s): unable to rehook port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
+               return -1;
+           }
+       }
+
+    } else if (pbar->type == PT_BAR_MEM32) {
+
+       // drop the hook at the old address before computing the new one
+       if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+           PrintError("pci_front (%s): unable to unhook 32 bit memory region starting at 0x%p\n", 
+                      state->name, (void*)(pbar->addr));
+           return -1;
+       }
+
+       // Again, not sure I need to do this cooking...
+
+       // clear the low bits to match the size
+       *src &= ~(pbar->size - 1);
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_MEM_MASK);
+
+       PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src);
+
+       pbar->addr = PCI_MEM32_BASE(*src);
+
+       PrintDebug("pci_front (%s): rehooking 32 bit memory region 0x%p through 0x%p\n",
+                  state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+                  
+       if (v3_hook_full_mem(dev->vm,
+                            V3_MEM_CORE_ANY,
+                            pbar->addr,
+                            pbar->addr+pbar->size-1,
+                            pci_front_read_mem,
+                            pci_front_write_mem,
+                            dev)<0) { 
+           PrintError("pci_front (%s): unable to rehook 32 bit memory region 0x%p through 0x%p\n",
+                      state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+           return -1;
+       }
+
+    } else if (pbar->type == PT_BAR_MEM64_LO) {
+       // We only store the written values here, the actual reconfig comes when the high BAR is updated
+
+       // clear the low bits to match the size
+       *src &= ~(pbar->size - 1);
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_MEM_MASK);
+
+       // Temp storage, used when hi bar is written
+       pbar->addr = PCI_MEM64_BASE_LO(*src);
+
+       PrintDebug("pci_front (%s): handled partial update for 64 bit memory region\n",state->name);
+
+    } else if (pbar->type == PT_BAR_MEM64_HI) {
+       // the low half lives in the preceding bar slot
+       struct pt_bar * lo_vbar = &(state->bars[bar_num - 1]);
+
+       if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+           PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n", 
+                      state->name, (void*)(pbar->addr));
+           return -1;
+       }
+
+       
+       // We don't set size, because we assume region is less than 4GB
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_MEM64_MASK_HI);
+
+       // combine the new high half with the low half stored earlier
+       pbar->addr = PCI_MEM64_BASE_HI(*src);
+       pbar->addr <<= 32;
+       pbar->addr += lo_vbar->addr;
+
+       PrintDebug("pci_front (%s): rehooking 64 bit memory region 0x%p through 0x%p\n",
+                  state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+                  
+       if (v3_hook_full_mem(dev->vm,
+                            V3_MEM_CORE_ANY,
+                            pbar->addr,
+                            pbar->addr+pbar->size-1,
+                            pci_front_read_mem,
+                            pci_front_write_mem,
+                            dev)<0) { 
+           PrintError("pci_front (%s): unable to rehook 64 bit memory region 0x%p through 0x%p\n",
+                      state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+           return -1;
+       }
+       
+    } else {
+       PrintError("pci_front (%s): unhandled PCI bar type %d\n", state->name, pbar->type);
+       return -1;
+    }
+
+    // remember the cooked value for the next update
+    pbar->val = *src;
+    
+    return 0;
+}
+
+
+// Config space write callback: forwards length bytes written by the guest
+// at register reg_num to the host device.
+// Returns 0 if the host device accepted all bytes and -1 otherwise.
+//
+// NOTE(review): the offset handed to the host device is the encoded
+// pci_addr value with reg = reg_num >> 2, whereas pull_config passes a
+// plain offset of 0 - confirm which form v3_host_dev_write_config expects.
+static int pci_front_config_update(uint_t reg_num, void * src, uint_t length, void * private_data) 
+{
+    struct vm_device * vmdev = (struct vm_device *)private_data;
+    struct pci_front_internal * front = (struct pci_front_internal *)vmdev->private_data;
+    union pci_addr_reg target;
+    int idx;
+
+    // start from our own address and select the register being written
+    target.value = front->pci_addr.value;
+    target.reg = reg_num >> 2;
+
+    PrintDebug("pci_front (%s): configuration update: writing 0x%x bytes at offset 0x%x to host device 0x%p, bytes=0x",
+              front->name, length, target.value, front->host_dev);
+
+    // dump the bytes about to be written (debug builds only)
+    idx = 0;
+    while (idx < length) {
+       PrintDebug("%x", ((uint8_t *)src)[idx]);
+       idx++;
+    }
+
+    PrintDebug("\n");
+
+    if (v3_host_dev_write_config(front->host_dev, target.value, src, length) == length) {
+       return 0;
+    }
+
+    PrintError("pci_front (%s): configuration update: unable to write all bytes\n",front->name);
+    return -1;
+}
+
+
+// Remove every full memory hook installed for this device's bars.
+//
+// pci_bar_init installs memory hooks for 32 bit, 24 bit and 64 bit (high
+// half) bars, so all three types are unhooked here.  I/O bars are not
+// touched; 64 bit low halves and empty bars carry no hook of their own.
+//
+// Returns 0 on success and -1 as soon as one region cannot be unhooked.
+static int unhook_all_mem(struct pci_front_internal *state)
+{
+    int bar_num;
+    struct vm_device *bus = state->pci_bus;
+
+
+    for (bar_num=0;bar_num<6;bar_num++) { 
+       struct pt_bar * pbar = &(state->bars[bar_num]);
+       const char * width = NULL;   // region width for the error message, NULL if nothing is hooked
+
+       PrintDebug("pci_front (%s): unhooking for bar %d\n", state->name, bar_num);
+
+       switch (pbar->type) {
+           case PT_BAR_MEM32:
+               width = "32";
+               break;
+           case PT_BAR_MEM24:
+               width = "24";
+               break;
+           case PT_BAR_MEM64_HI:
+               width = "64";
+               break;
+           default:
+               break;
+       }
+
+       if (width == NULL) {
+           continue;
+       }
+
+       if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+           PrintError("pci_front (%s): unable to unhook %s bit memory region starting at 0x%p\n", 
+                      state->name, width, (void*)(pbar->addr));
+           return -1;
+       }
+    }
+    
+    return 0;
+}
+
+
+
+// Register this device with the PCI bus it is attached to.
+//
+// All six bars are registered as passthrough bars whose init and write
+// callbacks are pci_bar_init and pci_bar_write; config space writes are
+// routed to pci_front_config_update.
+//
+// Returns 0 on success and -1 if the PCI bus did not return a device.
+static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev) 
+{
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+    struct pci_device * pci_dev = NULL;
+    struct v3_pci_bar bars[6];
+    int bus_num = 0;
+    int i;
+
+    for (i = 0; i < 6; i++) {
+       bars[i].type = PCI_BAR_PASSTHROUGH;
+       bars[i].private_data = dev;
+       bars[i].bar_init = pci_bar_init;
+       bars[i].bar_write = pci_bar_write;
+    }
+
+    pci_dev = v3_pci_register_device(state->pci_bus,
+                                    PCI_STD_DEVICE,
+                                    bus_num, -1, 0, 
+                                    state->name, bars,
+                                    pci_front_config_update,
+                                    NULL,      // no support for command updates
+                                    NULL,      // no support for expansion roms              
+                                    dev);
+
+    // Do not report success without a registered device
+    if (pci_dev == NULL) {
+       PrintError("pci_front (%s): unable to register device with the PCI bus\n", state->name);
+       return -1;
+    }
+
+    state->pci_dev = pci_dev;
+
+
+    // EXPANSION ROMS CURRENTLY UNSUPPORTED
+
+    // COMMANDS CURRENTLY UNSUPPORTED
+
+    return 0;
+}
+
+
+
+//
+// Device free callback: unhooks our memory regions, closes the host
+// device and releases the device state.
+//
+// Returns 0 on success and -1 if a memory region could not be unhooked
+// (in which case nothing is closed or freed).
+//
+// Note: potential bug:  not clear what pointer I get here
+//
+static int pci_front_free(struct pci_front_internal *state)
+{
+
+    if (unhook_all_mem(state)<0) { 
+       return -1;
+    }
+
+    // the device manager will unhook the i/o ports for us
+
+    if (state->host_dev) { 
+       v3_host_dev_close(state->host_dev);
+       state->host_dev=0;
+    }
+
+    // Log before freeing: the message reads state->name
+    PrintDebug("pci_front (%s): freed\n",state->name);
+
+    V3_Free(state);
+
+    return 0;
+}
+
+
+// Device operations handed to v3_add_device; only .free is provided.
+static struct v3_device_ops dev_ops = {
+//
+// Note: potential bug:  not clear what pointer I get here
+//
+// pci_front_free takes a struct pci_front_internal *, hence the cast.
+// NOTE(review): confirm the device manager calls .free with the private
+// data pointer that was passed to v3_add_device, not with the vm_device
+//
+    .free = (int (*)(void*))pci_front_free,
+};
+
+
+
+
+
+
+
+//
+// Device init callback registered under the name "PCI_FRONT".
+//
+// Reads the "ID", "bus" and "hostdev" values from the config tree, then
+// allocates the device state, adds the device to the vm, opens the host
+// device, caches its config space and registers with the PCI bus.
+//
+// Returns 0 on success and -1 on any failure.
+//
+static int pci_front_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) 
+{
+    struct vm_device * dev;
+    struct vm_device * bus;
+    struct pci_front_internal *state;
+    char *dev_id;
+    char *bus_id;
+    char *url;
+
+    
+    if (!(dev_id = v3_cfg_val(cfg, "ID"))) { 
+       PrintError("pci_front: no id  given!\n");
+       return -1;
+    }
+    
+    if (!(bus_id = v3_cfg_val(cfg, "bus"))) { 
+       PrintError("pci_front (%s): no bus given!\n",dev_id);
+       return -1;
+    }
+    
+    if (!(url = v3_cfg_val(cfg, "hostdev"))) { 
+       PrintError("pci_front (%s): no host device url given!\n",dev_id);
+       return -1;
+    }
+    
+    if (!(bus = v3_find_dev(vm,bus_id))) { 
+       PrintError("pci_front (%s): cannot attach to bus %s\n",dev_id,bus_id);
+       return -1;
+    }
+    
+    if (!(state = V3_Malloc(sizeof(struct pci_front_internal)))) { 
+       PrintError("pci_front (%s): cannot allocate state for device\n",dev_id);
+       return -1;
+    }
+    
+    memset(state, 0, sizeof(struct pci_front_internal));
+    
+    state->pci_bus = bus;
+
+    // Copy at most sizeof(name)-1 bytes: the memset above leaves the last
+    // byte 0, so name stays NUL-terminated even for an over-long id
+    strncpy(state->name, dev_id, sizeof(state->name) - 1);
+    
+    if (!(dev = v3_add_device(vm, dev_id, &dev_ops, state))) { 
+       PrintError("pci_front (%s): unable to add device\n",state->name);
+       // no device was created, so dev_ops.free will never run for state
+       V3_Free(state);
+       return -1;
+    }
+    
+    // From here on, failures remove the device instead of freeing state directly
+    if (!(state->host_dev=v3_host_dev_open(url,V3_BUS_CLASS_PCI,dev,vm))) { 
+       PrintError("pci_front (%s): unable to attach to host device %s\n",state->name, url);
+       v3_remove_device(dev);
+       return -1;
+    }
+    
+    // fetch config space from the host
+    if (pull_config(state,state->config_space)) { 
+       PrintError("pci_front (%s): cannot initially configure device\n",state->name);
+       v3_remove_device(dev);
+       return -1;
+    }
+
+    // setup virtual device for now
+    if (setup_virt_pci_dev(vm,dev)<0) { 
+       PrintError("pci_front (%s): cannot set up virtual pci device\n", state->name);
+       v3_remove_device(dev);
+       return -1;
+    }
+
+    // We do not need to hook anything here since pci will call
+    // us back via the bar_init functions
+
+    PrintDebug("pci_front (%s): inited and ready to be Potemkinized\n",state->name);
+
+    return 0;
+
+}
+
+
+device_register("PCI_FRONT", pci_front_init)
diff --git a/palacios/src/devices/vga.c b/palacios/src/devices/vga.c

index 834f780..ce28940 100644 (file)
--- a/palacios/src/devices/vga.c
+++ b/palacios/src/devices/vga.c
@@ -26,6 +26,16 @@
 
 #include "vga_regs.h"
 
+#ifndef CONFIG_DEBUG_VGA
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+#define DEBUG_MEM_DATA    0   // nonzero: dump the bytes moved by each vga_write/vga_read
+#define DEBUG_DEEP_MEM    0   // nonzero: detailed per-access tracing inside vga_write/vga_read
+#define DEBUG_DEEP_RENDER 0   // NOTE(review): not referenced in the visible part of this patch
+
+
 #define MEM_REGION_START 0xa0000
 #define MEM_REGION_END   0xc0000
 #define MEM_REGION_NUM_PAGES (((MEM_REGION_END)-(MEM_REGION_START))/4096)
@@ -272,7 +282,7 @@ struct vga_dac_regs {
     
 
 struct vga_internal {
-    struct vm_device *dev;  
+    struct vm_device *dev; 
     
     bool passthrough;
     bool skip_next_passthrough_out; // for word access 
@@ -320,6 +330,8 @@ struct vga_internal {
 };
 
 
+typedef enum {PLANAR_SHIFT, PACKED_SHIFT, C256_SHIFT} shift_mode_t; // how map bytes are assembled into pixels
+
 
 static void find_text_char_dim(struct vga_internal *vga, uint32_t *w, uint32_t *h)
 {
@@ -363,6 +375,7 @@ static void find_text_data_start(struct vga_internal *vga, void **data)
 
 }
 
+
 static void find_text_attr_start(struct vga_internal *vga, void **data)
 {
     uint32_t offset;
@@ -444,6 +457,29 @@ static int blinking(struct vga_internal *vga)
 }
 
 
+static void find_graphics_data_starting_offset(struct vga_internal *vga, uint32_t *offset)
+{
+    // *offset = (CRT controller start_address_high << 8) + start_address_low
+    *offset = vga->vga_crt_controller.vga_start_address_high;
+    *offset <<= 8;
+    *offset += vga->vga_crt_controller.vga_start_address_low;
+}
+
+
+static void find_shift_mode(struct vga_internal *vga, shift_mode_t *mode)
+{   // Store in *mode the shift mode selected by the graphics mode register
+    if (vga->vga_graphics_controller.vga_graphics_mode.c256) {   // c256 takes precedence over shift_reg_mode
+       *mode=C256_SHIFT;
+    } else {
+       if (vga->vga_graphics_controller.vga_graphics_mode.shift_reg_mode) {   // c256 clear: shift_reg_mode decides
+           *mode=PACKED_SHIFT;
+       } else {
+           *mode=PLANAR_SHIFT;
+       }
+    }
+}
+
+
 static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_t *height)
 {
     uint32_t vert_lsb, vert_msb;
@@ -458,34 +494,42 @@ static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_
        + (vga->vga_crt_controller.vga_overflow.vertical_disp_enable_end8);
               
     *height  = ( (vert_msb << 8) + vert_lsb + 1) ; // pixels high (scanlines)
-    
-}
-
-
-static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *width, uint32_t *height)
-{
-
-}
 
-static void render_graphics(struct vga_internal *vga, void *fb)
-{
+    // At this point we have the resolution in dot clocks across and scanlines top-to-bottom
+    // This is usually the resolution in pixels, but it can be monkeyed with
+    // at least in the following ways
 
-    PrintDebug("vga: render_graphics is unimplemented\n");
-    // Multiuplane 16
-    // Packed pixel mono
-    // packed pixel 4 color
-    // packed pixel 256 color
+    // vga sequencer dot clock divide by two 
+    if (vga->vga_sequencer.vga_clocking_mode.dot_clock) { 
+       *width/=2;
+       *height/=2;
+    }
 
-    find_graphics_cursor_pos(0,0,0);
+    // crt_controller.max_row_scan.double_scan => each row twice for 200=>400
+    if (vga->vga_crt_controller.vga_max_row_scan.double_scan) { 
+       *height/=2;
+    }
+    
+    // crt_controller.crt_mode_control.count_by_two => pixels twice as wide as normal
+    if (vga->vga_crt_controller.vga_crt_mode_control.count_by_two) { 
+       *width /= 2;
+    }
 
+    // crt_controller.crt_mode_control.horizontal_retrace_select => pixels twice as tall as normal
+    if (vga->vga_crt_controller.vga_crt_mode_control.horizontal_retrace_select) { 
+       *height /= 2;
+    }
+    
 }
 
-static void render_text_cursor(struct vga_internal *vga, void *fb)
+
+static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *x, uint32_t *y)
 {
+    // todo
+    *x=*y=0;
 }
 
 
-
 static void dac_lookup_24bit_color(struct vga_internal *vga,
                                   uint8_t entry,
                                   uint8_t *red,
@@ -503,6 +547,209 @@ static void dac_lookup_24bit_color(struct vga_internal *vga,
 
 }
 
+
+/*
+  Colors work like this:
+
+  4 bit modes:   index is to the internal palette on the attribute controller
+                 that supplies 6 bits, but we need 8 to index the dac
+                2 more (the msbs) are supplied from the color select register
+                 we can optionally overwrite bits 5 and 4 from the color
+                select register as well, depending on a selection bit
+                in the mode control register.   The result of all this is
+                8 bit index for the dac
+
+  8 bit modes:   the attribute controller passes the index straight through
+                 to the DAC.
+
+
+  The DAC translates from the 8 bit index into 6 bits per color channel
+  (18 bit color).   We multiply by 4 to get 24 bit color.
+*/
+
+static void find_24bit_color(struct vga_internal *vga, 
+                            uint8_t val,
+                            uint8_t *red,
+                            uint8_t *green,
+                            uint8_t *blue)
+{
+    uint8_t di;  // ultimate dac index
+    // Turn val into an 8 bit DAC index (di), then look up that entry's color via dac_lookup_24bit_color
+    if (vga->vga_attribute_controller.vga_attribute_mode_control.pixel_width) { 
+       // 8 bit mode goes right to the DAC
+       di=val;
+    } else {
+       struct vga_internal_palette_reg pr = vga->vga_attribute_controller.vga_internal_palette[val%16]; // only val's low 4 bits pick the palette register
+       di = pr.palette_data;
+       
+       // Fix bits 5-4 if needed
+       if (vga->vga_attribute_controller.vga_attribute_mode_control.p54_select) { 
+           di &= ~0x30;  // clear 5-4
+           di |= vga->vga_attribute_controller.vga_color_select.sc4 << 4;
+           di |= vga->vga_attribute_controller.vga_color_select.sc5 << 5;
+       }
+
+       // We must always produce bits 6 and 7
+       di &= ~0xc0; // clear 7-6
+       di |= vga->vga_attribute_controller.vga_color_select.sc6 << 6;
+       di |= vga->vga_attribute_controller.vga_color_select.sc7 << 7;
+    }
+       
+    dac_lookup_24bit_color(vga,di,red,green,blue);
+}
+       
+static void render_graphics(struct vga_internal *vga, void *fb)
+{
+    // Draw the guest's graphics-mode screen, decoded from the four maps, into fb (clipped to the fb size)
+    struct v3_frame_buffer_spec *spec = &(vga->target_spec);
+
+    uint32_t gw, gh; // graphics w/h
+    uint32_t fw, fh; // fb w/h
+    uint32_t rgw, rgh;  // region we can actually show on the frame buffer
+    
+
+    uint32_t fx, fy;     // pixel position within the frame buffer
+    
+    uint32_t offset;     // offset into the maps
+    uint8_t  m;        // map
+    uint8_t  p;          // pixel in the current map byte  (0..7)
+
+    uint8_t r,g,b;  // looked up colors for entry
+
+    void    *pixel;   // current pixel in the fb
+    uint8_t *red;     // and the channels in the pixel
+    uint8_t *green;   //
+    uint8_t *blue;    //
+
+    uint8_t db[4]; // 4 bytes read at a time
+    uint8_t pb[8]; // 8 pixels assembled at a time
+
+    shift_mode_t sm;   // shift mode
+
+    uint32_t cur_x, cur_y;
+    
+
+    find_graphics_res(vga,&gw,&gh);
+
+    find_shift_mode(vga,&sm);
+
+    find_graphics_cursor_pos(vga,&cur_x,&cur_y);
+
+    find_graphics_data_starting_offset(vga,&offset);
+
+    fw = spec->width;
+    fh = spec->height;
+
+
+    PrintDebug("vga: attempting graphics render (%s): graphics_res=(%u,%u), fb_res=(%u,%u), "
+               "fb=0x%p offset=0x%x\n",
+              sm == PLANAR_SHIFT ? "planar shift" : 
+              sm == PACKED_SHIFT ? "packed shift" : 
+              sm == C256_SHIFT ? "color256 shift" : "UNKNOWN",
+              gw,gh,fw,fh,fb,offset);
+
+    // First we need to clip to what we can actually show
+    rgw = gw < fw ? gw : fw;
+    rgh = gh < fh ? gh : fh;
+
+    if (gw%8) { 
+       PrintError("vga: warning: graphics width is not a multiple of 8\n");
+    }
+
+    // Each inner-loop step consumes one byte from each of the four maps (offset++)
+    // and yields a group of 4 (256 color) or 8 (planar/packed) horizontally adjacent pixels
+    // Now we scan across by row
+    for (fy=0;fy<gh;fy++) { 
+       // by column
+       for (fx=0;fx<gw;
+            fx += (sm==C256_SHIFT ? 4 : 8) , offset++ ) { 
+
+           // if any of these pixels are in the render region
+           if (fy < rgh && fx < rgw) {
+               // assemble all 4 or 8 pixels
+               
+               // fetch the data bytes
+               for (m=0;m<4;m++) { 
+                   db[m]=*((uint8_t*)(vga->map[m]+offset));
+               }
+                
+               // assemble
+               switch (sm) { 
+                   case PLANAR_SHIFT:
+                       for (p=0;p<8;p++) { 
+                           pb[p]= 
+                               (( db[0] >> 7) & 0x1) |
+                               (( db[1] >> 6) & 0x2) |
+                               (( db[2] >> 5) & 0x4) |
+                               (( db[3] >> 4) & 0x8) ;
+                           
+                           for (m=0;m<4;m++) { 
+                               db[m] <<= 1;
+                           }
+                       }
+                       break;
+                       
+                   case PACKED_SHIFT:
+                       // first 4 pixels use planes 0 and 2
+                       for (p=0;p<4;p++) { 
+                           pb[p] = 
+                               ((db[2] >> 4) & 0xc) |
+                               ((db[0] >> 6) & 0x3) ;
+                           db[2] <<= 2;
+                           db[0] <<= 2;
+                       }
+                       // no break here: pixels 4..7 of the group must be assembled too before drawing
+                       
+                       // next 4 pixels use planes 1 and 3
+                       for (p=4;p<8;p++) { 
+                           pb[p] = 
+                               ((db[3] >> 4) & 0xc) |
+                               ((db[1] >> 6) & 0x3) ;
+                           db[3] <<= 2;
+                           db[1] <<= 2;
+                       }
+                       break;
+
+                   case C256_SHIFT:
+                       // this one is either very bizarre or as simple as this
+                       for (p=0;p<4;p++) { 
+                           pb[p] = db[p];
+                       }
+                       break;
+               }
+
+               // draw each pixel of the group that lies inside the clipped width, so a row never spills over
+               for (p=0;p< (sm==C256_SHIFT ? 4 : 8) && (fx+p) < rgw;p++) { 
+                   
+                   // find its color
+                   find_24bit_color(vga,pb[p],&r,&g,&b);
+               
+                   // find its position in the framebuffer;
+                   pixel =  fb + (((fx + p) + (fy*spec->width)) * spec->bytes_per_pixel);
+                   red = pixel + spec->red_offset;
+                   green = pixel + spec->green_offset;
+                   blue = pixel + spec->blue_offset;
+
+                   // draw it
+                   *red=r;
+                   *green=g;
+                   *blue=b;
+               }
+           }
+       }
+    }
+    
+    PrintDebug("vga: render done\n");
+}
+
+
+static void render_text_cursor(struct vga_internal *vga, void *fb)
+{   // placeholder: the body is empty, so no text cursor is drawn
+}
+
+
+
+
 //
 // A variant of this function could render to
 // a text console interface as well
@@ -593,22 +840,22 @@ static void render_text(struct vga_internal *vga, void *fb)
                // foreground
                
                if (!extended_fontset(vga)) { 
-                   fg_entry = ((uint8_t)(a.foreground_intensity_or_font_select)) << 3;
+                   fg_entry = a.foreground_intensity_or_font_select << 3;
                } else {
                    fg_entry = 0;
                }
                fg_entry |= a.fore;
 
-               dac_lookup_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb);
+               find_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb);
 
                if (!blinking(vga)) { 
-                   bg_entry = ((uint8_t)(a.blinking_or_bg_intensity)) << 3;
+                   bg_entry = a.blinking_or_bg_intensity << 3;
                } else {
                    bg_entry = 0;
                }
                bg_entry |= a.back;
                
-               dac_lookup_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb);
+               find_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb);
 
                // Draw the character
                for (l=0; l<ch; l++, font++) {
@@ -710,6 +957,15 @@ static void render_test(struct vga_internal *vga, void *fb)
     }
 }
 
+static void render_black(struct vga_internal *vga, void *fb)
+{
+    struct v3_frame_buffer_spec *s;
+    // Blank the frame buffer fb, sized according to the target spec stored in vga
+    s=&(vga->target_spec);
+    // zero height * width * bytes_per_pixel bytes starting at fb
+    memset(fb,0,s->height*s->width*s->bytes_per_pixel);
+}
+
 static void render_maps(struct vga_internal *vga, void *fb)
 {
 
@@ -757,20 +1013,21 @@ static int render(struct vga_internal *vga)
 
        fb = v3_graphics_console_get_frame_buffer_data_rw(vga->host_cons,&(vga->target_spec));
 
-       // Draw some crap for testing for now
-       if (0) { render_test(vga,fb);}
-       // Draw the maps for debugging
-       if (0) { render_maps(vga,fb);}
-
-       if (vga->vga_graphics_controller.vga_misc.graphics_mode) { 
-           render_graphics(vga,fb);
+       if (!(vga->vga_sequencer.vga_clocking_mode.screen_off)) {
+           if (vga->vga_attribute_controller.vga_attribute_mode_control.graphics) { 
+               render_graphics(vga,fb);
+           } else {
+               render_text(vga,fb);
+               render_text_cursor(vga,fb);
+           }
        } else {
-           render_text(vga,fb);
-           render_text_cursor(vga,fb);
+           render_black(vga,fb);
        }
 
-       render_maps(vga,fb);
+       if (0) { render_test(vga,fb); }
 
+       // always render maps for now 
+       render_maps(vga,fb);
 
        v3_graphics_console_release_frame_buffer_data_rw(vga->host_cons);
     }
@@ -870,7 +1127,7 @@ static int vga_write(struct guest_info * core,
        memcpy(V3_VAddr((void*)guest_addr),src,length);
     }
     
-#if 0
+#if DEBUG_MEM_DATA
     int i;
     PrintDebug("vga: data written was 0x");
     for (i=0;i<length;i++) {
@@ -887,10 +1144,6 @@ static int vga_write(struct guest_info * core,
 
     /* Write mode determine by Graphics Mode Register (Index 05h).writemode */
 
-    // Probably need to add odd/even mode access here for text
-
-    PrintDebug("vga: write is with odd/even = %u\n", vga->vga_sequencer.vga_mem_mode.odd_even);
-
 
     switch (vga->vga_graphics_controller.vga_graphics_mode.write_mode) {
        case 0: {
@@ -916,7 +1169,9 @@ static int vga_write(struct guest_info * core,
            
            offset = find_offset_write(vga, guest_addr);
 
+#if DEBUG_DEEP_MEM
            PrintDebug("vga: mode 0 write, offset=0x%llx, ror=%u, func=%u\n", offset,ror,func);
+#endif
 
            for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) { 
                // now for each map
@@ -925,21 +1180,36 @@ static int vga_write(struct guest_info * core,
                uint8_t bm = vga->vga_graphics_controller.vga_bit_mask;
                uint8_t mm = find_map_write(vga,guest_addr+i);
 
-               PrintDebug("vga: write i=%u, mm=0x%x, offset=0x%x\n",i,(unsigned int)mm,(unsigned int)offset);
+#if DEBUG_DEEP_MEM
+               PrintDebug("vga: write i=%u, mm=0x%x, bm=0x%x sr=0x%x esr=0x%x offset=0x%x\n",i,(unsigned int)mm,(unsigned int)bm, (unsigned int)sr, (unsigned int)esr,(unsigned int)offset);
+#endif
 
-               for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, bm>>=1, mm>>=1) { 
+               for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, mm>>=1) { 
                    vga_map map = vga->map[mapnum];
                    uint8_t data = ((uint8_t *)src)[i];
                    uint8_t latchval = vga->latch[mapnum];
-                       
+                   
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: raw data=0x%x\n",data);
+#endif
                    // rotate data right
-                   data = (data>>ror) | data<<(8-ror);
-
+                   if (ror) { 
+                       data = (data>>ror) | data<<(8-ror);
+                   }
+                   
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after ror=0x%x\n",data);
+#endif
                    // use SR bit if ESR is on for this map
-                   if (esr & 0x1) { 
-                       data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7);  // expand sr bit
+                   if (esr & 0x1) {
+                       data = (sr&0x1) * -1;
+                       
                    }
                    
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after esrr=0x%x\n",data);
+#endif
+                   
                    // Apply function
                    switch (func) { 
                        case 0: // NOP
@@ -954,19 +1224,26 @@ static int vga_write(struct guest_info * core,
                            data ^= latchval;
                            break;
                    }
-                           
-                   // mux between latch and alu output
-                   if (bm & 0x1) { 
-                       // use alu output, which is in data
-                   } else {
-                       // use latch value
-                       data=latchval;
-                   }
+                   
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after func=0x%x\n",data);
+#endif
+                   
+                   // mux between the data byte and the latch byte on
+                   // a per-bit basis
+                   data = (bm & data) | ((~bm) & latchval);
+                   
+
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after bm mux=0x%x\n",data);
+#endif
                    
                    // selective write
                    if (mm & 0x1) { 
                        // write to this map
-                       //PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset]));
+#if DEBUG_DEEP_MEM
+                       PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset]));
+#endif
                        map[offset] = data;
                    } else {
                        // skip this map
@@ -989,7 +1266,9 @@ static int vga_write(struct guest_info * core,
 
            uint64_t offset = find_offset_write(vga,guest_addr);
 
+#if DEBUG_DEEP_MEM
            PrintDebug("vga: mode 1 write, offset=0x%llx\n", offset);
+#endif
 
            for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) { 
 
@@ -1030,7 +1309,9 @@ static int vga_write(struct guest_info * core,
            
            offset = find_offset_write(vga, guest_addr);
 
+#if DEBUG_DEEP_MEM
            PrintDebug("vga: mode 2 write, offset=0x%llx, func=%u\n", offset,func);
+#endif
 
            for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) { 
                // now for each map
@@ -1043,9 +1324,9 @@ static int vga_write(struct guest_info * core,
                    uint8_t latchval = vga->latch[mapnum];
                        
                    // expand relevant bit to 8 bit
-                   // it's basically esr=1, sr=bit from write
-                   data = (uint8_t)(((sint8_t)(((data>>mapnum)&0x1)<<7))>>7);
-                   
+                   // it's basically esr=1, sr=bit from mode 0 write
+                   data = ((data>>mapnum)&0x1) * -1;
+                       
                    // Apply function
                    switch (func) { 
                        case 0: // NOP
@@ -1060,14 +1341,9 @@ static int vga_write(struct guest_info * core,
                            data ^= latchval;
                            break;
                    }
-                           
+
                    // mux between latch and alu output
-                   if (bm & 0x1) { 
-                       // use alu output, which is in data
-                   } else {
-                       // use latch value
-                       data=latchval;
-                   }
+                   data = (bm & data) | ((~bm) & latchval);
                    
                    // selective write
                    if (mm & 0x1) { 
@@ -1107,7 +1383,9 @@ static int vga_write(struct guest_info * core,
                // now for each map
                uint8_t data = ((uint8_t *)src)[i];
 
-               data = (data>>ror) | data<<(8-ror);
+               if (ror) {
+                   data = (data>>ror) | data<<(8-ror);
+               }
 
                uint8_t bm = vga->vga_graphics_controller.vga_bit_mask & data;
                uint8_t sr = vga->vga_graphics_controller.vga_set_reset.val & 0xf;
@@ -1117,16 +1395,11 @@ static int vga_write(struct guest_info * core,
                    vga_map map = vga->map[mapnum];
                    uint8_t latchval = vga->latch[mapnum];
                        
-                   data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7);  // expand sr bit
-                   
-                   
+                   // expand SR bit
+                   data = (sr&0x1) * -1;
+
                    // mux between latch and alu output
-                   if (bm & 0x1) { 
-                       // use alu output, which is in data
-                   } else {
-                       // use latch value
-                       data=latchval;
-                   }
+                   data = (bm & data) | ((~bm) & latchval);
                    
                    // selective write
                    if (mm & 0x1) { 
@@ -1212,7 +1485,15 @@ static int vga_read(struct guest_info * core,
                // address bytes select the map
                for (i=0;i<length;i++,offset+=find_increment_read(vga,guest_addr+i)) { 
                    mapnum = (guest_addr+i) % 4;
-                   ((uint8_t*)dst)[i] = vga->latch[mapnum] = *(vga->map[mapnum]+offset);
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: mode 0 read, chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum);
+#endif
+                   ((uint8_t*)dst)[i] = *(vga->map[mapnum]+offset);
+
+                   // presumably all the latches are to be reloaded, not just the selected one?
+                   for (mapnum=0;mapnum<4;mapnum++) { 
+                       vga->latch[mapnum] = *(vga->map[mapnum]+offset);
+                   }
                }
            } else {
                mapnum = vga->vga_graphics_controller.vga_read_map_select.map_select;
@@ -1222,6 +1503,10 @@ static int vga_read(struct guest_info * core,
                    PrintError("vga: read to offset=%llu map=%u (%u bytes)\n",offset,mapnum,length);
                }
                
+#if DEBUG_DEEP_MEM
+               PrintDebug("vga: mode 0 read, not-chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum);
+#endif
+
                memcpy(dst,(vga->map[mapnum])+offset,length);
                
                // load the latches with the last item read
@@ -1252,6 +1537,11 @@ static int vga_read(struct guest_info * core,
            uint8_t  bits;
            
            offset = find_offset_read(vga,guest_addr);
+
+#if DEBUG_DEEP_MEM
+           PrintDebug("vga: mode 1 read, offset=0x%llx, cc=0x%x, dc-0x%x\n",offset,cc,dc);
+#endif
+               
            
            for (i=0;i<length;i++,offset++) { 
                vga_map map;
@@ -1289,7 +1579,7 @@ static int vga_read(struct guest_info * core,
     }
 
 
-#if 0
+#if DEBUG_MEM_DATA
     int i;
     PrintDebug("vga: data read is 0x");
     for (i=0;i<length;i++) {
diff --git a/palacios/src/devices/vga_regs.h b/palacios/src/devices/vga_regs.h

index c9858db..08b65f2 100644 (file)
--- a/palacios/src/devices/vga_regs.h
+++ b/palacios/src/devices/vga_regs.h
@@ -632,8 +632,7 @@ page 2-88).
            // 1 = odd/even addressing as in CGMA
            uint8_t shift_reg_mode:1;
            // 1 = shift regs get odd bits from odd maps and even/even
-           uint8_t c256:1;         
-           // 1 = 256 color mode
+           uint8_t c256:1;                 // 1 = 256 color mode
            // 0 = shift_reg_mode controls shift regs
            uint8_t reserved2:1; 
        } __attribute__((packed));
@@ -713,7 +712,7 @@ struct vga_attribute_controller_address_reg {
        uint8_t val;
        struct {
            uint8_t index:5;    // actual address
-           uint8_t internal_palette_address_srouce:1; 
+           uint8_t internal_palette_address_source:1; 
            // 0 => use the internal color palette (load the regs)
            // 1 => use the external color palette
            uint8_t reserved:2; 
@@ -866,27 +865,13 @@ struct vga_attribute_byte {
     union {
        uint8_t val;
        struct {
-           union {
-               uint8_t fore:3;
-               struct { 
-                   uint8_t fore_red:1;
-                   uint8_t fore_green:1;
-                   uint8_t fore_blue:1;
-               } __attribute__((packed));
-           } __attribute__((packed));
+           uint8_t fore:3;   //foreground color
            uint8_t foreground_intensity_or_font_select:1; // depends on char map select reg
            // character map selection is effected
            // when memory_mode.extended meomory=1
            // and the two character map enteries on character_map_select are 
            // different
-           union {
-               uint8_t back:3;
-               struct { 
-                   uint8_t back_red:1;
-                   uint8_t back_green:1;
-                   uint8_t back_blue:1;
-               } __attribute__((packed));
-           } __attribute__((packed));
+           uint8_t back:3;   //background color
            uint8_t blinking_or_bg_intensity:1; 
            // attribute mode control.enableblink = 1 => blink
            // =0 => intensity (16 colors of bg)
diff --git a/palacios/src/devices/vnet_nic.c b/palacios/src/devices/vnet_nic.c

index 0fdaaba..05117e5 100644 (file)
--- a/palacios/src/devices/vnet_nic.c
+++ b/palacios/src/devices/vnet_nic.c
@@ -42,7 +42,7 @@ struct vnet_nic_state {
 
 /* called by frontend, send pkt to VNET */
 static int vnet_nic_send(uint8_t * buf, uint32_t len, 
-                        void * private_data) {
+                        int synchronize, void * private_data) {
     struct vnet_nic_state * vnetnic = (struct vnet_nic_state *)private_data;
 
     struct v3_vnet_pkt pkt;
@@ -52,15 +52,13 @@ static int vnet_nic_send(uint8_t * buf, uint32_t len,
     memcpy(pkt.header, buf, ETHERNET_HEADER_LEN);
     pkt.data = buf;
 
-#ifdef CONFIG_DEBUG_VNET_NIC
-    {
-       PrintDebug("VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", 
+    V3_Net_Print(2, "VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", 
                   pkt.size, pkt.src_id, pkt.src_type);
-       v3_hexdump(buf, len, NULL, 0);
+    if(v3_net_debug >= 4){
+       v3_hexdump(buf, len, NULL, 0);
     }
-#endif
 
-    return v3_vnet_send_pkt(&pkt, NULL);;
+    return v3_vnet_send_pkt(&pkt, NULL, synchronize);
 }
 
 
@@ -70,22 +68,13 @@ static int virtio_input(struct v3_vm_info * info,
                        void * private_data){
     struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data;
 
-    PrintDebug("VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", 
+    V3_Net_Print(2, "VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", 
                pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type);
        
     return vnetnic->net_ops.recv(pkt->data, pkt->size,
                                 vnetnic->net_ops.frontend_data);
 }
 
-/* poll data from front-end */
-static void virtio_poll(struct v3_vm_info * info, 
-                       int budget,
-                       void * private_data){
-    struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data;
-
-    vnetnic->net_ops.poll(info, budget, vnetnic->net_ops.frontend_data);
-}
-
 
 static int vnet_nic_free(struct vnet_nic_state * vnetnic) {
 
@@ -102,7 +91,6 @@ static struct v3_device_ops dev_ops = {
 
 static struct v3_vnet_dev_ops vnet_dev_ops = {
     .input = virtio_input,
-    .poll = virtio_poll,
 };
 
 
diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig

new file mode 100644 (file)

index 0000000..c7b7d69
--- /dev/null
+++ b/palacios/src/extensions/Kconfig
@@ -0,0 +1,41 @@
+menu "Extensions"
+
+config EXT_VTIME
+       bool "Enable Time virtualization"
+       default n
+       help
+          Enables the timer virtualization extensions. These hide the cost of 
+          running inside the VMM context.  This can aid the consistency of
+           time between multiple timers, but can cause the guest to run 
+           a good bit slower than the host in VM-intensive parts of the code.
+ 
+
+config EXT_VTSC
+       bool "Fully virtualize guest TSC"
+       default n
+       depends on EXT_VTIME
+       help
+           Virtualize the processor time stamp counter in the guest, 
+           generally increasing consistency between various time sources 
+           but also potentially making guest time run slower than real time.
+        
+config EXT_MTRRS
+       bool "Support virtualized MTRRs"
+       default n
+       help
+          Provides a virtualized set of MTRR registers
+
+config EXT_MACH_CHECK
+       bool "Support Machine Check functionality"
+       default n
+       help 
+          Provides a virtualized machine check architecture
+
+
+config EXT_INSPECTOR
+       bool "VM Inspector"
+       default n
+       help
+         Provides the inspection extension
+
+endmenu
diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile

new file mode 100644 (file)

index 0000000..ac19202
--- /dev/null
+++ b/palacios/src/extensions/Makefile
@@ -0,0 +1,5 @@
+obj-y += null.o
+obj-$(CONFIG_EXT_MTRRS) += ext_mtrr.o
+obj-$(CONFIG_EXT_VTSC) += ext_vtsc.o
+obj-$(CONFIG_EXT_VTIME) += ext_vtime.o
+obj-$(CONFIG_EXT_INSPECTOR) += ext_inspector.o
diff --git a/palacios/src/palacios/vmm_inspector.c b/palacios/src/extensions/ext_inspector.c

similarity index 81%

rename from palacios/src/palacios/vmm_inspector.c

rename to palacios/src/extensions/ext_inspector.c

index 2ff611b..7b89a91 100644 (file)
--- a/palacios/src/palacios/vmm_inspector.c
+++ b/palacios/src/extensions/ext_inspector.c
@@ -18,29 +18,39 @@
  */
 
 
-#include <palacios/vmm_inspector.h>
+//#include <palacios/vmm_inspector.h>
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 #include <palacios/vmm_sprintf.h>
+#include <palacios/vmm_extensions.h>
+
+#include <palacios/vmm_multitree.h>
+#include <interfaces/inspector.h>
 
 // Note that v3_inspect_node_t is actuall a struct v3_mtree
 // Its set as void for opaque portability
 
+struct v3_inspector_state {
+    struct v3_mtree state_tree;
+
+};
 
-int v3_init_inspector(struct v3_vm_info * vm) {
-    struct v3_inspector_state * state = (struct v3_inspector_state *)&(vm->inspector);
 
+static int init_inspector(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+    struct v3_inspector_state * state = V3_Malloc(sizeof(struct v3_inspector_state)); // NOTE(review): result is not NULL-checked before the memset below
     memset(state, 0, sizeof(struct v3_inspector_state));
 
     strncpy(state->state_tree.name, "vm->name", 50);
     state->state_tree.subtree = 1;
 
+    *priv_data = state; // return the newly allocated state to the caller through priv_data
+
     return 0;
 }
 
 
-int  v3_init_inspector_core(struct guest_info * core) {
-    struct v3_inspector_state * vm_state = &(core->vm_info->inspector);
+static int init_inspector_core(struct guest_info * core, void * priv_data) {
+    struct v3_inspector_state * vm_state = priv_data;
     char core_name[50];
 
     snprintf(core_name, 50, "core.%d", core->cpu_id);
@@ -72,7 +82,7 @@ int  v3_init_inspector_core(struct guest_info * core) {
        v3_inspect_64(cr_node, "EFER", (uint64_t *)&(core->ctrl_regs.efer));    
 
 
-       //      struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS");
+       //struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS");
        
 
 
@@ -82,6 +92,23 @@ int  v3_init_inspector_core(struct guest_info * core) {
 }
 
 
+
+
+
+static struct v3_extension_impl inspector_impl = {   // descriptor passed to register_extension() below
+    .name = "inspector",                 // also the key v3_get_inspection_root() uses to find the state
+    .init = init_inspector,
+    .deinit = NULL,                      // no teardown hook is registered
+    .core_init = init_inspector_core,
+    .core_deinit = NULL,                 // no per-core teardown hook is registered
+    .on_entry = NULL,
+    .on_exit = NULL
+};
+
+
+register_extension(&inspector_impl);
+
+
 v3_inspect_node_t * v3_inspect_add_subtree(v3_inspect_node_t * root, char * name) {
     return v3_mtree_create_subtree(root, name);
 }
@@ -122,8 +149,6 @@ int v3_inspect_buf(v3_inspect_node_t * node, char * name,
 
 
 
-
-
 int v3_find_inspection_value(v3_inspect_node_t * node, char * name, 
                           struct v3_inspection_value * value) {
     struct v3_mtree * mt_node = v3_mtree_find_node(node, name);
@@ -152,7 +177,13 @@ struct v3_inspection_value v3_inspection_value(v3_inspect_node_t * node) {
 
 
 v3_inspect_node_t * v3_get_inspection_root(struct v3_vm_info * vm) {
-    return &(vm->inspector.state_tree);
+    struct v3_inspector_state * inspector = v3_get_extension_state(vm, inspector_impl.name);
+
+    if (inspector == NULL) {
+       return NULL;
+    }
+
+    return &(inspector->state_tree);
 }
 
 v3_inspect_node_t * v3_get_inspection_subtree(v3_inspect_node_t * root, char * name) {
@@ -167,3 +198,7 @@ v3_inspect_node_t * v3_inspection_node_next(v3_inspect_node_t * node) {
 v3_inspect_node_t * v3_inspection_first_child(v3_inspect_node_t * root) {
     return v3_mtree_first_child(root);
 }
+
+
+
+
diff --git a/palacios/src/extensions/ext_vtime.c b/palacios/src/extensions/ext_vtime.c

new file mode 100644 (file)

index 0000000..665d6be
--- /dev/null
+++ b/palacios/src/extensions/ext_vtime.c
@@ -0,0 +1,169 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+#include <palacios/vmm.h>
+#include <palacios/vmm_time.h>
+#include <palacios/vm_guest.h>
+
+
+
+
+/* Overview 
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest 
+ * resolution, lowest overhead timer on modern CPUs that it can - the 
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not 
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a 
+ * constant rate TSC, and Palacios relies on this fact.
+ * 
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ *     time in the guest. This is computed using an offset from (1) above.
+ * (3) The actual guest timestamp counter (which can be written by
+ *     writing to the guest TSC MSR - MSR 0x10). This is computed as an
+ *     offset from the monotonic guest TSC in (2) above, and this offset
+ *     is updated when the TSC MSR is written.
+ *
+ * The value used to offset the guest TSC from the host TSC is the *sum* of all
+ * of these offsets (2 and 3) above
+ * 
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest, 
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ * Future additions:
+ * (1) Add support for temporarily skewing guest time off of where it should
+ *     be to support slack simulation of guests. The idea is that simulators
+ *     set this skew to be the difference between how much time passed for a 
+ *     simulated feature and a real implementation of that feature, making 
+ *     time pass at a different rate from real time on this core. The VMM will then
+ *     attempt to move this skew back towards 0 subject to resolution/accuracy
+ *     constraints from various system timers.
+ *   
+ *     The main effort in doing this will be to get accuracy/resolution 
+ *     information from each local timer and to use this to bound how much skew
+ *     is removed on each exit.
+ */
+
+
+
+struct vtime_state {
+    uint32_t guest_cpu_freq;   // can be lower than host CPU freq!
+    uint64_t initial_time;     // Time when VMM started. 
+    sint64_t guest_host_offset;// Offset of monotonic guest time from host time
+};
+
+
+
+
+static int offset_time( struct guest_info * info, sint64_t offset )
+{
+    struct vm_time * time_state = &(info->time_state);
+//    PrintDebug("Adding additional offset of %lld to guest time.\n", offset);
+    time_state->guest_host_offset += offset;
+    return 0;
+}
+
+
+// Control guest time in relation to host time so that the two stay 
+// appropriately synchronized to the extent possible. 
+int v3_adjust_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t host_time, target_host_time;
+    uint64_t guest_time, target_guest_time, old_guest_time;
+    uint64_t guest_elapsed, host_elapsed, desired_elapsed;
+
+    /* Compute the target host time given how much time has *already*
+     * passed in the guest */
+    guest_time = v3_get_guest_time(time_state);
+    guest_elapsed = (guest_time - time_state->initial_time);
+    desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq;
+    target_host_time = time_state->initial_time + desired_elapsed;
+
+    /* Now, let the host run while the guest is stopped to make the two
+     * sync up. */
+    host_time = v3_get_host_time(time_state);
+    old_guest_time = v3_get_guest_time(time_state);
+
+    while (target_host_time > host_time) {
+       v3_yield(info);
+       host_time = v3_get_host_time(time_state);
+    }
+
+    guest_time = v3_get_guest_time(time_state);
+
+    // We do *not* assume the guest timer was paused in the VM. If it was
+    // this offsetting is 0. If it wasn't, we need this.
+   offset_time(info, (sint64_t)old_guest_time - (sint64_t)guest_time);
+
+    /* Now the host may have gotten ahead of the guest because
+     * yielding is a coarse grained thing. Figure out what guest time
+     * we want to be at, and use the offsetting mechanism in 
+     * the VMM to make the guest run forward. We limit *how* much we skew 
+     * it forward to prevent the guest time making large jumps, 
+     * however. */
+    host_elapsed = host_time - time_state->initial_time;
+    desired_elapsed = (host_elapsed * time_state->guest_cpu_freq) / time_state->host_cpu_freq;
+    target_guest_time = time_state->initial_time + desired_elapsed;
+
+    if (guest_time < target_guest_time) {
+       uint64_t max_skew, desired_skew, skew;
+
+       if (time_state->enter_time) {
+           max_skew = (time_state->exit_time - time_state->enter_time) / 10;
+       } else {
+           max_skew = 0;
+       }
+
+       desired_skew = target_guest_time - guest_time;
+       skew = desired_skew > max_skew ? max_skew : desired_skew;
+/*     PrintDebug("Guest %llu cycles behind where it should be.\n",
+                  desired_skew);
+       PrintDebug("Limit on forward skew is %llu. Skewing forward %llu.\n",
+                  max_skew, skew); */
+       
+       offset_time(info, skew);
+    }
+    
+    return 0;
+}
+
+
+static int init() {
+    khz = v3_cfg_val(cfg_tree, "khz");
+
+    if (khz) {
+       time_state->guest_cpu_freq = atoi(khz);
+       PrintDebug("Core %d CPU frequency requested at %d khz.\n", 
+                  info->cpu_id, time_state->guest_cpu_freq);
+    } 
+    
+    if ( (khz == NULL) || 
+        (time_state->guest_cpu_freq <= 0)  || 
+        (time_state->guest_cpu_freq > time_state->host_cpu_freq) ) {
+
+       time_state->guest_cpu_freq = time_state->host_cpu_freq;
+    }
+
+
+}
diff --git a/palacios/src/extensions/ext_vtsc.c b/palacios/src/extensions/ext_vtsc.c

new file mode 100644 (file)

index 0000000..96e7ce3
--- /dev/null
+++ b/palacios/src/extensions/ext_vtsc.c
@@ -0,0 +1,195 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_time.h>
+#include <palacios/vm_guest.h>
+
+
+// Functions for handling exits on the TSC when fully virtualizing 
+// the timestamp counter.
+#define TSC_MSR     0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
+
+
+struct vtsc_state {
+
+    struct v3_msr tsc_aux;     // Auxiliary MSR for RDTSCP
+
+};
+
+
+
+/* 
+ * Handle full virtualization of the time stamp counter.  As noted
+ * above, we don't store the actual value of the TSC, only the guest's
+ * offset from the monotonic guest time. If the guest writes to the TSC, we
+ * handle this by changing that offset.
+ *
+ * Possible TODO: Proper hooking of TSC read/writes?
+ */ 
+
+static int rdtsc(struct guest_info * info) {
+    uint64_t tscval = v3_get_guest_tsc(&info->time_state);
+
+    info->vm_regs.rdx = tscval >> 32;
+    info->vm_regs.rax = tscval & 0xffffffffLL;
+
+    return 0;
+}
+
+int v3_handle_rdtsc(struct guest_info * info) {
+    rdtsc(info);
+    
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 2;
+    
+    return 0;
+}
+
+int v3_rdtscp(struct guest_info * info) {
+    int ret;
+    /* First get the MSR value that we need. It's safe to futz with
+     * ra/c/dx here since they're modified by this instruction anyway. */
+    info->vm_regs.rcx = TSC_AUX_MSR; 
+    ret = v3_handle_msr_read(info);
+
+    if (ret != 0) {
+       return ret;
+    }
+
+    info->vm_regs.rcx = info->vm_regs.rax;
+
+    /* Now do the TSC half of the instruction */
+    ret = v3_rdtsc(info);
+
+    if (ret != 0) {
+       return ret;
+    }
+
+    return 0;
+}
+
+
+int v3_handle_rdtscp(struct guest_info * info) {
+  PrintDebug("Handling virtual RDTSCP call.\n");
+
+    v3_rdtscp(info);
+
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 3;
+    
+    return 0;
+}
+
+
+
+
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, 
+                                struct v3_msr *msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    msr_val->lo = time_state->tsc_aux.lo;
+    msr_val->hi = time_state->tsc_aux.hi;
+
+    return 0;
+}
+
+
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, 
+                             struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    time_state->tsc_aux.lo = msr_val.lo;
+    time_state->tsc_aux.hi = msr_val.hi;
+
+    return 0;
+}
+
+
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr *msr_val, void *priv) {
+    uint64_t time = v3_get_guest_tsc(&info->time_state);
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    msr_val->hi = time >> 32;
+    msr_val->lo = time & 0xffffffffLL;
+    
+    return 0;
+}
+
+
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+    time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time; 
+
+    return 0;
+}
+
+
+static int deinit() {
+    v3_unhook_msr(vm, TSC_MSR);
+    v3_unhook_msr(vm, TSC_AUX_MSR);
+}
+
+
+static int init() {
+
+    time_state->tsc_aux.lo = 0;
+    time_state->tsc_aux.hi = 0;
+
+
+
+    PrintDebug("Installing TSC MSR hook.\n");
+    ret = v3_hook_msr(vm, TSC_MSR, 
+                     tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+
+    if (ret != 0) {
+       return ret;
+    }
+
+    PrintDebug("Installing TSC_AUX MSR hook.\n");
+    ret = v3_hook_msr(vm, TSC_AUX_MSR, tsc_aux_msr_read_hook, 
+                     tsc_aux_msr_write_hook, NULL);
+
+    if (ret != 0) {
+       return ret;
+    }
+}
diff --git a/palacios/src/extensions/null.c b/palacios/src/extensions/null.c

new file mode 100644 (file)

index 0000000..1828c44
--- /dev/null
+++ b/palacios/src/extensions/null.c
@@ -0,0 +1,6 @@
+/** \file
+ * Do nothing module.
+ *
+ * This file only exists to appease the kbuild gods.
+ */
+
diff --git a/palacios/src/interfaces/vmm_host_dev.c b/palacios/src/interfaces/vmm_host_dev.c

index 064d17f..0002cb2 100644 (file)
--- a/palacios/src/interfaces/vmm_host_dev.c
+++ b/palacios/src/interfaces/vmm_host_dev.c
@@ -29,12 +29,13 @@ struct v3_host_dev_hooks * host_dev_hooks = 0;
 
 v3_host_dev_t v3_host_dev_open(char *impl,
                               v3_bus_class_t bus,
-                              v3_guest_dev_t gdev)
+                              v3_guest_dev_t gdev,
+                              struct v3_vm_info *vm)
 {                                             
     V3_ASSERT(host_dev_hooks != NULL);
     V3_ASSERT(host_dev_hooks->open != NULL);
 
-    return host_dev_hooks->open(impl,bus,gdev);
+    return host_dev_hooks->open(impl,bus,gdev,vm->host_priv_data);
 }
 
 int v3_host_dev_close(v3_host_dev_t hdev) 
@@ -75,7 +76,7 @@ uint64_t v3_host_dev_read_mem(v3_host_dev_t hdev,
     V3_ASSERT(host_dev_hooks != NULL);
     V3_ASSERT(host_dev_hooks->read_mem != NULL);
     
-    return host_dev_hooks->read_mem(hdev,gpa,dst,len);
+    return host_dev_hooks->read_mem(hdev,(void*)gpa,dst,len);
 }
 
 uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev,
@@ -86,7 +87,7 @@ uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev,
     V3_ASSERT(host_dev_hooks != NULL);
     V3_ASSERT(host_dev_hooks->write_mem != NULL);
     
-    return host_dev_hooks->write_mem(hdev,gpa,src,len);
+    return host_dev_hooks->write_mem(hdev,(void*)gpa,src,len);
 }
 
 uint64_t v3_host_dev_read_config(v3_host_dev_t hdev,
@@ -140,7 +141,7 @@ int v3_host_dev_raise_irq(v3_host_dev_t hostdev,
 
 uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t  hostdev,
                                    v3_guest_dev_t guest_dev,
-                                   addr_t         gpa,
+                                   void *         gpa,
                                    void           *dst,
                                    uint64_t       len)
 {
@@ -154,14 +155,14 @@ uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t  hostdev,
        if (!vm) { 
            return 0;
        } else {
-           return v3_read_gpa_memory(&(vm->cores[0]), gpa, len, dst);
+           return v3_read_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, dst);
        }
     }
 }
 
 uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t  hostdev,
                                     v3_guest_dev_t guest_dev,
-                                    addr_t         gpa,
+                                    void *         gpa,
                                     void           *src,
                                     uint64_t       len)
 {
@@ -175,7 +176,7 @@ uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t  hostdev,
        if (!vm) { 
            return 0;
        } else {
-           return v3_write_gpa_memory(&(vm->cores[0]), gpa, len, src);
+           return v3_write_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, src);
        }
     }
 }
diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile

index c505b60..6a24b89 100644 (file)
--- a/palacios/src/palacios/Makefile
+++ b/palacios/src/palacios/Makefile
@@ -31,7 +31,6 @@ obj-y := \
        vmm_binaries.o \
        vmm_cpuid.o \
        vmm_xml.o \
-       vmm_muxer.o \
        vmm_mem_hook.o \
        vmm_mptable.o \
        vmm_extensions.o \
@@ -39,7 +38,6 @@ obj-y := \
        vmm_multitree.o \
 
 
-obj-$(CONFIG_INSPECTOR) += vmm_inspector.o
 
 
 obj-$(CONFIG_XED) +=   vmm_xed.o
@@ -59,6 +57,7 @@ obj-$(CONFIG_VMX) +=          vmx.o \
                        vmx_io.o \
                        vmx_lowlevel.o \
                        vmx_msr.o \
+                       vmx_hw_info.o \
                        vmcs.o \
                        vmx_ctrl_regs.o \
                        vmx_assist.o
diff --git a/palacios/src/palacios/vm_guest.c b/palacios/src/palacios/vm_guest.c

index b8cc549..4ab0134 100644 (file)
--- a/palacios/src/palacios/vm_guest.c
+++ b/palacios/src/palacios/vm_guest.c
@@ -28,7 +28,6 @@
 #include <palacios/vm_guest_mem.h>
 #include <palacios/vmm_lowlevel.h>
 #include <palacios/vmm_sprintf.h>
-#include <palacios/vmm_muxer.h>
 #include <palacios/vmm_xed.h>
 #include <palacios/vmm_direct_paging.h>
 
@@ -526,13 +525,7 @@ static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_dat
 int v3_init_vm(struct v3_vm_info * vm) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
 
-    if (v3_get_foreground_vm() == NULL) {
-       v3_set_foreground_vm(vm);
-    }
 
-#ifdef CONFIG_INSPECTOR
-    v3_init_inspector(vm);
-#endif
 
 #ifdef CONFIG_TELEMETRY
     v3_init_telemetry(vm);
@@ -661,9 +654,7 @@ int v3_init_core(struct guest_info * core) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
     struct v3_vm_info * vm = core->vm_info;
 
-#ifdef CONFIG_INSPECTOR
-    v3_init_inspector_core(core);
-#endif
+
 
     /*
      * Initialize the subsystem data strutures
diff --git a/palacios/src/palacios/vm_guest_mem.c b/palacios/src/palacios/vm_guest_mem.c

index bf4c30e..ca6c601 100644 (file)
--- a/palacios/src/palacios/vm_guest_mem.c
+++ b/palacios/src/palacios/vm_guest_mem.c
@@ -73,9 +73,9 @@ int v3_gpa_to_hpa(struct guest_info * info, addr_t gpa, addr_t * hpa) {
     }
     
     if (reg->flags.alloced == 0) {
-       PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n", 
-                  (void *)gpa);
-       v3_print_mem_map(info->vm_info);
+       //PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n", 
+       //         (void *)gpa);
+    //v3_print_mem_map(info->vm_info);
        return -1;
     }
        
@@ -133,8 +133,8 @@ int v3_gpa_to_hva(struct guest_info * guest_info, addr_t gpa, addr_t * hva) {
     *hva = 0;
 
     if (v3_gpa_to_hpa(guest_info, gpa, &hpa) != 0) {
-       PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n", 
-                  (void *)gpa);
+       //      PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n", 
+       //         (void *)gpa);
        return -1;
     }
   
diff --git a/palacios/src/palacios/vmcs.c b/palacios/src/palacios/vmcs.c

index 0b874fd..f8fe322 100644 (file)
--- a/palacios/src/palacios/vmcs.c
+++ b/palacios/src/palacios/vmcs.c
@@ -624,6 +624,7 @@ static void print_exec_ctrls() {
 #ifdef __V3_32BIT__
     print_vmcs_field(VMCS_IO_BITMAP_A_ADDR_HIGH);
 #endif
+
     print_vmcs_field(VMCS_IO_BITMAP_B_ADDR);
 #ifdef __V3_32BIT__
     print_vmcs_field(VMCS_IO_BITMAP_B_ADDR_HIGH);
@@ -762,162 +763,29 @@ void v3_print_vmcs() {
 
 /*
  * Returns the field length in bytes
+ *   It doesn't get much uglier than this... Thanks Intel
  */
 int v3_vmcs_get_field_len(vmcs_field_t field) {
-    switch(field)  {
-       /* 16 bit Control Fields */
-        case VMCS_GUEST_ES_SELECTOR:
-        case VMCS_GUEST_CS_SELECTOR:
-        case VMCS_GUEST_SS_SELECTOR:
-        case VMCS_GUEST_DS_SELECTOR:
-        case VMCS_GUEST_FS_SELECTOR:
-        case VMCS_GUEST_GS_SELECTOR:
-        case VMCS_GUEST_LDTR_SELECTOR:
-        case VMCS_GUEST_TR_SELECTOR:
-        case VMCS_HOST_ES_SELECTOR:
-        case VMCS_HOST_CS_SELECTOR:
-        case VMCS_HOST_SS_SELECTOR:
-        case VMCS_HOST_DS_SELECTOR:
-        case VMCS_HOST_FS_SELECTOR:
-        case VMCS_HOST_GS_SELECTOR:
-        case VMCS_HOST_TR_SELECTOR:
-            return 2;
-
-       /* 32 bit Control Fields */
-        case VMCS_PIN_CTRLS:
-        case VMCS_PROC_CTRLS:
-       case VMCS_SEC_PROC_CTRLS:
-        case VMCS_EXCP_BITMAP:
-        case VMCS_PG_FAULT_ERR_MASK:
-        case VMCS_PG_FAULT_ERR_MATCH:
-        case VMCS_CR3_TGT_CNT:
-        case VMCS_EXIT_CTRLS:
-        case VMCS_EXIT_MSR_STORE_CNT:
-        case VMCS_EXIT_MSR_LOAD_CNT:
-        case VMCS_ENTRY_CTRLS:
-        case VMCS_ENTRY_MSR_LOAD_CNT:
-        case VMCS_ENTRY_INT_INFO:
-        case VMCS_ENTRY_EXCP_ERR:
-        case VMCS_ENTRY_INSTR_LEN:
-        case VMCS_TPR_THRESHOLD:
-        case VMCS_INSTR_ERR:
-        case VMCS_EXIT_REASON:
-        case VMCS_EXIT_INT_INFO:
-        case VMCS_EXIT_INT_ERR:
-        case VMCS_IDT_VECTOR_INFO:
-        case VMCS_IDT_VECTOR_ERR:
-        case VMCS_EXIT_INSTR_LEN:
-        case VMCS_EXIT_INSTR_INFO:
-        case VMCS_GUEST_ES_LIMIT:
-        case VMCS_GUEST_CS_LIMIT:
-        case VMCS_GUEST_SS_LIMIT:
-        case VMCS_GUEST_DS_LIMIT:
-        case VMCS_GUEST_FS_LIMIT:
-        case VMCS_GUEST_GS_LIMIT:
-        case VMCS_GUEST_LDTR_LIMIT:
-        case VMCS_GUEST_TR_LIMIT:
-        case VMCS_GUEST_GDTR_LIMIT:
-        case VMCS_GUEST_IDTR_LIMIT:
-        case VMCS_GUEST_ES_ACCESS:
-        case VMCS_GUEST_CS_ACCESS:
-        case VMCS_GUEST_SS_ACCESS:
-        case VMCS_GUEST_DS_ACCESS:
-        case VMCS_GUEST_FS_ACCESS:
-        case VMCS_GUEST_GS_ACCESS:
-        case VMCS_GUEST_LDTR_ACCESS:
-        case VMCS_GUEST_TR_ACCESS:
-        case VMCS_GUEST_INT_STATE:
-        case VMCS_GUEST_ACTIVITY_STATE:
-        case VMCS_GUEST_SMBASE:
-        case VMCS_GUEST_SYSENTER_CS:
-        case VMCS_HOST_SYSENTER_CS:
-            return 4;
+    struct vmcs_field_encoding * enc = (struct vmcs_field_encoding *)&field;
 
-
-       /* high bits of variable width fields
-        * We can probably just delete most of these....
-        */
-        case VMCS_IO_BITMAP_A_ADDR_HIGH:
-        case VMCS_IO_BITMAP_B_ADDR_HIGH:
-        case VMCS_MSR_BITMAP_HIGH:
-        case VMCS_EXIT_MSR_STORE_ADDR_HIGH:
-        case VMCS_EXIT_MSR_LOAD_ADDR_HIGH:
-        case VMCS_ENTRY_MSR_LOAD_ADDR_HIGH:
-        case VMCS_EXEC_PTR_HIGH:
-        case VMCS_TSC_OFFSET_HIGH:
-        case VMCS_VAPIC_ADDR_HIGH:
-       case VMCS_APIC_ACCESS_ADDR_HIGH:
-        case VMCS_LINK_PTR_HIGH:
-        case VMCS_GUEST_DBG_CTL_HIGH:
-        case VMCS_GUEST_PERF_GLOBAL_CTRL_HIGH:
-       case VMCS_HOST_PERF_GLOBAL_CTRL_HIGH:
-       case VMCS_GUEST_EFER_HIGH:
+    switch (enc->width)  {
+       case 0:
+            return 2;
+       case 1: {
+           if (enc->access_type == 1) {
+               return 4;
+           } else {
+#ifdef __V3_64BIT__
+               return 8;
+#else
+               return 4;
+#endif
+           }
+       }
+       case 2:
             return 4;
-
-            /* Natural Width Control Fields */
-        case VMCS_IO_BITMAP_A_ADDR:
-        case VMCS_IO_BITMAP_B_ADDR:
-        case VMCS_MSR_BITMAP:
-        case VMCS_EXIT_MSR_STORE_ADDR:
-        case VMCS_EXIT_MSR_LOAD_ADDR:
-        case VMCS_ENTRY_MSR_LOAD_ADDR:
-        case VMCS_EXEC_PTR:
-        case VMCS_TSC_OFFSET:
-        case VMCS_VAPIC_ADDR:
-       case VMCS_APIC_ACCESS_ADDR:
-        case VMCS_LINK_PTR:
-        case VMCS_GUEST_DBG_CTL:
-        case VMCS_GUEST_PERF_GLOBAL_CTRL:
-       case VMCS_HOST_PERF_GLOBAL_CTRL:
-        case VMCS_CR0_MASK:
-        case VMCS_CR4_MASK:
-        case VMCS_CR0_READ_SHDW:
-        case VMCS_CR4_READ_SHDW:
-        case VMCS_CR3_TGT_VAL_0:
-        case VMCS_CR3_TGT_VAL_1:
-        case VMCS_CR3_TGT_VAL_2:
-        case VMCS_CR3_TGT_VAL_3:
-        case VMCS_EXIT_QUAL:
-        case VMCS_IO_RCX:
-        case VMCS_IO_RSI:
-        case VMCS_IO_RDI:
-        case VMCS_IO_RIP:
-        case VMCS_GUEST_LINEAR_ADDR:
-        case VMCS_GUEST_CR0:
-        case VMCS_GUEST_CR3:
-        case VMCS_GUEST_CR4:
-        case VMCS_GUEST_ES_BASE:
-        case VMCS_GUEST_CS_BASE:
-        case VMCS_GUEST_SS_BASE:
-        case VMCS_GUEST_DS_BASE:
-        case VMCS_GUEST_FS_BASE:
-        case VMCS_GUEST_GS_BASE:
-        case VMCS_GUEST_LDTR_BASE:
-        case VMCS_GUEST_TR_BASE:
-        case VMCS_GUEST_GDTR_BASE:
-        case VMCS_GUEST_IDTR_BASE:
-        case VMCS_GUEST_DR7:
-        case VMCS_GUEST_RSP:
-        case VMCS_GUEST_RIP:
-        case VMCS_GUEST_RFLAGS:
-        case VMCS_GUEST_PENDING_DBG_EXCP:
-        case VMCS_GUEST_SYSENTER_ESP:
-        case VMCS_GUEST_SYSENTER_EIP:
-        case VMCS_HOST_CR0:
-        case VMCS_HOST_CR3:
-        case VMCS_HOST_CR4:
-        case VMCS_HOST_FS_BASE:
-        case VMCS_HOST_GS_BASE:
-        case VMCS_HOST_TR_BASE:
-        case VMCS_HOST_GDTR_BASE:
-        case VMCS_HOST_IDTR_BASE:
-        case VMCS_HOST_SYSENTER_ESP:
-        case VMCS_HOST_SYSENTER_EIP:
-        case VMCS_HOST_RSP:
-        case VMCS_HOST_RIP:
-       case VMCS_GUEST_EFER:
+       case 3:
             return sizeof(addr_t);
-
         default:
            PrintError("Invalid VMCS field: 0x%x\n", field);
             return -1;
diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c

index ea24ca3..cb38aab 100644 (file)
--- a/palacios/src/palacios/vmm.c
+++ b/palacios/src/palacios/vmm.c
@@ -183,14 +183,13 @@ v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
     struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
 
-    V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
-
-
     if (vm == NULL) {
        PrintError("Could not configure guest\n");
        return NULL;
     }
 
+    V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
+
     if (name == NULL) {
        name = "[V3_VM]";
     } else if (strlen(name) >= 128) {
@@ -374,8 +373,6 @@ int v3_stop_vm(struct v3_vm_info * vm) {
            break;
        }
 
-       V3_Print("Yielding\n");
-
        v3_yield(NULL);
     }
     
diff --git a/palacios/include/palacios/vmm_muxer.h b/palacios/src/palacios/vmm_barrier.c

similarity index 51%

rename from palacios/include/palacios/vmm_muxer.h

rename to palacios/src/palacios/vmm_barrier.c

index 1c50789..1115ce0 100644 (file)
--- a/palacios/include/palacios/vmm_muxer.h
+++ b/palacios/src/palacios/vmm_barrier.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the Palacios Virtual Machine Monitor developed
  * by the V3VEE Project with funding from the United States National 
  * Science Foundation and the Department of Energy.  
@@ -7,33 +7,15 @@
  * and the University of New Mexico.  You can find out more at 
  * http://www.v3vee.org
  *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
  * All rights reserved.
  *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  */
 
-#ifndef __VMM_MUXER_H__
-#define __VMM_MUXER_H__
 
-#ifdef __V3VEE__
-
-
-struct v3_vm_info;
-
-
-
-struct v3_vm_info * v3_get_foreground_vm();
-void v3_set_foreground_vm(struct v3_vm_info * vm);
-
-
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm));
-
-
-#endif
-
-#endif
+#include <util/vmm_barrier.h>
diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c

index b1d747e..25d8b23 100644 (file)
--- a/palacios/src/palacios/vmm_config.c
+++ b/palacios/src/palacios/vmm_config.c
@@ -410,6 +410,11 @@ static int post_config_core(struct guest_info * info, v3_cfg_tree_t * cfg) {
 
     info->core_run_state = CORE_STOPPED;
  
+    if (v3_init_core_extensions(info) == -1) {
+        PrintError("Error intializing extension core states\n");
+        return -1;
+    }
+
     if (info->vm_info->vm_class == V3_PC_VM) {
        if (post_config_pc_core(info, cfg) == -1) {
            PrintError("PC Post configuration failure\n");
@@ -552,6 +557,7 @@ int v3_free_config(struct v3_vm_info * vm) {
 
 
 
+
 static int setup_memory_map(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     v3_cfg_tree_t * mem_region = v3_cfg_subtree(v3_cfg_subtree(cfg, "memmap"), "region");
 
diff --git a/palacios/src/palacios/vmm_config_class.h b/palacios/src/palacios/vmm_config_class.h

index 9fcf197..0c7ef4c 100644 (file)
--- a/palacios/src/palacios/vmm_config_class.h
+++ b/palacios/src/palacios/vmm_config_class.h
@@ -77,13 +77,21 @@ static int post_config_pc(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     }
 
 
-    if (vm->num_cores > 1) {
+    if (vm->num_cores>1 && !v3_find_dev(vm,"apic")) { 
+       PrintError("palacios: VM has more than one core, but no device named \"apic\"!\n");
+       return -1;
+    } 
+    
+    if (v3_find_dev(vm,"apic")) { 
+       if (!v3_find_dev(vm,"ioapic")) { 
+           PrintError("palacios: VM cores have apics, but there is no device named \"ioapic\"!\n");
+       }
        if (v3_inject_mptable(vm) == -1) { 
            PrintError("Failed to inject mptable during configuration\n");
            return -1;
        }
     }
-
+    
     return 0;
 }
 
diff --git a/palacios/src/palacios/vmm_cpuid.c b/palacios/src/palacios/vmm_cpuid.c

index a093bf7..ce7c244 100644 (file)
--- a/palacios/src/palacios/vmm_cpuid.c
+++ b/palacios/src/palacios/vmm_cpuid.c
@@ -7,11 +7,10 @@
  * and the University of New Mexico.  You can find out more at 
  * http://www.v3vee.org
  *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
  * All rights reserved.
  *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
@@ -22,11 +21,33 @@
 #include <palacios/vmm_lowlevel.h>
 #include <palacios/vm_guest.h>
 
+struct masked_cpuid {
+    uint32_t rax_mask;
+    uint32_t rbx_mask;
+    uint32_t rcx_mask;
+    uint32_t rdx_mask;
+
+    uint32_t rax;
+    uint32_t rbx;
+    uint32_t rcx;
+    uint32_t rdx;
+};
+
 
 void v3_init_cpuid_map(struct v3_vm_info * vm) {
     vm->cpuid_map.map.rb_node = NULL;
+
+    // Setup default cpuid entries
+
+
+    // Disable XSAVE (cpuid 0x01, ECX bit 26)
+    v3_cpuid_add_fields(vm, 0x01, 0, 0, 0, 0, (1 << 26), 0, 0, 0);
+
 }
 
+
+
+
 int v3_deinit_cpuid_map(struct v3_vm_info * vm) {
     struct rb_node * node = v3_rb_first(&(vm->cpuid_map.map));
     struct v3_cpuid_hook * hook = NULL;
@@ -104,6 +125,105 @@ static struct v3_cpuid_hook * get_cpuid_hook(struct v3_vm_info * vm, uint32_t cp
 }
 
 
+// cpuid hook: report the values from v3_cpuid() with the configured field overrides applied.
+static int mask_hook(struct guest_info * core, uint32_t cpuid, 
+             uint32_t * eax, uint32_t * ebx, 
+             uint32_t * ecx, uint32_t * edx,
+             void * priv_data) {
+    // priv_data carries the struct masked_cpuid that describes the overrides
+    struct masked_cpuid * fields = (struct masked_cpuid *)priv_data;
+
+    // Fetch the values for this leaf
+    v3_cpuid(cpuid, eax, ebx, ecx, edx);
+
+    // Per register: drop every bit covered by the mask, then OR in the
+    // replacement bits. Always returns 0.
+    *eax = (*eax & ~(fields->rax_mask)) | fields->rax;
+
+    *ebx = (*ebx & ~(fields->rbx_mask)) | fields->rbx;
+
+    *ecx = (*ecx & ~(fields->rcx_mask)) | fields->rcx;
+
+    *edx = (*edx & ~(fields->rdx_mask)) | fields->rdx;
+
+    return 0;
+}
+/*
+ * Override selected bit fields of cpuid leaf 'cpuid' for the guest.
+ * For each register, bits set in <reg>_mask are replaced by the matching bits of <reg>;
+ * <reg> may only contain bits that are also set in <reg>_mask.
+ * The first call for a leaf installs mask_hook; later calls merge further, non-overlapping
+ * fields into the existing mask. Returns 0 on success and -1 on any error.
+ */
+int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid, 
+                       uint32_t rax_mask, uint32_t rax,
+                       uint32_t rbx_mask, uint32_t rbx, 
+                       uint32_t rcx_mask, uint32_t rcx, 
+                       uint32_t rdx_mask, uint32_t rdx) {
+    struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid);
+
+    // Checked on every path so a freshly created mask obeys the same rule as a merged one
+    if ((~rax_mask & rax) || (~rbx_mask & rbx) ||
+       (~rcx_mask & rcx) || (~rdx_mask & rdx)) {
+       PrintError("Invalid cpuid reg value (mask overrun)\n");
+       return -1;
+    }
+
+    if (hook == NULL) {
+       struct masked_cpuid * mask = V3_Malloc(sizeof(struct masked_cpuid));
+
+       if (mask == NULL) {
+           PrintError("Could not allocate mask for cpuid %d\n", cpuid);
+           return -1;
+       }
+       memset(mask, 0, sizeof(struct masked_cpuid));
+       mask->rax_mask = rax_mask;
+       mask->rax = rax;
+       mask->rbx_mask = rbx_mask;
+       mask->rbx = rbx;
+       mask->rcx_mask = rcx_mask;
+       mask->rcx = rcx;
+       mask->rdx_mask = rdx_mask;
+       mask->rdx = rdx;
+
+       if (v3_hook_cpuid(vm, cpuid, mask_hook, mask) == -1) {
+           PrintError("Error hooking cpuid %d\n", cpuid);
+           V3_Free(mask);   // not installed; assumes v3_hook_cpuid() keeps no reference on failure
+           return -1;
+       }
+    } else {
+       struct masked_cpuid * mask = NULL;
+
+       if (hook->hook_fn != mask_hook) {
+           PrintError("trying to add fields to a fully hooked cpuid (%d)\n", cpuid);
+           return -1;
+       }
+       
+       mask = (struct masked_cpuid *)(hook->private_data);
+
+       if ((mask->rax_mask & rax_mask) ||
+           (mask->rbx_mask & rbx_mask) || 
+           (mask->rcx_mask & rcx_mask) || 
+           (mask->rdx_mask & rdx_mask)) {
+           PrintError("Trying to add fields that have already been masked\n");
+           return -1;
+       }
+
+       mask->rax_mask |= rax_mask;
+       mask->rbx_mask |= rbx_mask;
+       mask->rcx_mask |= rcx_mask;
+       mask->rdx_mask |= rdx_mask;
+       
+       // Merge the new value bits; bits inside the new mask that the new value leaves clear stay clear
+       mask->rax = (mask->rax | rax) & (~rax_mask | rax);
+       mask->rbx = (mask->rbx | rbx) & (~rbx_mask | rbx);
+       mask->rcx = (mask->rcx | rcx) & (~rcx_mask | rcx);
+       mask->rdx = (mask->rdx | rdx) & (~rdx_mask | rdx);
+    }
+
+    return 0;
+}
+
 int v3_unhook_cpuid(struct v3_vm_info * vm, uint32_t cpuid) {
     struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid);
 
@@ -185,3 +305,8 @@ int v3_handle_cpuid(struct guest_info * info) {
 
     return 0;
 }
+
+
+
+
+
diff --git a/palacios/src/palacios/vmm_emulator.c b/palacios/src/palacios/vmm_emulator.c

index 15a56d6..c05e09d 100644 (file)
--- a/palacios/src/palacios/vmm_emulator.c
+++ b/palacios/src/palacios/vmm_emulator.c
@@ -314,7 +314,7 @@ static int run_str_op(struct guest_info * core, struct x86_instr * instr,
     struct rflags * flags_reg = (struct rflags *)&(core->ctrl_regs.rflags);
 
 
-    PrintError("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx);
+    PrintDebug("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx);
 
 
     if (instr->op_type == V3_OP_MOVS) {
diff --git a/palacios/src/palacios/vmm_extensions.c b/palacios/src/palacios/vmm_extensions.c

index de1cfa0..1db9dc6 100644 (file)
--- a/palacios/src/palacios/vmm_extensions.c
+++ b/palacios/src/palacios/vmm_extensions.c
@@ -69,6 +69,8 @@ int V3_init_extensions() {
 }
 
 
+
+
 int V3_deinit_extensions() {
     v3_free_htable(ext_table, 0, 0);
     return 0;
@@ -85,6 +87,15 @@ int v3_init_ext_manager(struct v3_vm_info * vm) {
     return 0;
 }
 
+
+int v3_deinit_ext_manager(struct v3_vm_info * vm)  {
+       /* Placeholder: no extension teardown is implemented here; it only logs and reports failure (-1) */
+       PrintError("I should really do something here... \n");
+       return -1;
+}
+
+
+
 int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg) {
     struct v3_extension_impl * impl = NULL;
     struct v3_extension * ext = NULL;
@@ -125,3 +136,34 @@ int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t *
     
     return 0;
 }
+// Invoke the per-core init callback of each extension attached to this core's VM; -1 on the first failure.
+int v3_init_core_extensions(struct guest_info * core) {
+    struct v3_extension * cur = NULL;
+    list_for_each_entry(cur, &(core->vm_info->extensions.extensions), node) {
+       if ((cur->impl == NULL) || (cur->impl->core_init == NULL)) {
+           continue;   // nothing to run for this extension
+       }
+       if (cur->impl->core_init(core, cur->priv_data) == -1) {
+           PrintError("Error configuring per core extension %s on core %d\n", 
+                      cur->impl->name, core->cpu_id);
+           return -1;
+       }
+    }
+
+    return 0;
+}
+
+
+// Return the private state of the extension registered under exactly 'name' in this VM,
+// or NULL when no such extension is attached.
+void * v3_get_extension_state(struct v3_vm_info * vm, const char * name) {
+    struct v3_extension * ext = NULL;
+
+    list_for_each_entry(ext, &(vm->extensions.extensions), node) {
+       // Compare through the terminator (+1) so an extension "foo" does not also answer for "foobar"
+       if ((ext->impl) && (strncmp(ext->impl->name, name, strlen(ext->impl->name) + 1) == 0)) {
+           return ext->priv_data;
+       }
+    }
+    return NULL;
+}
diff --git a/palacios/src/palacios/vmm_host_events.c b/palacios/src/palacios/vmm_host_events.c

index 432b9fb..3e6d09b 100644 (file)
--- a/palacios/src/palacios/vmm_host_events.c
+++ b/palacios/src/palacios/vmm_host_events.c
@@ -20,7 +20,6 @@
 #include <palacios/vmm.h>
 #include <palacios/vmm_host_events.h>
 #include <palacios/vm_guest.h>
-#include <palacios/vmm_muxer.h>
 
 int v3_init_host_events(struct v3_vm_info * vm) {
     struct v3_host_events * host_evts = &(vm->host_event_hooks);
@@ -125,9 +124,6 @@ int v3_deliver_keyboard_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -150,9 +146,6 @@ int v3_deliver_mouse_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -175,9 +168,6 @@ int v3_deliver_timer_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -199,9 +189,6 @@ int v3_deliver_serial_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -225,9 +212,6 @@ int v3_deliver_console_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -250,9 +234,6 @@ int v3_deliver_packet_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
diff --git a/palacios/src/palacios/vmm_muxer.c b/palacios/src/palacios/vmm_muxer.c

deleted file mode 100644 (file)

index 01e8169..0000000
--- a/palacios/src/palacios/vmm_muxer.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* 
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National 
- * Science Foundation and the Department of Energy.  
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico.  You can find out more at 
- * http://www.v3vee.org
- *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
- * All rights reserved.
- *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
- *
- * This is free software.  You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm.h>
-#include <palacios/vmm_muxer.h>
-#include <palacios/vmm_list.h>
-
-
-
-static struct v3_vm_info * foreground_vm = NULL;
-
-// list of notification callbacks
-static LIST_HEAD(cb_list);
-
-
-struct mux_callback {
-    struct list_head cb_node;
-
-    int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm);
-};
-
-
-struct v3_vm_info * v3_get_foreground_vm() {
-    return foreground_vm;
-}
-
-
-void v3_set_foreground_vm(struct v3_vm_info * vm) {
-    struct mux_callback * tmp_cb;
-
-    list_for_each_entry(tmp_cb, &(cb_list), cb_node) {
-       tmp_cb->focus_change(foreground_vm, vm);
-    }
-
-    foreground_vm = vm;
-}
-
-
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, 
-                                               struct v3_vm_info * new_vm)) {
-
-    struct mux_callback * cb = (struct mux_callback *)V3_Malloc(sizeof(struct mux_callback));
-
-    cb->focus_change = focus_change;
-    
-    list_add(&(cb->cb_node), &cb_list);
-
-    return 0;
-}
diff --git a/palacios/src/palacios/vmm_queue.c b/palacios/src/palacios/vmm_queue.c

index b06ff73..03cfb6d 100644 (file)
--- a/palacios/src/palacios/vmm_queue.c
+++ b/palacios/src/palacios/vmm_queue.c
@@ -19,20 +19,20 @@
 
 #include <palacios/vmm_queue.h>
 
-void v3_init_queue(struct gen_queue * queue) {
+void v3_init_queue(struct v3_queue * queue) {   // reset a caller-provided queue: zero entries, empty list, initialized lock
     queue->num_entries = 0;
     INIT_LIST_HEAD(&(queue->entries));
     v3_lock_init(&queue->lock);
 }
 
-struct gen_queue * v3_create_queue() {
-    struct gen_queue * tmp_queue = V3_Malloc(sizeof(struct gen_queue));
+struct v3_queue * v3_create_queue() {   // allocate a queue with V3_Malloc, initialize it, and return it
+    struct v3_queue * tmp_queue = V3_Malloc(sizeof(struct v3_queue));   // NOTE(review): result is not checked before v3_init_queue() writes through it
     v3_init_queue(tmp_queue);
     return tmp_queue;
 }
 
-void v3_enqueue(struct gen_queue * queue, addr_t entry) {
-    struct queue_entry * q_entry = V3_Malloc(sizeof(struct queue_entry));
+void v3_enqueue(struct v3_queue * queue, addr_t entry) {
+    struct v3_queue_entry * q_entry = V3_Malloc(sizeof(struct v3_queue_entry));
 
     v3_lock(queue->lock);
     q_entry->entry = entry;
@@ -42,13 +42,13 @@ void v3_enqueue(struct gen_queue * queue, addr_t entry) {
 }
 
 
-addr_t v3_dequeue(struct gen_queue * queue) {
+addr_t v3_dequeue(struct v3_queue * queue) {
     addr_t entry_val = 0;
 
     v3_lock(queue->lock);
     if (!list_empty(&(queue->entries))) {
        struct list_head * q_entry = queue->entries.next;
-       struct queue_entry * tmp_entry = list_entry(q_entry, struct queue_entry, entry_list);
+       struct v3_queue_entry * tmp_entry = list_entry(q_entry, struct v3_queue_entry, entry_list);
 
        entry_val = tmp_entry->entry;
        list_del(q_entry);
diff --git a/palacios/src/palacios/vmm_vnet_core.c b/palacios/src/palacios/vmm_vnet_core.c

index e0e0ac7..4b54d71 100644 (file)
--- a/palacios/src/palacios/vmm_vnet_core.c
+++ b/palacios/src/palacios/vmm_vnet_core.c
@@ -31,6 +31,8 @@
 #define PrintDebug(fmt, args...)
 #endif
 
+int v3_net_debug = 0;
+
 struct eth_hdr {
     uint8_t dst_mac[ETH_ALEN];
     uint8_t src_mac[ETH_ALEN];
@@ -45,11 +47,6 @@ struct vnet_dev {
     struct v3_vnet_dev_ops dev_ops;
     void * private_data;
 
-    int active;
-
-    uint64_t bytes_tx, bytes_rx;
-    uint32_t pkts_tx, pkt_rx;
-    
     struct list_head node;
 } __attribute__((packed));
 
@@ -60,7 +57,6 @@ struct vnet_brg_dev {
 
     uint8_t type;
 
-    int active;
     void * private_data;
 } __attribute__((packed));
 
@@ -85,6 +81,20 @@ struct route_list {
 } __attribute__((packed));
 
 
+struct queue_entry{                       /* one slot of the VNET packet ring */
+    uint8_t use;                          /* set to 1 once the slot is filled, back to 0 after it is transmitted */
+    struct v3_vnet_pkt pkt;               /* copy of the queued packet's descriptor */
+    uint8_t data[ETHERNET_PACKET_LEN];    /* copy of the packet payload */
+};
+/* Fixed-size ring of packets waiting to be transmitted by vnet_tx_flush() */
+#define VNET_QUEUE_SIZE 10240
+struct vnet_queue {
+       struct queue_entry buf[VNET_QUEUE_SIZE];
+       int head, tail;  /* head: next slot to dequeue; tail: next slot to fill (both wrap at VNET_QUEUE_SIZE) */
+       int count;       /* slots currently reserved; enqueue fails once it reaches VNET_QUEUE_SIZE */
+       v3_lock_t lock;  /* held while head/tail/count are updated */
+};
+
 static struct {
     struct list_head routes;
     struct list_head devs;
@@ -97,10 +107,13 @@ static struct {
     v3_lock_t lock;
     struct vnet_stat stats;
 
-    struct hashtable * route_cache;
-} vnet_state;
+    void * pkt_flush_thread;
 
+    struct vnet_queue pkt_q;
 
+    struct hashtable * route_cache;
+} vnet_state;
+       
 
 #ifdef CONFIG_DEBUG_VNET
 static inline void mac_to_string(uint8_t * mac, char * buf) {
@@ -182,7 +195,8 @@ static int clear_hash_cache() {
     return 0;
 }
 
-static int look_into_cache(const struct v3_vnet_pkt * pkt, struct route_list ** routes) {
+static int look_into_cache(const struct v3_vnet_pkt * pkt, 
+                          struct route_list ** routes) {
     *routes = (struct route_list *)v3_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf));
    
     return 0;
@@ -306,8 +320,8 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
     int max_rank = 0;
     struct list_head match_list;
     struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
-//    uint8_t src_type = pkt->src_type;
-  //  uint32_t src_link = pkt->src_id;
+    //    uint8_t src_type = pkt->src_type;
+    //  uint32_t src_link = pkt->src_id;
 
 #ifdef CONFIG_DEBUG_VNET
     {
@@ -425,19 +439,18 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
 }
 
 
-int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
+int vnet_tx_one_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
     struct route_list * matched_routes = NULL;
     unsigned long flags;
     int i;
 
-#ifdef CONFIG_DEBUG_VNET
-   {
-       int cpu = V3_Get_CPU();
-       PrintDebug("VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
+    int cpu = V3_Get_CPU();
+    V3_Net_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
                  cpu, pkt->size, pkt->src_id, 
                  pkt->src_type, pkt->dst_id, pkt->dst_type);
-   }
-#endif
+    if(v3_net_debug >= 4){
+           v3_hexdump(pkt->data, pkt->size, NULL, 0);
+    }
 
     flags = v3_lock_irqsave(vnet_state.lock);
 
@@ -466,30 +479,30 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
     for (i = 0; i < matched_routes->num_routes; i++) {
        struct vnet_route_info * route = matched_routes->routes[i];
        
-        if (route->route_def.dst_type == LINK_EDGE) {
-           struct vnet_brg_dev *bridge = vnet_state.bridge;
-            pkt->dst_type = LINK_EDGE;
-            pkt->dst_id = route->route_def.dst_id;
+       if (route->route_def.dst_type == LINK_EDGE) {
+           struct vnet_brg_dev * bridge = vnet_state.bridge;
+           pkt->dst_type = LINK_EDGE;
+           pkt->dst_id = route->route_def.dst_id;
 
-           if (bridge == NULL || (bridge->active == 0)) {
-               PrintDebug("VNET/P Core: No active bridge to sent data to\n");
+           if (bridge == NULL) {
+               V3_Net_Print(2, "VNET/P Core: No active bridge to sent data to\n");
                 continue;
            }
 
            if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){
-                PrintDebug("VNET/P Core: Packet not sent properly to bridge\n");
+                V3_Net_Print(2, "VNET/P Core: Packet not sent properly to bridge\n");
                 continue;
            }         
            vnet_state.stats.tx_bytes += pkt->size;
            vnet_state.stats.tx_pkts ++;
         } else if (route->route_def.dst_type == LINK_INTERFACE) {
-            if (route->dst_dev == NULL || route->dst_dev->active == 0){
-               PrintDebug("VNET/P Core: No active device to sent data to\n");
+            if (route->dst_dev == NULL){
+                 V3_Net_Print(2, "VNET/P Core: No active device to sent data to\n");
                continue;
             }
 
            if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) {
-                PrintDebug("VNET/P Core: Packet not sent properly\n");
+                V3_Net_Print(2, "VNET/P Core: Packet not sent properly\n");
                 continue;
            }
            vnet_state.stats.tx_bytes += pkt->size;
@@ -502,6 +515,50 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
     return 0;
 }
 
+/* Copy pkt (descriptor and payload) into the next free slot of the VNET packet ring.
+ * Returns 0 on success, -1 if the payload does not fit in a slot or the ring is full. */
+static int vnet_pkt_enqueue(struct v3_vnet_pkt * pkt){
+    unsigned long flags;
+    struct queue_entry * entry;
+    struct vnet_queue * q = &(vnet_state.pkt_q);
+
+    if (pkt->size > sizeof(entry->data)) {   /* would overflow the slot's data buffer */
+       V3_Net_Print(1, "VNET Queue: pkt too large (size %d)\n", pkt->size);
+       return -1;
+    }
+
+    flags = v3_lock_irqsave(q->lock);
+    if (q->count >= VNET_QUEUE_SIZE){
+       V3_Net_Print(1, "VNET Queue overflow!\n");
+       v3_unlock_irqrestore(q->lock, flags);
+       return -1;
+    }
+    q->count ++;
+    entry = &(q->buf[q->tail++]);
+    q->tail %= VNET_QUEUE_SIZE;
+    v3_unlock_irqrestore(q->lock, flags);
+
+    /* wait until the consumer has released this slot; volatile read so the flag is re-read each pass */
+    while (*(volatile uint8_t *)&(entry->use));
+    memcpy(&(entry->pkt), pkt, sizeof(struct v3_vnet_pkt));
+    memcpy(entry->data, pkt->data, pkt->size);
+    entry->pkt.data = entry->data;   /* set after the struct copy, which would otherwise overwrite it */
+    entry->use = 1;
+    return 0;
+}
+
+/* Send pkt through VNET: routed immediately when synchronize is non-zero, otherwise queued for vnet_tx_flush() */
+int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize) {
+    if (synchronize) {
+       return vnet_tx_one_pkt(pkt, NULL);   /* private_data is not forwarded, as before */
+    }
+    if (vnet_pkt_enqueue(pkt) == -1) {
+       return -1;   /* packet was dropped (enqueue logged why); do not report it as queued */
+    }
+    V3_Net_Print(2, "VNET/P Core: Put pkt into Queue: pkt size %d\n", pkt->size);
+    return 0;
+}
+
 int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, 
                    struct v3_vnet_dev_ops *ops,
                    void * priv_data){
@@ -517,11 +574,9 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
    
     memcpy(new_dev->mac_addr, mac, 6);
     new_dev->dev_ops.input = ops->input;
-    new_dev->dev_ops.poll = ops->poll;
     new_dev->private_data = priv_data;
     new_dev->vm = vm;
     new_dev->dev_id = 0;
-    new_dev->active = 1;
 
     flags = v3_lock_irqsave(vnet_state.lock);
 
@@ -544,7 +599,6 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
 }
 
 
-
 int v3_vnet_del_dev(int dev_id){
     struct vnet_dev * dev = NULL;
     unsigned long flags;
@@ -566,6 +620,7 @@ int v3_vnet_del_dev(int dev_id){
     return 0;
 }
 
+
 int v3_vnet_stat(struct vnet_stat * stats){
        
     stats->rx_bytes = vnet_state.stats.rx_bytes;
@@ -604,12 +659,10 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
     struct vnet_brg_dev * tmp_bridge = NULL;    
     
     flags = v3_lock_irqsave(vnet_state.lock);
-
     if (vnet_state.bridge == NULL) {
        bridge_free = 1;
        vnet_state.bridge = (void *)1;
     }
-
     v3_unlock_irqrestore(vnet_state.lock, flags);
 
     if (bridge_free == 0) {
@@ -629,7 +682,6 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
     tmp_bridge->brg_ops.input = ops->input;
     tmp_bridge->brg_ops.poll = ops->poll;
     tmp_bridge->private_data = priv_data;
-    tmp_bridge->active = 1;
     tmp_bridge->type = type;
        
     /* make this atomic to avoid possible race conditions */
@@ -641,20 +693,39 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
 }
 
 
-void v3_vnet_do_poll(struct v3_vm_info * vm){
-    struct vnet_dev * dev = NULL;
+static int vnet_tx_flush(void *args){   /* thread body; args is unused and the loop never returns */
+    unsigned long flags;
+    struct queue_entry * entry;
+    struct vnet_queue * q = &(vnet_state.pkt_q);
 
-    /* TODO: run this on separate threads
-      * round-robin schedule, with maximal budget for each poll
-      */
-    list_for_each_entry(dev, &(vnet_state.devs), node) {
-           if(dev->dev_ops.poll != NULL){
-               dev->dev_ops.poll(vm, -1, dev->private_data);
-           }
+    V3_Print("VNET/P Handing Pkt Thread Starting ....\n");
+
+    /* Consumer side of vnet_state.pkt_q: transmit queued packets one at a time, yielding when empty. */
+    /* we need thread sleep/wakeup in Palacios */
+    while(1){
+       flags = v3_lock_irqsave(q->lock);
+
+       if (q->count <= 0){
+           v3_unlock_irqrestore(q->lock, flags);
+           v3_yield(NULL);
+           //V3_THREAD_SLEEP();
+       }else {
+           q->count --;
+           entry = &(q->buf[q->head++]);
+           q->head %= VNET_QUEUE_SIZE;
+
+           v3_unlock_irqrestore(q->lock, flags);
+
+           /* the slot is reserved before it is filled; volatile read so the wait re-reads the flag */
+           while(!*(volatile uint8_t *)&(entry->use));
+           vnet_tx_one_pkt(&(entry->pkt), NULL);
+
+           V3_Net_Print(2, "vnet_tx_flush: pkt (size %d)\n", entry->pkt.size);   
+           entry->use = 0;   /* release last: the producer may overwrite the slot after this */
+       }
     }
 }
 
-
 int v3_init_vnet() {
     memset(&vnet_state, 0, sizeof(vnet_state));
        
@@ -669,12 +740,15 @@ int v3_init_vnet() {
     }
 
     vnet_state.route_cache = v3_create_htable(0, &hash_fn, &hash_eq);
-
     if (vnet_state.route_cache == NULL) {
         PrintError("VNET/P Core: Fails to initiate route cache\n");
         return -1;
     }
 
+    v3_lock_init(&(vnet_state.pkt_q.lock));
+
+    vnet_state.pkt_flush_thread = V3_CREATE_THREAD(vnet_tx_flush, NULL, "VNET_Pkts");
+
     PrintDebug("VNET/P Core is initiated\n");
 
     return 0;
diff --git a/palacios/src/palacios/vmm_xed.c b/palacios/src/palacios/vmm_xed.c

index 9f3d7ac..80fbfde 100644 (file)
--- a/palacios/src/palacios/vmm_xed.c
+++ b/palacios/src/palacios/vmm_xed.c
@@ -465,7 +465,7 @@ int v3_decode(struct guest_info * info, addr_t instr_ptr, struct x86_instr * ins
            }
        }
 
-       V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
+//     V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
 
 
        if (xed_operand_read(op)) {
@@ -555,7 +555,7 @@ int v3_decode(struct guest_info * info, addr_t instr_ptr, struct x86_instr * ins
            }
        }
 
-       V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
+//     V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
 
        if (xed_operand_read(op)) {
            v3_op->read = 1;
diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c

index eb79fa3..4326788 100644 (file)
--- a/palacios/src/palacios/vmx.c
+++ b/palacios/src/palacios/vmx.c
@@ -34,6 +34,7 @@
 #include <palacios/vmx_io.h>
 #include <palacios/vmx_msr.h>
 
+#include <palacios/vmx_hw_info.h>
 
 #ifndef CONFIG_DEBUG_VMX
 #undef PrintDebug
@@ -41,8 +42,12 @@
 #endif
 
 
-static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+/* These fields contain the hardware feature sets supported by the local CPU */
+static struct vmx_hw_info hw_info;
+
+
 static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
 
 extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
 extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
@@ -50,7 +55,7 @@ extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, str
 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
     int ret = 0;
 
-    ret = vmcs_write(field,val);
+    ret = vmcs_write(field, val);
 
     if (ret != VMX_SUCCESS) {
         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
@@ -76,7 +81,6 @@ static int inline check_vmcs_read(vmcs_field_t field, void * val) {
 
 
 static addr_t allocate_vmcs() {
-    reg_ex_t msr;
     struct vmcs_data * vmcs_page = NULL;
 
     PrintDebug("Allocating page\n");
@@ -84,10 +88,8 @@ static addr_t allocate_vmcs() {
     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
     memset(vmcs_page, 0, 4096);
 
-    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
-    
-    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
-    PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
+    vmcs_page->revision = hw_info.basic_info.revision;
+    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
 
     return (addr_t)V3_PAddr((void *)vmcs_page);
 }
@@ -388,7 +390,7 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
     // reenable global interrupts for vm state initialization now
     // that the vm state is initialized. If another VM kicks us off, 
     // it'll update our vmx state so that we know to reload ourself
-    v3_disable_ints();
+    v3_enable_ints();
 
     return 0;
 }
@@ -641,6 +643,13 @@ int v3_vmx_enter(struct guest_info * info) {
     // disable global interrupts for vm state transition
     v3_disable_ints();
 
+
+    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
+       vmcs_load(vmx_info->vmcs_ptr_phys);
+       active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+    }
+
+
     v3_vmx_restore_vmcs(info);
 
 
@@ -666,10 +675,6 @@ int v3_vmx_enter(struct guest_info * info) {
     check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
     check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
 
-    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
-       vmcs_load(vmx_info->vmcs_ptr_phys);
-       active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
-    }
 
     if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
@@ -726,10 +731,15 @@ int v3_vmx_enter(struct guest_info * info) {
     update_irq_exit_state(info);
 #endif
 
-    // Handle any exits needed still in the atomic section
-    if (v3_handle_atomic_vmx_exit(info, &exit_info) == -1) {
-       PrintError("Error in atomic VMX exit handler\n");
-       return -1;
+    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
+       // This is a special case whose only job is to inject an interrupt
+       vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
+        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
+        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
+
+#ifdef CONFIG_DEBUG_INTERRUPTS
+        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
+#endif
     }
 
     // reenable global interrupts after vm exit
@@ -807,6 +817,12 @@ int v3_start_vmx_guest(struct guest_info * info) {
 }
 
 
+/* MSR number and feature bit masks for VMX capability checks */
+/* NOTE(review): despite its name, CPUID_VMX_FEATURES is tested against feature_msr.lo in v3_is_vmx_capable(), not a cpuid result */
+#define VMX_FEATURE_CONTROL_MSR     0x0000003a
+#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
+#define CPUID_1_ECX_VTXFLAG 0x00000020
+
 int v3_is_vmx_capable() {
     v3_msr_t feature_msr;
     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
@@ -820,7 +836,7 @@ int v3_is_vmx_capable() {
        
         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
 
-        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
+        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
             PrintDebug("VMX is locked -- enable in the BIOS\n");
             return 0;
         }
@@ -833,82 +849,23 @@ int v3_is_vmx_capable() {
     return 1;
 }
 
-static int has_vmx_nested_paging() {
-    return 0;
-}
 
 
 
-void v3_init_vmx_cpu(int cpu_id) {
-    extern v3_cpu_arch_t v3_cpu_types[];
-    struct v3_msr tmp_msr;
-    uint64_t ret = 0;
 
-    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
 
-#ifdef __V3_64BIT__
-    __asm__ __volatile__ (
-                         "movq %%cr4, %%rbx;"
-                         "orq  $0x00002000, %%rbx;"
-                         "movq %%rbx, %0;"
-                         : "=m"(ret) 
-                         :
-                         : "%rbx"
-                         );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-                             "movq %0, %%cr4;"
-                             :
-                             : "q"(ret)
-                             );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
-    }
+void v3_init_vmx_cpu(int cpu_id) {
+    extern v3_cpu_arch_t v3_cpu_types[];
 
-    __asm__ __volatile__ (
-                         "movq %%cr0, %%rbx; "
-                         "orq  $0x00000020,%%rbx; "
-                         "movq %%rbx, %%cr0;"
-                         :
-                         :
-                         : "%rbx"
-                         );
-#elif __V3_32BIT__
-    __asm__ __volatile__ (
-                         "movl %%cr4, %%ecx;"
-                         "orl  $0x00002000, %%ecx;"
-                         "movl %%ecx, %0;"
-                         : "=m"(ret) 
-                         :
-                         : "%ecx"
-                         );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-                             "movl %0, %%cr4;"
-                             :
-                             : "q"(ret)
-                             );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
+    if (cpu_id == 0) {
+       if (v3_init_vmx_hw(&hw_info) == -1) {
+           PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
+           return;
+       }
     }
 
-    __asm__ __volatile__ (
-                         "movl %%cr0, %%ecx; "
-                         "orl  $0x00000020,%%ecx; "
-                         "movl %%ecx, %%cr0;"
-                         :
-                         :
-                         : "%ecx"
-                         );
 
-#endif
-
-    //
-    // Should check and return Error here.... 
+    enable_vmx();
 
 
     // Setup VMXON Region
@@ -916,7 +873,7 @@ void v3_init_vmx_cpu(int cpu_id) {
 
     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
 
-    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
+    if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
         PrintDebug("VMX Enabled\n");
     } else {
         PrintError("VMX initialization failure\n");
@@ -924,11 +881,8 @@ void v3_init_vmx_cpu(int cpu_id) {
     }
     
 
-    if (has_vmx_nested_paging() == 1) {
-        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
-    } else {
-        v3_cpu_types[cpu_id] = V3_VMX_CPU;
-    }
+    v3_cpu_types[cpu_id] = V3_VMX_CPU;
+
 
 }
 
diff --git a/palacios/src/palacios/vmx_ept.c b/palacios/src/palacios/vmx_ept.c

new file mode 100644 (file)

index 0000000..42ca942
--- /dev/null
+++ b/palacios/src/palacios/vmx_ept.c
@@ -0,0 +1,19 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
diff --git a/palacios/src/palacios/vmx_handler.c b/palacios/src/palacios/vmx_handler.c

index 4066bf2..e5da762 100644 (file)
--- a/palacios/src/palacios/vmx_handler.c
+++ b/palacios/src/palacios/vmx_handler.c
@@ -42,25 +42,6 @@
 #endif
 
 /* At this point the GPRs are already copied into the guest_info state */
-int v3_handle_atomic_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
-    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
-
-    switch (exit_info->exit_reason) {
-        case VMEXIT_INTR_WINDOW:
-           // This is here because we touch the VMCS 
-           vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
-            vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
-            vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
-
-#ifdef CONFIG_DEBUG_INTERRUPTS
-            PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
-#endif
-            break;
-    }
-    return 0;
-}
-
-/* At this point the GPRs are already copied into the guest_info state */
 int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
     /*
       PrintError("Handling VMEXIT: %s (%u), %lu (0x%lx)\n", 
diff --git a/palacios/src/palacios/vmx_hw_info.c b/palacios/src/palacios/vmx_hw_info.c

new file mode 100644 (file)

index 0000000..3220e52
--- /dev/null
+++ b/palacios/src/palacios/vmx_hw_info.c
@@ -0,0 +1,113 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vmx_hw_info.h>
+#include <palacios/vmm_msr.h>
+
+// Intel VMX Feature MSRs
+
+
+
+/* Fill in field from a legacy/"true" control capability MSR pair.
+ * def_val always comes from old_msr; req_val and req_mask come from true_msr
+ * when hw_info->basic_info.def1_maybe_0 is set, else from old_msr. Returns 0. */
+static int get_ex_ctrl_caps(struct vmx_hw_info * hw_info, struct vmx_ctrl_field * field,
+                 uint32_t old_msr, uint32_t true_msr) {
+    uint32_t mb1; /* Bit is 1 => MB1 */
+    uint32_t mbz; /* Bit is 0 => MBZ */
+
+    v3_get_msr(old_msr, &mbz, &mb1);
+    field->def_val = mb1;
+
+    /* Replace the legacy reading with the "true" one when it is to be used */
+    if (hw_info->basic_info.def1_maybe_0) {
+       v3_get_msr(true_msr, &mbz, &mb1);
+    }
+
+    /* req_mask gets a 1 in every bit position where the two words agree */
+    field->req_val = mb1;
+    field->req_mask = ~(mbz ^ mb1);
+
+    return 0;
+}
+
+
+/* Fill in field from a single control capability MSR; def_val and req_val
+ * are both set to the MB1 word that was read. Always returns 0. */
+static int get_ctrl_caps(struct vmx_ctrl_field * field, uint32_t msr) {
+    uint32_t hi_mbz = 0; /* Bit is 0 => MBZ */
+    uint32_t lo_mb1 = 0; /* Bit is 1 => MB1 */
+
+    v3_get_msr(msr, &hi_mbz, &lo_mb1);
+    field->req_mask = ~(hi_mbz ^ lo_mb1); /* 1 where the two words agree */
+    field->req_val = lo_mb1;
+    field->def_val = lo_mb1;
+    return 0;
+}
+
+
+
+/* Fill in field from a control-register FIXED1/FIXED0 MSR pair; def_val and
+ * req_val are both set to the value read from fixed_0_msr. Always returns 0. */
+static int get_cr_fields(struct vmx_cr_field * field, uint32_t fixed_1_msr, uint32_t fixed_0_msr) {
+    struct v3_msr fixed_1; /* Bit is 0 => MBZ */
+    struct v3_msr fixed_0; /* Bit is 1 => MB1 */
+
+    v3_get_msr(fixed_1_msr, &(fixed_1.hi), &(fixed_1.lo));
+    v3_get_msr(fixed_0_msr, &(fixed_0.hi), &(fixed_0.lo));
+    field->req_mask = ~(fixed_1.value ^ fixed_0.value); /* 1 where both MSRs agree */
+    field->req_val = fixed_0.value;
+    field->def_val = fixed_0.value;
+    return 0;
+}
+
+
+
+
+
+/* Read the VMX capability MSRs into hw_info, which is zeroed first.
+ * Always returns 0. */
+int v3_init_vmx_hw(struct vmx_hw_info * hw_info) {
+    memset(hw_info, 0, sizeof(struct vmx_hw_info));
+
+    v3_get_msr(VMX_BASIC_MSR, &(hw_info->basic_info.hi), &(hw_info->basic_info.lo));
+    v3_get_msr(VMX_MISC_MSR, &(hw_info->misc_info.hi), &(hw_info->misc_info.lo));
+    v3_get_msr(VMX_EPT_VPID_CAP_MSR, &(hw_info->ept_info.hi), &(hw_info->ept_info.lo));
+
+    PrintError("BASIC_MSR: Lo: %x, Hi: %x\n", hw_info->basic_info.lo, hw_info->basic_info.hi);
+
+    get_ex_ctrl_caps(hw_info, &(hw_info->pin_ctrls), VMX_PINBASED_CTLS_MSR, VMX_TRUE_PINBASED_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->proc_ctrls), VMX_PROCBASED_CTLS_MSR, VMX_TRUE_PROCBASED_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->exit_ctrls), VMX_EXIT_CTLS_MSR, VMX_TRUE_EXIT_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->entry_ctrls), VMX_ENTRY_CTLS_MSR, VMX_TRUE_ENTRY_CTLS_MSR);
+
+    /* Get secondary PROCBASED controls if bit 31 of the primary controls is optional */
+    /* (req_mask clear) or required to be 1 (req_val set). Intel Manual 3B. Sect. G.3.3 */
+    if ( ((hw_info->proc_ctrls.req_mask & 0x80000000) == 0) ||
+        ((hw_info->proc_ctrls.req_val & 0x80000000) != 0) ) {
+       get_ctrl_caps(&(hw_info->proc_ctrls_2), VMX_PROCBASED_CTLS2_MSR);
+    }
+
+    get_cr_fields(&(hw_info->cr0), VMX_CR0_FIXED1_MSR, VMX_CR0_FIXED0_MSR);
+    get_cr_fields(&(hw_info->cr4), VMX_CR4_FIXED1_MSR, VMX_CR4_FIXED0_MSR);
+
+    return 0;
+}
Kconfig		patch \| blob \| history
Makefile		patch \| blob \| history
linux_module/palacios-debugfs.c	[new file with mode: 0644]	patch \| blob
linux_module/palacios-debugfs.h	[new file with mode: 0644]	patch \| blob
palacios/include/interfaces/inspector.h	[moved from palacios/include/palacios/vmm_inspector.h with 97% similarity]	patch \| blob \| history
palacios/include/interfaces/vmm_host_dev.h		patch \| blob \| history
palacios/include/palacios/vm_guest.h		patch \| blob \| history
palacios/include/palacios/vmcs.h		patch \| blob \| history
palacios/include/palacios/vmm.h		patch \| blob \| history
palacios/include/palacios/vmm_barrier.h	[copied from palacios/include/palacios/vmm_muxer.h with 55% similarity]	patch \| blob \| history
palacios/include/palacios/vmm_cpuid.h		patch \| blob \| history
palacios/include/palacios/vmm_dev_mgr.h		patch \| blob \| history
palacios/include/palacios/vmm_ethernet.h		patch \| blob \| history
palacios/include/palacios/vmm_extensions.h		patch \| blob \| history
palacios/include/palacios/vmm_instr_emulator.h		patch \| blob \| history
palacios/include/palacios/vmm_queue.h		patch \| blob \| history
palacios/include/palacios/vmm_vnet.h		patch \| blob \| history
palacios/include/palacios/vmx.h		patch \| blob \| history
palacios/include/palacios/vmx_ept.h	[new file with mode: 0644]	patch \| blob
palacios/include/palacios/vmx_hw_info.h	[new file with mode: 0644]	patch \| blob
palacios/include/palacios/vmx_lowlevel.h		patch \| blob \| history
palacios/src/devices/Kconfig		patch \| blob \| history
palacios/src/devices/Makefile		patch \| blob \| history
palacios/src/devices/generic.c		patch \| blob \| history
palacios/src/devices/lnx_virtio_nic.c		patch \| blob \| history
palacios/src/devices/lnx_virtio_vnet.c		patch \| blob \| history
palacios/src/devices/nic_bridge.c		patch \| blob \| history
palacios/src/devices/nvram.c		patch \| blob \| history
palacios/src/devices/pci_front.c	[new file with mode: 0644]	patch \| blob
palacios/src/devices/vga.c		patch \| blob \| history
palacios/src/devices/vga_regs.h		patch \| blob \| history
palacios/src/devices/vnet_nic.c		patch \| blob \| history
palacios/src/extensions/Kconfig	[new file with mode: 0644]	patch \| blob
palacios/src/extensions/Makefile	[new file with mode: 0644]	patch \| blob
palacios/src/extensions/ext_inspector.c	[moved from palacios/src/palacios/vmm_inspector.c with 81% similarity]	patch \| blob \| history
palacios/src/extensions/ext_vtime.c	[new file with mode: 0644]	patch \| blob
palacios/src/extensions/ext_vtsc.c	[new file with mode: 0644]	patch \| blob
palacios/src/extensions/null.c	[new file with mode: 0644]	patch \| blob
palacios/src/interfaces/vmm_host_dev.c		patch \| blob \| history
palacios/src/palacios/Makefile		patch \| blob \| history
palacios/src/palacios/vm_guest.c		patch \| blob \| history
palacios/src/palacios/vm_guest_mem.c		patch \| blob \| history
palacios/src/palacios/vmcs.c		patch \| blob \| history
palacios/src/palacios/vmm.c		patch \| blob \| history
palacios/src/palacios/vmm_barrier.c	[moved from palacios/include/palacios/vmm_muxer.h with 51% similarity]	patch \| blob \| history
palacios/src/palacios/vmm_config.c		patch \| blob \| history
palacios/src/palacios/vmm_config_class.h		patch \| blob \| history
palacios/src/palacios/vmm_cpuid.c		patch \| blob \| history
palacios/src/palacios/vmm_emulator.c		patch \| blob \| history
palacios/src/palacios/vmm_extensions.c		patch \| blob \| history
palacios/src/palacios/vmm_host_events.c		patch \| blob \| history
palacios/src/palacios/vmm_muxer.c	[deleted file]	patch \| blob \| history
palacios/src/palacios/vmm_queue.c		patch \| blob \| history
palacios/src/palacios/vmm_vnet_core.c		patch \| blob \| history
palacios/src/palacios/vmm_xed.c		patch \| blob \| history
palacios/src/palacios/vmx.c		patch \| blob \| history
palacios/src/palacios/vmx_ept.c	[new file with mode: 0644]	patch \| blob
palacios/src/palacios/vmx_handler.c		patch \| blob \| history
palacios/src/palacios/vmx_hw_info.c	[new file with mode: 0644]	patch \| blob