Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Merge branch 'devel' of /home-remote/palacios/palacios into devel
Kyle Hale [Fri, 29 Apr 2011 22:16:39 +0000 (17:16 -0500)]
Conflicts:

Kconfig

merged

59 files changed:
Kconfig
Makefile
linux_module/palacios-debugfs.c [new file with mode: 0644]
linux_module/palacios-debugfs.h [new file with mode: 0644]
palacios/include/interfaces/inspector.h [moved from palacios/include/palacios/vmm_inspector.h with 97% similarity]
palacios/include/interfaces/vmm_host_dev.h
palacios/include/palacios/vm_guest.h
palacios/include/palacios/vmcs.h
palacios/include/palacios/vmm.h
palacios/include/palacios/vmm_barrier.h [copied from palacios/include/palacios/vmm_muxer.h with 55% similarity]
palacios/include/palacios/vmm_cpuid.h
palacios/include/palacios/vmm_dev_mgr.h
palacios/include/palacios/vmm_ethernet.h
palacios/include/palacios/vmm_extensions.h
palacios/include/palacios/vmm_instr_emulator.h
palacios/include/palacios/vmm_queue.h
palacios/include/palacios/vmm_vnet.h
palacios/include/palacios/vmx.h
palacios/include/palacios/vmx_ept.h [new file with mode: 0644]
palacios/include/palacios/vmx_hw_info.h [new file with mode: 0644]
palacios/include/palacios/vmx_lowlevel.h
palacios/src/devices/Kconfig
palacios/src/devices/Makefile
palacios/src/devices/generic.c
palacios/src/devices/lnx_virtio_nic.c
palacios/src/devices/lnx_virtio_vnet.c
palacios/src/devices/nic_bridge.c
palacios/src/devices/nvram.c
palacios/src/devices/pci_front.c [new file with mode: 0644]
palacios/src/devices/vga.c
palacios/src/devices/vga_regs.h
palacios/src/devices/vnet_nic.c
palacios/src/extensions/Kconfig [new file with mode: 0644]
palacios/src/extensions/Makefile [new file with mode: 0644]
palacios/src/extensions/ext_inspector.c [moved from palacios/src/palacios/vmm_inspector.c with 81% similarity]
palacios/src/extensions/ext_vtime.c [new file with mode: 0644]
palacios/src/extensions/ext_vtsc.c [new file with mode: 0644]
palacios/src/extensions/null.c [new file with mode: 0644]
palacios/src/interfaces/vmm_host_dev.c
palacios/src/palacios/Makefile
palacios/src/palacios/vm_guest.c
palacios/src/palacios/vm_guest_mem.c
palacios/src/palacios/vmcs.c
palacios/src/palacios/vmm.c
palacios/src/palacios/vmm_barrier.c [moved from palacios/include/palacios/vmm_muxer.h with 51% similarity]
palacios/src/palacios/vmm_config.c
palacios/src/palacios/vmm_config_class.h
palacios/src/palacios/vmm_cpuid.c
palacios/src/palacios/vmm_emulator.c
palacios/src/palacios/vmm_extensions.c
palacios/src/palacios/vmm_host_events.c
palacios/src/palacios/vmm_muxer.c [deleted file]
palacios/src/palacios/vmm_queue.c
palacios/src/palacios/vmm_vnet_core.c
palacios/src/palacios/vmm_xed.c
palacios/src/palacios/vmx.c
palacios/src/palacios/vmx_ept.c [new file with mode: 0644]
palacios/src/palacios/vmx_handler.c
palacios/src/palacios/vmx_hw_info.c [new file with mode: 0644]

diff --git a/Kconfig b/Kconfig
index d312c0b..4241627 100644 (file)
--- a/Kconfig
+++ b/Kconfig
@@ -128,7 +128,7 @@ config MAX_CPUS
 endmenu
 
 source "palacios/src/interfaces/Kconfig"
-
+source "palacios/src/extensions/Kconfig"
 
 config TELEMETRY
        bool "Enable VMM telemetry support"
index 46227ae..ed13298 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -435,6 +435,7 @@ core-y          := palacios/src/palacios/
 libs-y         := palacios/lib/$(ARCH)/
 devices-y       := palacios/src/devices/
 interfaces-y    := palacios/src/interfaces/
+extensions-y    := palacios/src/extensions/
 modules-y       := modules/
 
 
@@ -529,7 +530,7 @@ export      INSTALL_PATH ?= /build
 
 
 palacios-dirs  := $(patsubst %/,%,$(filter %/,  \
-                    $(core-y) $(devices-y) $(interfaces-y) $(libs-y)) $(modules-y))
+                    $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y)) $(modules-y))
 
 
 
@@ -540,13 +541,14 @@ palacios-dirs     := $(patsubst %/,%,$(filter %/,  \
 
 palacios-cleandirs := $(sort $(palacios-dirs) $(patsubst %/,%,$(filter %/, \
                        $(core-n) $(core-) $(devices-n) $(devices-) \
-                       $(interfaces-n) $(interfaces-) $(modules-n) $(modules-))))
+                       $(interfaces-n) $(interfaces-) $(extensions-n) $(extensions-) $(modules-n) $(modules-))))
 
 
 
 core-y         := $(patsubst %/, %/built-in.o, $(core-y))
 devices-y      := $(patsubst %/, %/built-in.o, $(devices-y))
 interfaces-y    := $(patsubst %/, %/built-in.o, $(interfaces-y))
+extensions-y    := $(patsubst %/, %/built-in.o, $(extensions-y))
 libs-y         := $(patsubst %/, %/built-in.o, $(libs-y))
 modules-y       := $(patsubst %/, %/built-in.o, $(modules-y))
 #lnxmod-y        := $(patsubst %/, %/built-in.o, $(lnxmod-y))
@@ -573,7 +575,7 @@ modules-y       := $(patsubst %/, %/built-in.o, $(modules-y))
 
 
 
-palacios := $(core-y) $(devices-y) $(interfaces-y) $(libs-y) $(modules-y)
+palacios := $(core-y) $(devices-y) $(interfaces-y) $(extensions-y) $(libs-y) $(modules-y)
 
 
 # Rule to link palacios - also used during CONFIG_CONFIGKALLSYMS
diff --git a/linux_module/palacios-debugfs.c b/linux_module/palacios-debugfs.c
new file mode 100644 (file)
index 0000000..b35120e
--- /dev/null
@@ -0,0 +1,79 @@
+/* 
+ * DebugFS interface
+ * (c) Jack Lange, 2011
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <interfaces/inspector.h>
+
+#include "palacios.h"
+
+struct dentry * v3_dir = NULL;
+
+
+/*
+ * Create the top-level "v3vee" directory in debugfs and remember it in v3_dir.
+ *
+ * Returns 0 on success, -1 if the directory could not be created.
+ */
+int palacios_init_debugfs( void ) {
+
+    v3_dir = debugfs_create_dir("v3vee", NULL);
+
+    /* Depending on the kernel version, failure is reported either as NULL
+     * or as an ERR_PTR() value, so both have to be treated as errors. */
+    if ((v3_dir == NULL) || IS_ERR(v3_dir)) {
+       printk("Error creating v3vee debugfs directory\n");
+
+       /* Do not leave an error pointer in the global: later users of
+        * v3_dir would treat it as a valid dentry. */
+       v3_dir = NULL;
+       return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Tear down the "v3vee" debugfs directory created by palacios_init_debugfs().
+ *
+ * Always returns 0.
+ */
+int palacios_deinit_debugfs( void ) {
+    /* dfs_register_vm() populates v3_dir with files and subdirectories, so
+     * the whole subtree has to be removed; a plain debugfs_remove() does not
+     * remove a directory that still has entries in it. */
+    if ((v3_dir != NULL) && !IS_ERR(v3_dir)) {
+       debugfs_remove_recursive(v3_dir);
+    }
+
+    v3_dir = NULL;
+    return 0;
+}
+
+
+
+/*
+ * Mirror the children of an inspection node into a debugfs directory.
+ *
+ * dir:  debugfs directory that receives the entries
+ * root: inspection node whose children are walked
+ *
+ * A child whose value has size 0 becomes a subdirectory and is descended
+ * into; sizes 1, 2, 4 and 8 become u8/u16/u32/u64 files (mode 0644) backed
+ * by the value pointer; any other size is currently skipped.
+ *
+ * Always returns 0; failures to create a subdirectory are logged and that
+ * subtree is skipped.
+ */
+static int dfs_register_tree(struct dentry * dir, v3_inspect_node_t * root) {
+    v3_inspect_node_t * tmp_node = v3_inspection_first_child(root);
+    struct v3_inspection_value tmp_value;
+
+    while (tmp_node) {
+       tmp_value = v3_inspection_value(tmp_node);
+
+       if (tmp_value.size == 0) {
+           struct dentry * new_dir = debugfs_create_dir(tmp_value.name, dir);
+
+           /* Never recurse with an invalid parent: a NULL parent would put
+            * the children into the debugfs root, and an ERR_PTR() parent is
+            * not a usable dentry at all. */
+           if ((new_dir == NULL) || IS_ERR(new_dir)) {
+               printk("Error creating debugfs directory %s\n", tmp_value.name);
+           } else {
+               dfs_register_tree(new_dir, tmp_node);
+           }
+       } else if (tmp_value.size == 1) {
+           debugfs_create_u8(tmp_value.name, 0644, dir, (u8 *)tmp_value.value);
+       } else if (tmp_value.size == 2) {
+           debugfs_create_u16(tmp_value.name, 0644, dir, (u16 *)tmp_value.value);
+       } else if (tmp_value.size == 4) {
+           debugfs_create_u32(tmp_value.name, 0644, dir, (u32 *)tmp_value.value);
+       } else if (tmp_value.size == 8) {
+           debugfs_create_u64(tmp_value.name, 0644, dir, (u64 *)tmp_value.value);
+       } else {
+           /* Values of any other size (buffers) are not exported yet. */
+       }
+
+       tmp_node = v3_inspection_node_next(tmp_node);
+
+    }
+
+    return 0;
+}
+
+
+/*
+ * Register the inspection tree of a guest below the v3vee debugfs directory.
+ *
+ * guest: guest whose v3_ctx is queried for its inspection root
+ *
+ * Returns 0 once the tree has been walked, -1 if the guest has no
+ * inspection root.
+ */
+int dfs_register_vm(struct v3_guest * guest) {
+    v3_inspect_node_t * insp_root = v3_get_inspection_root(guest->v3_ctx);
+
+    if (insp_root != NULL) {
+       dfs_register_tree(v3_dir, insp_root);
+       return 0;
+    }
+
+    printk("No inspection root found\n");
+    return -1;
+}
diff --git a/linux_module/palacios-debugfs.h b/linux_module/palacios-debugfs.h
new file mode 100644 (file)
index 0000000..1caad52
--- /dev/null
@@ -0,0 +1,14 @@
+/* 
+ * DebugFS interface
+ * (c) Jack Lange, 2011
+ */
+
+#include "palacios.h"
+
+int palacios_init_debugfs( void );
+int palacios_deinit_debugfs( void );
+
+
+
+int dfs_register_vm(struct v3_guest * guest);
+
similarity index 97%
rename from palacios/include/palacios/vmm_inspector.h
rename to palacios/include/interfaces/inspector.h
index ee0f70d..396e490 100644 (file)
@@ -36,12 +36,6 @@ typedef void v3_inspect_node_t;
 #define READ_ONLY 2
 #define HOOKED 4
 
-struct v3_inspector_state {
-    struct v3_mtree state_tree;
-
-};
-
-
 
 int v3_init_inspector(struct v3_vm_info * vm);
 int v3_init_inspector_core(struct guest_info * core);
index 138839f..2b893b5 100644 (file)
@@ -23,7 +23,6 @@
 
 #include <palacios/vmm.h>
 
-
 /*
 
   The purpose of this interface is to make it possible to implement
@@ -78,9 +77,12 @@ typedef enum { V3_BUS_CLASS_DIRECT, V3_BUS_CLASS_PCI } v3_bus_class_t;
 
 #ifdef __V3VEE__
 
+struct v3_vm_info;
+
 v3_host_dev_t v3_host_dev_open(char *impl, 
                               v3_bus_class_t bus,
-                              v3_guest_dev_t gdev); 
+                              v3_guest_dev_t gdev,
+                              struct v3_vm_info *vm); 
 
 int v3_host_dev_close(v3_host_dev_t hdev);
     
@@ -106,13 +108,13 @@ uint64_t v3_host_dev_write_mem(v3_host_dev_t hostdev,
 
 int v3_host_dev_ack_irq(v3_host_dev_t hostdev, uint8_t irq);
 
-uint64_t v3_host_dev_config_read(v3_host_dev_t hostdev, 
+uint64_t v3_host_dev_read_config(v3_host_dev_t hostdev, 
                                 uint64_t      offset,
                                 void          *dest,
                                 uint64_t      len);
 
-uint64_t v3_host_dev_config_write(v3_host_dev_t hostdev, 
-                                uint64_t      offset,
+uint64_t v3_host_dev_write_config(v3_host_dev_t hostdev, 
+                                 uint64_t      offset,
                                  void          *src,
                                  uint64_t      len);
  
@@ -124,10 +126,12 @@ struct v3_host_dev_hooks {
     // this device is attached to and an opaque pointer back to the
     // guest device.  It returns an opaque representation of 
     // the host device it has attached to, with zero indicating
-    // failure
+    // failure.  The host_priv_data argument supplies to the 
+    // host the pointer that the VM was originally registered with
     v3_host_dev_t (*open)(char *impl, 
                          v3_bus_class_t bus,
-                         v3_guest_dev_t gdev);
+                         v3_guest_dev_t gdev,
+                         void *host_priv_data);
 
     int (*close)(v3_host_dev_t hdev);
     
@@ -150,12 +154,12 @@ struct v3_host_dev_hooks {
     // fail, returning != len
     // Callee gets the host dev id, and the guest physical address
     uint64_t (*read_mem)(v3_host_dev_t hostdev, 
-                        addr_t        gpa,
+                        void *        gpa,
                         void          *dest,
                         uint64_t      len);
     
     uint64_t (*write_mem)(v3_host_dev_t hostdev, 
-                         addr_t        gpa,
+                         void *        gpa,
                          void          *src,
                          uint64_t      len);
     
@@ -202,19 +206,16 @@ int v3_host_dev_raise_irq(v3_host_dev_t hostdev,
 
 /* These functions allow the host to read and write the guest
    memory by physical address, for example to implement DMA 
-
-   These functions are incremental - that is, they can return
-   a smaller amount than requested
 */
 uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t  hostdev,
                                    v3_guest_dev_t guest_dev,
-                                   addr_t         gpa,
+                                   void *         gpa,
                                    void           *dest,
                                    uint64_t       len);
 
 uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t  hostdev,
                                     v3_guest_dev_t guest_dev,
-                                    addr_t         gpa,
+                                    void *         gpa,
                                     void           *src,
                                     uint64_t       len);
                              
index 4b2728f..5d4527f 100644 (file)
@@ -50,9 +50,6 @@
 struct v3_sym_core_state;
 #endif
 
-#ifdef CONFIG_INSPECTOR
-#include  <palacios/vmm_inspector.h>
-#endif
 
 
 #include <palacios/vmm_config.h>
@@ -187,9 +184,6 @@ struct v3_vm_info {
     struct v3_telemetry_state telemetry;
 #endif
 
-#ifdef CONFIG_INSPECTOR
-    struct v3_inspector_state inspector;
-#endif
 
     uint64_t yield_cycle_period;  
 
index 9129d1b..c80a23c 100644 (file)
 
 
 
+struct vmcs_field_encoding { /* bit-fields below total 32 bits (1 + 9 + 2 + 1 + 2 + 17) */
+    uint8_t access_type    : 1; /*  0 = full, 1 = high, (for accessing 64 bit fields on 32bit CPU) */
+    uint16_t index         : 9; /* NOTE(review): presumably the field's index within its type/width group -- confirm against the Intel SDM */
+    uint8_t type           : 2; /* 0=ctrl, 1=read-only, 2 = guest state, 3 = host state */
+    uint8_t rsvd1          : 1; /* MBZ (must be zero) */
+    uint8_t width          : 2; /* 0 = 16bit, 1 = 64bit, 2 = 32bit, 3 = natural width */
+    uint32_t rsvd2         : 17; /* reserved, going by the field name */
+} __attribute__((packed));
+
 
 typedef enum {
     VMCS_GUEST_ES_SELECTOR       = 0x00000800,
index ae4421c..5cb1db1 100644 (file)
@@ -184,13 +184,33 @@ struct guest_info;
 
 #ifdef CONFIG_MULTITHREAD_OS
 
-#define V3_CREATE_THREAD(fn, arg, name)                                \
-    do {                                                       \
+#define V3_CREATE_THREAD(fn, arg, name)        ({                      \
+       void * thread = NULL;                                                   \
        extern struct v3_os_hooks * os_hooks;                   \
        if ((os_hooks) && (os_hooks)->start_kernel_thread) {    \
-           (os_hooks)->start_kernel_thread(fn, arg, name);     \
+           thread = (os_hooks)->start_kernel_thread(fn, arg, name);    \
        }                                                       \
-    } while (0)
+       thread;                                         \
+    })
+
+
+#define V3_THREAD_SLEEP()              \
+    do{                                                        \
+       extern struct v3_os_hooks * os_hooks;                   \
+       if ((os_hooks) && (os_hooks)->kernel_thread_sleep) {    \
+           (os_hooks)->kernel_thread_sleep();  \
+       }                                                       \
+    }while(0)
+
+
+#define V3_THREAD_WAKEUP(thread)               \
+    do{                                                        \
+       extern struct v3_os_hooks * os_hooks;                   \
+       if ((os_hooks) && (os_hooks)->kernel_thread_wakeup) {   \
+           (os_hooks)->kernel_thread_wakeup(thread);   \
+       }                                                       \
+    }while(0)
+
 
 
 #define V3_Call_On_CPU(cpu, fn, arg)                   \
@@ -298,7 +318,9 @@ struct v3_os_hooks {
 
 
 
-    void (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name); 
+    void * (*start_kernel_thread)(int (*fn)(void * arg), void * arg, char * thread_name); 
+    void (*kernel_thread_sleep)(void);
+    void (*kernel_thread_wakeup)(void * thread);
     void (*interrupt_cpu)(struct v3_vm_info * vm, int logical_cpu, int vector);
     void (*call_on_cpu)(int logical_cpu, void (*fn)(void * arg), void * arg);
     void * (*start_thread_on_cpu)(int cpu_id, int (*fn)(void * arg), void * arg, char * thread_name);
similarity index 55%
copy from palacios/include/palacios/vmm_muxer.h
copy to palacios/include/palacios/vmm_barrier.h
index 1c50789..4513c09 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the Palacios Virtual Machine Monitor developed
  * by the V3VEE Project with funding from the United States National 
  * Science Foundation and the Department of Energy.  
@@ -7,31 +7,34 @@
  * and the University of New Mexico.  You can find out more at 
  * http://www.v3vee.org
  *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
  * All rights reserved.
  *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  */
 
-#ifndef __VMM_MUXER_H__
-#define __VMM_MUXER_H__
+#ifndef __VMM_BARRIER_H__
+#define __VMM_BARRIER_H__
 
 #ifdef __V3VEE__
 
 
-struct v3_vm_info;
+#include <util/vmm_lock.h>
 
+struct v3_barrier {
+    
 
+    int active;     // If 1, barrier is active, everyone must wait 
+                    // If 0, barrier is clear, can proceed
 
-struct v3_vm_info * v3_get_foreground_vm();
-void v3_set_foreground_vm(struct v3_vm_info * vm);
+    v3_lock_t lock;
+};
 
 
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm));
 
 
 #endif
index 30467fd..88d48bd 100644 (file)
@@ -54,6 +54,12 @@ struct v3_cpuid_map {
 
 void v3_print_cpuid_map(struct v3_vm_info * vm);
 
+int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid, 
+                       uint32_t rax_mask, uint32_t rax,
+                       uint32_t rbx_mask, uint32_t rbx, 
+                       uint32_t rcx_mask, uint32_t rcx, 
+                       uint32_t rdx_mask, uint32_t rdx);
+
 int v3_hook_cpuid(struct v3_vm_info * vm, uint32_t cpuid, 
                  int (*hook_fn)(struct guest_info * info, uint32_t cpuid, \
                                 uint32_t * eax, uint32_t * ebx, \
index e789207..c9999bd 100644 (file)
@@ -179,11 +179,10 @@ struct v3_dev_blk_ops {
 
 struct v3_dev_net_ops {
     /* Backend implemented functions */
-    int (*send)(uint8_t * buf, uint32_t count, void * private_data);
+    int (*send)(uint8_t * buf, uint32_t len, int synchronize, void * private_data);
 
     /* Frontend implemented functions */
-    int (*recv)(uint8_t * buf, uint32_t count, void * frnt_data);
-    void (*poll)(struct v3_vm_info * vm, int budget, void * frnt_data);
+    int (*recv)(uint8_t * buf, uint32_t len, void * frnt_data);
 
     /* This is ugly... */
     void * frontend_data; 
index 3794d77..2b9319b 100644 (file)
 #define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_MTU)
 #define ETH_ALEN 6
 
+#define MIN_MTU 68
+//#define MAX_MTU 65535
+#define MAX_MTU 9000
+
+#define MAX_PACKET_LEN (ETHERNET_HEADER_LEN + MAX_MTU)
+
+
+extern int v3_net_debug;
 
 #ifdef __V3VEE__
 
 #include <palacios/vmm.h>
 
+#define V3_Net_Print(level, fmt, args...)                                      \
+    do {                                                               \
+       if(level <= v3_net_debug) {   \
+           extern struct v3_os_hooks * os_hooks;                       \
+           if ((os_hooks) && (os_hooks)->print) {                      \
+               (os_hooks)->print((fmt), ##args);                       \
+           }                                                   \
+       }                                                       \
+    } while (0)        
+
 struct nic_statistics {
-    uint32_t tx_pkts;
+    uint64_t tx_pkts;
     uint64_t tx_bytes;
-    uint32_t tx_dropped;
+    uint64_t tx_dropped;
        
-    uint32_t rx_pkts;
+    uint64_t rx_pkts;
     uint64_t rx_bytes;
-    uint32_t rx_dropped;
+    uint64_t rx_dropped;
 
-    uint32_t interrupts;
+    uint32_t tx_interrupts;
+    uint32_t rx_interrupts;
 };
     
 static inline int is_multicast_ethaddr(const uint8_t * addr)
index 0135f88..fdddb69 100644 (file)
@@ -23,8 +23,8 @@
 #ifdef __V3VEE__
 
 #include <palacios/vmm.h>
-#include <palacios/vmm_list.h>
 #include <palacios/vmm_config.h>
+#include <palacios/vmm_list.h>
 
 
 struct v3_vm_info;
@@ -41,10 +41,10 @@ struct v3_extension_impl {
     char * name;
     int (*init)(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data);
     int (*deinit)(struct v3_vm_info * vm, void * priv_data);
-    int (*core_init)(struct guest_info * core);
-    int (*core_deinit)(struct guest_info * core);
-    int (*on_entry)(struct guest_info * core);
-    int (*on_exit)(struct guest_info * core);
+    int (*core_init)(struct guest_info * core, void * priv_data);
+    int (*core_deinit)(struct guest_info * core, void * priv_data);
+    int (*on_entry)(struct guest_info * core, void * priv_data);
+    int (*on_exit)(struct guest_info * core, void * priv_data);
 };
 
 struct v3_extension {
@@ -64,6 +64,9 @@ int V3_deinit_extensions();
 
 int v3_init_ext_manager(struct v3_vm_info * vm);
 int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg);
+int v3_init_core_extensions(struct guest_info * core);
+
+void * v3_get_extension_state(struct v3_vm_info * vm, const char * name);
 
 
 #define register_extension(ext)                                        \
index 84b07a4..7559f05 100644 (file)
@@ -23,7 +23,7 @@
 
 #define MAKE_1OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -42,7 +42,7 @@
 
 #define MAKE_1OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -61,7 +61,7 @@
 
 #define MAKE_1OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
@@ -80,7 +80,7 @@
 
 #define MAKE_1OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst,  addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq; "                                        \
 
 #define MAKE_2OP_64FLAGS_INST(iname) static inline void iname##64(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq\r\n"                                      \
 
 #define MAKE_2OP_32FLAGS_INST(iname) static inline void iname##32(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
 
 #define MAKE_2OP_16FLAGS_INST(iname) static inline void iname##16(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
 
 #define MAKE_2OP_8FLAGS_INST(iname) static inline void iname##8(addr_t * dst, addr_t * src, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq; "                                        \
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
                                                              addr_t * src, \
                                                              addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushfq; "                                        \
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
                                                                addr_t * src, \
                                                                addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
                                                              addr_t * src, \
                                                              addr_t * ecx, addr_t * flags) { \
        /* Some of the flags values are not copied out in a pushf, we save them here */ \
-       addr_t flags_rsvd = *flags & ~0xfffe7fff;                       \
+       addr_t flags_rsvd = *flags & ~0xfffc7fff;                       \
                                                                        \
        asm volatile (                                                  \
                      "pushf; "                                         \
index e88329f..811f19d 100644 (file)
 #include <palacios/vmm_lock.h>
 
 
-/* IMPORTANT:
- * This implementation currently does no locking, and as such is not 
- * SMP/thread/interrupt safe
- */
 
 
-struct queue_entry {
+struct v3_queue_entry {
     addr_t entry;
     struct list_head entry_list;
 };
 
 
-struct gen_queue {
+struct v3_queue {
     uint_t num_entries;
     struct list_head entries;
     v3_lock_t lock;
 };
 
 
-struct gen_queue * v3_create_queue();
-void v3_init_queue(struct gen_queue * queue);
+struct v3_queue * v3_create_queue();
+void v3_init_queue(struct v3_queue * queue);
 
-void v3_enqueue(struct gen_queue * queue, addr_t entry);
-addr_t v3_dequeue(struct gen_queue * queue);
+void v3_enqueue(struct v3_queue * queue, addr_t entry);
+addr_t v3_dequeue(struct v3_queue * queue);
 
 
 
index 1750fff..0f8c793 100644 (file)
@@ -19,8 +19,8 @@
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  */
 
-#ifndef __VNET_H__
-#define __VNET_H__
+#ifndef __VNET_CORE_H__
+#define __VNET_CORE_H__
 
 #include <palacios/vmm.h>
 #include <palacios/vmm_ethernet.h>
@@ -38,7 +38,8 @@
 
 #define VNET_HASH_SIZE         17
 
-//routing table entry
+extern int v3_vnet_debug;
+
 struct v3_vnet_route {
     uint8_t src_mac[ETH_ALEN];
     uint8_t dst_mac[ETH_ALEN];
@@ -100,7 +101,7 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
                uint8_t type,
                void * priv_data);
 int v3_vnet_add_route(struct v3_vnet_route route);
-int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data);
+int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize);
 int v3_vnet_find_dev(uint8_t  * mac);
 int v3_vnet_stat(struct vnet_stat * stats);
 
@@ -110,19 +111,17 @@ struct v3_vnet_dev_ops {
     int (*input)(struct v3_vm_info * vm, 
                struct v3_vnet_pkt * pkt, 
                void * dev_data);
-    void (*poll) (struct v3_vm_info * vm, int budget, void * dev_data);
 };
 
 int v3_init_vnet(void);        
 void v3_deinit_vnet(void);
 
-void v3_vnet_do_poll(struct v3_vm_info * vm);
-
 int v3_vnet_add_dev(struct v3_vm_info * info, uint8_t * mac, 
                    struct v3_vnet_dev_ops * ops,
                    void * priv_data);
 int v3_vnet_del_dev(int dev_id);
 
+
 #endif
 
 #endif
index fd5e6ce..7a0a039 100644 (file)
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 
-// Intel VMX Specific MSRs
-#define VMX_FEATURE_CONTROL_MSR     0x0000003a
-#define VMX_BASIC_MSR               0x00000480
-#define VMX_PINBASED_CTLS_MSR       0x00000481
-#define VMX_PROCBASED_CTLS_MSR      0x00000482
-#define VMX_EXIT_CTLS_MSR           0x00000483
-#define VMX_ENTRY_CTLS_MSR          0x00000484
-#define VMX_MISC_MSR                0x00000485
-#define VMX_CR0_FIXED0_MSR          0x00000486
-#define VMX_CR0_FIXED1_MSR          0x00000487
-#define VMX_CR4_FIXED0_MSR          0x00000488
-#define VMX_CR4_FIXED1_MSR          0x00000489
-#define VMX_VMCS_ENUM_MSR           0x0000048A
 
 #define VMX_SUCCESS        0
 #define VMX_FAIL_INVALID   1
 #define VMX_FAIL_VALID     2
 #define VMM_ERROR          3
 
-#define FEATURE_CONTROL_LOCK  0x00000001
-#define FEATURE_CONTROL_VMXON 0x00000004
-#define FEATURE_CONTROL_VALID ( FEATURE_CONTROL_LOCK | FEATURE_CONTROL_VMXON )
 
 
-#define CPUID_1_ECX_VTXFLAG 0x00000020
 
 
 struct vmx_pin_ctrls {
@@ -168,15 +151,6 @@ struct vmx_entry_ctrls {
     } __attribute__((packed));
 } __attribute__((packed));
 
-struct vmx_basic_msr {
-    uint32_t revision;
-    uint_t regionSize   : 13;
-    uint_t rsvd1        : 4; // Always 0
-    uint_t physWidth    : 1;
-    uint_t smm          : 1; // Always 1
-    uint_t memType      : 4;
-    uint_t rsvd2        : 10; // Always 0
-}  __attribute__((packed));
 
 typedef enum { 
     VMXASSIST_DISABLED,
diff --git a/palacios/include/palacios/vmx_ept.h b/palacios/include/palacios/vmx_ept.h
new file mode 100644 (file)
index 0000000..55cb363
--- /dev/null
@@ -0,0 +1,124 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMX_EPT_H__
+#define __VMX_EPT_H__
+
+
+#ifdef __V3VEE__
+
+/* The actual format of these data structures is specified as being machine 
+   dependent. Thus the lengths of the base address fields are defined as variable. 
+   To be safe we assume the maximum(?) size fields 
+*/
+
+
+/*
+ * EPT pointer, expressed as packed bitfields whose widths sum to 64 bits
+ * (3 + 3 + 6 + 39 + 13).
+ * NOTE(review): psmt / pwl1 presumably are the paging-structure memory type
+ * and the page-walk length minus one -- confirm against the Intel SDM.
+ */
+typedef struct vmx_eptp {
+    uint8_t psmt            : 3;
+    uint8_t pwl1            : 3;
+    uint8_t rsvd1           : 6;
+    uint64_t pml_base_addr  : 39;
+    uint16_t rsvd2          : 13;
+} __attribute__((packed)) vmx_eptp_t;
+
+
+/*
+ * EPT PML4 entry: read/write/exec permission bits followed by a 39-bit
+ * base-address field.  Bitfield widths sum to 64 bits.
+ * NOTE(review): pdp_base_addr presumably is the page frame of the next-level
+ * (PDP) table -- confirm against the Intel SDM.
+ */
+typedef struct vmx_pml4 {
+    uint8_t read            : 1;
+    uint8_t write           : 1;
+    uint8_t exec            : 1;
+    uint8_t rsvd1           : 5;
+    uint8_t ignore1         : 4;
+    uint64_t pdp_base_addr  : 39;
+    uint8_t rsvd2           : 1;
+    uint32_t ignore2        : 12;
+} __attribute__((packed)) vmx_pml4_t;
+
+
+/*
+ * EPT PDP entry variant for 1GB pages.  Bitfield widths sum to 64 bits; the
+ * 21-bit page_base_addr is preceded by 18 reserved bits.
+ * NOTE(review): mt / ipat presumably are the EPT memory type and the
+ * "ignore PAT" flag -- confirm against the Intel SDM.
+ */
+typedef struct vmx_pdp_1GB {
+    uint8_t read            : 1;
+    uint8_t write           : 1;
+    uint8_t exec            : 1;
+    uint8_t mt              : 3;
+    uint8_t ipat            : 1;
+    uint8_t large_page      : 1;
+    uint8_t ignore1         : 4;
+    uint32_t rsvd1          : 18;
+    uint32_t page_base_addr : 21;
+    uint8_t rsvd2           : 1;
+    uint32_t ignore2        : 12;
+} __attribute__((packed)) vmx_pdp_1GB_t;
+
+/*
+ * EPT PDP entry (non-1GB variant): permission bits, a large_page flag and a
+ * 39-bit base-address field.  Bitfield widths sum to 64 bits.
+ *
+ * page_base_addr is declared with a 64-bit base type because a bit-field may
+ * not be wider than its declared type; a 39-bit field cannot live in a
+ * uint32_t.
+ */
+typedef struct vmx_pdp {
+    uint8_t read            : 1;
+    uint8_t write           : 1;
+    uint8_t exec            : 1;
+    uint8_t rsvd1           : 4;
+    uint8_t large_page      : 1;
+    uint8_t ignore1         : 4;
+    uint64_t page_base_addr : 39;
+    uint8_t rsvd2           : 1;
+    uint32_t ignore2        : 12;
+} __attribute__((packed)) vmx_pdp_t;
+
+
+/*
+ * EPT PDE variant for 2MB pages.  Bitfield widths sum to 64 bits; the 30-bit
+ * page_base_addr is preceded by 9 reserved bits.
+ * NOTE(review): mt / ipat presumably are the EPT memory type and the
+ * "ignore PAT" flag -- confirm against the Intel SDM.
+ */
+typedef struct vmx_pde_2MB {
+    uint8_t read            : 1;
+    uint8_t write           : 1;
+    uint8_t exec            : 1;
+    uint8_t mt              : 3;
+    uint8_t ipat            : 1;
+    uint8_t large_page      : 1;
+    uint8_t ignore1         : 4;
+    uint32_t rsvd1          : 9;
+    uint32_t page_base_addr : 30;
+    uint8_t rsvd2           : 1;
+    uint32_t ignore2        : 12;
+} __attribute__((packed)) vmx_pde_2MB_t;
+
+
+/*
+ * EPT PDE (non-2MB variant): permission bits, a large_page flag and a 39-bit
+ * base-address field.  Bitfield widths sum to 64 bits.
+ *
+ * page_base_addr is declared with a 64-bit base type because a bit-field may
+ * not be wider than its declared type; a 39-bit field cannot live in a
+ * uint32_t.
+ */
+typedef struct vmx_pde {
+    uint8_t read            : 1;
+    uint8_t write           : 1;
+    uint8_t exec            : 1;
+    uint8_t rsvd1           : 4;
+    uint8_t large_page      : 1;
+    uint8_t ignore1         : 4;
+    uint64_t page_base_addr : 39;
+    uint8_t rsvd2           : 1;
+    uint32_t ignore2        : 12;
+} __attribute__((packed)) vmx_pde_t;
+
+
+
+/*
+ * EPT PTE: permission bits, 3-bit mt and 1-bit ipat fields, and a 39-bit
+ * base-address field.  Bitfield widths sum to 64 bits.
+ * NOTE(review): mt / ipat presumably are the EPT memory type and the
+ * "ignore PAT" flag -- confirm against the Intel SDM.
+ *
+ * page_base_addr is declared with a 64-bit base type because a bit-field may
+ * not be wider than its declared type; a 39-bit field cannot live in a
+ * uint32_t.
+ */
+typedef struct vmx_pte {
+    uint8_t read            : 1;
+    uint8_t write           : 1;
+    uint8_t exec            : 1;
+    uint8_t mt              : 3;
+    uint8_t ipat            : 1;
+    uint8_t ignore1         : 5;
+    uint64_t page_base_addr : 39;
+    uint8_t rsvd2           : 1;
+    uint32_t ignore2        : 12;
+} __attribute__((packed)) vmx_pte_t;
+
+#endif 
+
+#endif
+
diff --git a/palacios/include/palacios/vmx_hw_info.h b/palacios/include/palacios/vmx_hw_info.h
new file mode 100644 (file)
index 0000000..e130545
--- /dev/null
@@ -0,0 +1,169 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+#ifndef __VMX_HW_INFO_H__
+#define __VMX_HW_INFO_H__
+
+#ifdef __V3VEE__
+
+
+
+#define VMX_BASIC_MSR               0x00000480
+#define VMX_PINBASED_CTLS_MSR       0x00000481
+#define VMX_PROCBASED_CTLS_MSR      0x00000482
+#define VMX_EXIT_CTLS_MSR           0x00000483
+#define VMX_ENTRY_CTLS_MSR          0x00000484
+#define VMX_MISC_MSR                0x00000485
+#define VMX_CR0_FIXED0_MSR          0x00000486
+#define VMX_CR0_FIXED1_MSR          0x00000487
+#define VMX_CR4_FIXED0_MSR          0x00000488
+#define VMX_CR4_FIXED1_MSR          0x00000489
+#define VMX_VMCS_ENUM_MSR           0x0000048A
+#define VMX_PROCBASED_CTLS2_MSR     0x0000048B
+#define VMX_EPT_VPID_CAP_MSR        0x0000048C
+#define VMX_TRUE_PINBASED_CTLS_MSR  0x0000048D
+#define VMX_TRUE_PROCBASED_CTLS_MSR 0x0000048E
+#define VMX_TRUE_EXIT_CTLS_MSR      0x0000048F
+#define VMX_TRUE_ENTRY_CTLS_MSR     0x00000490
+
+
+
+/*
+ * 64-bit value viewable either as two raw 32-bit halves (lo/hi) or as decoded
+ * bitfields (32-bit revision followed by 32 bits of flags).
+ * NOTE(review): presumably holds the contents of VMX_BASIC_MSR -- confirm
+ * against the code that fills it in.
+ */
+struct vmx_basic_msr {
+    union {
+       struct {
+           uint32_t lo;
+           uint32_t hi;
+       } __attribute__((packed));
+
+       struct {    uint32_t revision;
+           uint32_t regionSize   : 13;
+           uint8_t rsvd1         : 3; /* Always 0 */
+           uint8_t physWidth     : 1; /* VMCS address field widths 
+                                         (1=32bits, 0=natural width) */
+           uint8_t smm           : 1;
+           uint8_t memType       : 4; /* 0 = UC, 6 = WriteBack */
+           uint8_t io_str_info   : 1;
+           uint8_t def1_maybe_0  : 1; /* 1="Any VMX ctrls that default to 1 may be cleared to 0" */
+           uint32_t rsvd2        : 8; /* Always 0 */
+       }  __attribute__((packed));
+    }  __attribute__((packed));
+}  __attribute__((packed));
+
+
+/*
+ * 64-bit value viewable either as two raw 32-bit halves (lo/hi) or as decoded
+ * bitfields (32 bits of flags followed by the 32-bit MSEG_rev_id).
+ * NOTE(review): presumably holds the contents of VMX_MISC_MSR -- confirm
+ * against the code that fills it in.
+ */
+struct vmx_misc_msr {
+    union {
+       struct {
+           uint32_t lo;
+           uint32_t hi;
+       } __attribute__((packed));
+
+       struct {
+           uint8_t tsc_multiple       : 5; /* Bit position in TSC field that drives vmx timer step */
+           uint8_t exits_store_LMA    : 1;
+           uint8_t can_halt           : 1;
+           uint8_t can_shtdown        : 1;
+           uint8_t can_wait_for_sipi  : 1;
+           uint8_t rsvd1              : 7;
+           uint16_t num_cr3_targets   : 9;
+           uint8_t max_msr_cache_size : 3; /* (512 * (max_msr_cache_size + 1)) == max msr load/store list size */
+           uint8_t SMM_ctrl_avail     : 1;
+           uint8_t rsvd2              : 3; 
+           uint32_t MSEG_rev_id;
+       }  __attribute__((packed));
+    }  __attribute__((packed));
+} __attribute__((packed));
+
+
+/*
+ * 64-bit value viewable either as two raw 32-bit halves (lo/hi) or as decoded
+ * single-bit capability flags separated by reserved runs (widths sum to 64).
+ * NOTE(review): presumably holds the contents of VMX_EPT_VPID_CAP_MSR --
+ * confirm against the code that fills it in.
+ */
+struct vmx_ept_msr {
+    union {
+       struct {
+           uint32_t lo;
+           uint32_t hi;
+       } __attribute__((packed));
+
+       struct {
+           uint8_t exec_only_ok             : 1;
+           uint8_t rsvd1                    : 5;
+           uint8_t pg_walk_len4             : 1; /* support for a page walk of length 4 */
+           uint8_t rsvd2                    : 1;
+           uint8_t ept_uc_ok                : 1; /* EPT page tables can be uncacheable */
+           uint8_t rsvd3                    : 5;
+           uint8_t ept_wb_ok                : 1; /* EPT page tables can be writeback */
+           uint8_t rsvd4                    : 1;
+           uint8_t ept_2MB_ok               : 1; /* 2MB EPT pages supported */
+           uint8_t ept_1GB_ok               : 1; /* 1GB EPT pages supported */
+           uint8_t rsvd5                    : 2;
+           uint8_t INVEPT_avail             : 1; /* INVEPT instruction is available */
+           uint8_t rsvd6                    : 4;
+           uint8_t INVEPT_single_ctx_avail  : 1;
+           uint8_t INVEPT_all_ctx_avail     : 1;
+           uint8_t rsvd7                    : 5;
+           uint8_t INVVPID_avail            : 1;
+           uint8_t rsvd8                    : 7;
+           uint8_t INVVPID_1addr_avail      : 1;
+           uint8_t INVVPID_single_ctx_avail : 1;
+           uint8_t INVVPID_all_ctx_avail    : 1;
+           uint8_t INVVPID_single_ctx_w_glbls_avail : 1;
+           uint32_t rsvd9                   : 20;
+       }  __attribute__((packed));
+    }  __attribute__((packed));
+}__attribute__((packed));
+
+
+/* Constraints on a 32-bit VMX control field: a default value plus a mask/value
+ * pair describing the bits whose setting is fixed. */
+struct vmx_ctrl_field {
+    uint32_t def_val;
+    uint32_t req_val;  /* Required values: field_val & req_mask == req_val */ 
+    uint32_t req_mask; /* If a mask bit is set its value is restricted (i.e. the VMM cannot change it) */
+};
+
+
+/* 64-bit counterpart of struct vmx_ctrl_field; used for the cr0/cr4 members
+ * of struct vmx_hw_info. */
+struct vmx_cr_field {
+    uint64_t def_val;
+    uint64_t req_val;  /* Required values: field_val & req_mask == req_val */ 
+    uint64_t req_mask; /* If a mask bit is set its value is restricted (i.e. the VMM cannot change it) */
+};
+
+
+
+
+/*
+ * Aggregate of the VMX capability information declared in this header: the
+ * three decoded MSR overlays, the constraints for each 32-bit control field,
+ * and the constraints for CR0 and CR4.
+ * NOTE(review): presumably populated by v3_init_vmx_hw() -- its definition is
+ * not in this header.
+ */
+struct vmx_hw_info {
+    struct vmx_basic_msr basic_info;
+    struct vmx_misc_msr misc_info;
+    struct vmx_ept_msr ept_info;
+
+    struct vmx_ctrl_field pin_ctrls;
+    struct vmx_ctrl_field proc_ctrls;
+    struct vmx_ctrl_field exit_ctrls;
+    struct vmx_ctrl_field entry_ctrls;
+    struct vmx_ctrl_field proc_ctrls_2;
+
+    struct vmx_cr_field cr0;
+    struct vmx_cr_field cr4;
+};
+
+
+int v3_init_vmx_hw(struct vmx_hw_info * hw_info);
+
+
+
+
+#endif
+
+#endif
index 6db9f17..ce6a440 100644 (file)
 
 
 
-static inline int v3_enable_vmx(addr_t vmxon_ptr) {
-    uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr;
-    uint8_t ret_invalid = 0;
 
-    __asm__ __volatile__ (
-                VMXON_OPCODE
-                EAX_06_MODRM
-                "setnaeb %0;" // fail invalid (CF=1)
-                : "=q"(ret_invalid)
-                : "a"(&vmxon_ptr_64),"0"(ret_invalid)
-                : "memory");
-
-    if (ret_invalid) {
-        return VMX_FAIL_INVALID;
-    } else {
-        return VMX_SUCCESS;
-    }
-}
 
 static inline int vmcs_clear(addr_t vmcs_ptr) {
     uint64_t vmcs_ptr_64 __attribute__ ((aligned(8))) = (uint64_t)vmcs_ptr;
@@ -181,6 +164,26 @@ static inline int vmcs_write(vmcs_field_t vmcs_field, addr_t value) {
     return VMX_SUCCESS;
 }
 
+
+/*
+ * Issues the instruction encoded by VMXON_OPCODE / EAX_06_MODRM with the
+ * address of a local, 8-byte aligned 64-bit copy of vmxon_ptr in the "a"
+ * register.
+ *
+ * Returns VMX_FAIL_INVALID when the instruction leaves CF set (captured with
+ * setnae), VMX_SUCCESS otherwise.
+ * NOTE(review): only the CF condition is captured here; ZF is not examined.
+ */
+static inline int vmx_on(addr_t vmxon_ptr) {
+    uint64_t vmxon_ptr_64 __attribute__((aligned(8))) = (uint64_t)vmxon_ptr;
+    uint8_t ret_invalid = 0;
+
+    __asm__ __volatile__ (
+                VMXON_OPCODE
+                EAX_06_MODRM
+                "setnaeb %0;" // fail invalid (CF=1)
+                : "=q"(ret_invalid)
+                : "a"(&vmxon_ptr_64),"0"(ret_invalid)
+                : "memory");
+
+    if (ret_invalid) {
+        return VMX_FAIL_INVALID;
+    } else {
+        return VMX_SUCCESS;
+    }
+}
+
 static inline int vmx_off() {
     uint8_t ret_valid = 0;
     uint8_t ret_invalid = 0;
@@ -198,6 +201,57 @@ static inline int vmx_off() {
     return VMX_SUCCESS;
 }
 
+
+/*
+ * Sets bit 13 of CR4 (0x2000, CR4.VMXE in the Intel SDM) and bit 5 of CR0
+ * (0x20, CR0.NE in the Intel SDM) on the current CPU using a scratch
+ * register (rbx on 64-bit builds, ecx on 32-bit builds).
+ *
+ * If neither __V3_64BIT__ nor __V3_32BIT__ is defined, no register is touched.
+ * Always returns 0.
+ */
+static inline int enable_vmx() {
+#ifdef __V3_64BIT__
+    __asm__ __volatile__ (
+                         "movq %%cr4, %%rbx;"
+                         "orq  $0x00002000, %%rbx;"
+                         "movq %%rbx, %%cr4;"
+                         : 
+                         :
+                         : "%rbx"
+                         );
+
+
+    __asm__ __volatile__ (
+                         "movq %%cr0, %%rbx; "
+                         "orq  $0x00000020,%%rbx; "
+                         "movq %%rbx, %%cr0;"
+                         :
+                         :
+                         : "%rbx"
+                         );
+/* Test for definition (not value) so this matches the #ifdef above and stays
+   valid when the macro is defined with an empty expansion. */
+#elif defined(__V3_32BIT__)
+    __asm__ __volatile__ (
+                         "movl %%cr4, %%ecx;"
+                         "orl  $0x00002000, %%ecx;"
+                         "movl %%ecx, %%cr4;"
+                         : 
+                         :
+                         : "%ecx"
+                         );
+
+
+
+    __asm__ __volatile__ (
+                         "movl %%cr0, %%ecx; "
+                         "orl  $0x00000020,%%ecx; "
+                         "movl %%ecx, %%cr0;"
+                         :
+                         :
+                         : "%ecx"
+                         );
+    
+#endif
+
+    return 0;
+}
+
+
+
+
+
 #endif
 
 #endif
index eb15aca..35b8523 100644 (file)
@@ -43,7 +43,16 @@ config GENERIC
        bool "Generic Device"
        default y
        help 
-         Includes the Virtual Generic device
+         Includes the virtual generic device.  This device allows you
+          to see guest I/O port and memory region interaction with a physical
+          device on the underlying hardware, as well as to ignore such
+          interaction.  The generic device also serves as a front-end
+          device for non-PCI host-based virtual device implementations.  If
+          you want to handle either host-based virtual or physical devices
+          that are not PCI devices, this is what you want.  If you want
+          to handle a host-based virtual device that is a PCI device, you  
+          want to use the PCI front-end device.  If you want to handle
+          a physical PCI device, you want the passthrough PCI device.  
 
 config DEBUG_GENERIC
        bool "Generic device Debugging"
@@ -156,7 +165,7 @@ config LINUX_VIRTIO_VNET
         default n
         depends on PCI && EXPERIMENTAL && VNET
         help
-          Enable the Virtio VNET interface
+          Enable the Virtio VNET interface for Control VM
 
 config DEBUG_LINUX_VIRTIO_VNET
         bool "Virtio VNET Interface Debugging"
@@ -167,11 +176,11 @@ config DEBUG_LINUX_VIRTIO_VNET
 
 
 config VNET_NIC
-        bool "Enable VNET VIrtio NIC Device"
+        bool "Enable VNET Backend Device"
         default n
        depends on PCI && EXPERIMENTAL && VNET
         help
-          Enable the VNET Virtio backend device
+          Enable the VNET backend device
 
 config DEBUG_VNET_NIC
         bool "VNET NIC Device Debugging"
@@ -267,6 +276,7 @@ config PASSTHROUGH_PCI
        help 
          Enables hardware devices to be passed through to the VM
 
+
 config DEBUG_PCI
        bool "PCI debugging"
        depends on PCI && DEBUG_ON
@@ -274,6 +284,26 @@ config DEBUG_PCI
          Enable debugging for the PCI  
 
 
+config PCI_FRONT
+       bool "PCI front-end device"
+       default y 
+       depends on PCI && HOST_DEVICE
+       help 
+         PCI front-end device for a host-based PCI device implementation.
+          This device allows you to project a host-based *virtual* device 
+          into the guest as a PCI device.   If you want to project a 
+          physical PCI device, use Passthrough PCI instead.  If you want
+          to project a non-PCI virtual or physical device, 
+          use the generic device.
+          
+
+config DEBUG_PCI_FRONT
+       bool "PCI front-end debugging"
+       depends on PCI_FRONT && DEBUG_ON
+       help 
+         Enable debugging for the PCI front-end device 
+          
+
 
 config PIC
        bool "8259A PIC"
index f5b40be..51b43e9 100644 (file)
@@ -45,3 +45,5 @@ obj-$(CONFIG_MCHECK) += mcheck.o
 
 obj-$(CONFIG_VGA) += vga.o
 
+obj-$(CONFIG_PCI_FRONT) += pci_front.o
+
index b7d1a18..73b778a 100644 (file)
 #include <palacios/vmm_list.h>
 #include <palacios/vmm_io.h>
 #include <palacios/vmm_dev_mgr.h>
+#include <palacios/vm_guest_mem.h>
+
+#ifdef CONFIG_HOST_DEVICE
+#include <interfaces/vmm_host_dev.h>
+#endif
 
 #ifndef CONFIG_DEBUG_GENERIC
 #undef PrintDebug
 #define PrintDebug(fmt, args...)
 #endif
 
+#define MAX_NAME      32
+#define MAX_MEM_HOOKS 16
 
 typedef enum {GENERIC_IGNORE, 
              GENERIC_PASSTHROUGH, 
@@ -37,32 +44,64 @@ typedef enum {GENERIC_IGNORE,
              GENERIC_PRINT_AND_IGNORE} generic_mode_t;
 
 struct generic_internal {
+    // Selects where the passthrough handlers forward an access:
+    // GENERIC_PHYSICAL -> direct port/memory access, GENERIC_HOST -> host_dev
+    enum {GENERIC_PHYSICAL, GENERIC_HOST} forward_type;
+#ifdef CONFIG_HOST_DEVICE
+    // Host device handle used for GENERIC_HOST forwarding; closed in generic_free()
+    v3_host_dev_t                         host_dev;
+#endif
+    struct vm_device                      *dev; // me
+
+    // Instance name, used as the "generic (%s)" prefix in log messages
+    char                                  name[MAX_NAME];
+    
+    // mem_hook[0 .. num_mem_hooks-1] are the addresses generic_free() passes
+    // to v3_unhook_mem()
+    uint32_t                              num_mem_hooks;
+    addr_t                                mem_hook[MAX_MEM_HOOKS];
 };
 
 
 
 
-static int generic_write_port_passthrough(struct guest_info * core, uint16_t port, void * src, 
-                                         uint_t length, void * priv_data) {
+static int generic_write_port_passthrough(struct guest_info * core, 
+                                         uint16_t port, 
+                                         void * src, 
+                                         uint_t length, 
+                                         void * priv_data) 
+{
+    struct generic_internal *state = (struct generic_internal *) priv_data;
     uint_t i;
 
-    switch (length) {
-       case 1:
-           v3_outb(port, ((uint8_t *)src)[0]);
-           break;
-       case 2:
-           v3_outw(port, ((uint16_t *)src)[0]);
+    switch (state->forward_type) { 
+       case GENERIC_PHYSICAL:
+           switch (length) {
+               case 1:
+                   v3_outb(port, ((uint8_t *)src)[0]);
+                   break;
+               case 2:
+                   v3_outw(port, ((uint16_t *)src)[0]);
+                   break;
+               case 4:
+                   v3_outdw(port, ((uint32_t *)src)[0]);
+                   break;
+               default:
+                   for (i = 0; i < length; i++) { 
+                       v3_outb(port, ((uint8_t *)src)[i]);
+                   }
+                   break;
+           }
+           return length;
            break;
-       case 4:
-           v3_outdw(port, ((uint32_t *)src)[0]);
+#ifdef CONFIG_HOST_DEVICE
+       case GENERIC_HOST:
+           if (state->host_dev) { 
+               return v3_host_dev_write_io(state->host_dev,port,src,length);
+           } else {
+               return -1;
+           }
            break;
+#endif
        default:
-           for (i = 0; i < length; i++) { 
-               v3_outb(port, ((uint8_t *)src)[i]);
-           }
+           PrintError("generic (%s): unknown forwarding type\n", state->name);
+           return -1;
+           break;
     }
-
-    return length;
 }
 
 static int generic_write_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src, 
@@ -70,7 +109,16 @@ static int generic_write_port_print_and_passthrough(struct guest_info * core, ui
     uint_t i;
     int rc;
 
-    PrintDebug("generic: writing 0x");
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+    PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ...", state->name,
+              length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+    PrintDebug("generic (%s): writing 0x", state->name);
 
     for (i = 0; i < length; i++) { 
        PrintDebug("%x", ((uint8_t *)src)[i]);
@@ -85,35 +133,63 @@ static int generic_write_port_print_and_passthrough(struct guest_info * core, ui
     return rc;
 }
 
-static int generic_read_port_passthrough(struct guest_info * core, uint16_t port, void * src, 
-                                        uint_t length, void * priv_data) {
+static int generic_read_port_passthrough(struct guest_info * core, 
+                                        uint16_t port, 
+                                        void * dst, 
+                                        uint_t length, 
+                                        void * priv_data) 
+{
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+
     uint_t i;
 
-    switch (length) {
-       case 1:
-           ((uint8_t *)src)[0] = v3_inb(port);
-           break;
-       case 2:
-           ((uint16_t *)src)[0] = v3_inw(port);
+    switch (state->forward_type) { 
+       case GENERIC_PHYSICAL:
+           switch (length) {
+               case 1:
+                   ((uint8_t *)dst)[0] = v3_inb(port);
+                   break;
+               case 2:
+                   ((uint16_t *)dst)[0] = v3_inw(port);
+                   break;
+               case 4:
+                   ((uint32_t *)dst)[0] = v3_indw(port);
+                   break;
+               default:
+                   for (i = 0; i < length; i++) { 
+                       ((uint8_t *)dst)[i] = v3_inb(port);
+                   }
+           }
+           return length;
            break;
-       case 4:
-           ((uint32_t *)src)[0] = v3_indw(port);
+#ifdef CONFIG_HOST_DEVICE
+       case GENERIC_HOST:
+           if (state->host_dev) { 
+               return v3_host_dev_read_io(state->host_dev,port,dst,length);
+           }
            break;
+#endif
        default:
-           for (i = 0; i < length; i++) { 
-               ((uint8_t *)src)[i] = v3_inb(port);
-           }
+           PrintError("generic (%s): unknown forwarding type\n", state->name);
+           return -1;
+           break;
     }
 
-    return length;
+    return -1;
 }
 
 static int generic_read_port_print_and_passthrough(struct guest_info * core, uint16_t port, void * src, 
                                                   uint_t length, void * priv_data) {
     uint_t i;
     int rc;
-    
-    PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port);
+
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+    PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
 
 
     rc=generic_read_port_passthrough(core,port,src,length,priv_data);
@@ -141,7 +217,14 @@ static int generic_read_port_ignore(struct guest_info * core, uint16_t port, voi
 static int generic_read_port_print_and_ignore(struct guest_info * core, uint16_t port, void * src, 
                                              uint_t length, void * priv_data) {
    
-    PrintDebug("generic: reading 0x%x bytes from port 0x%x ...", length, port);
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
+
+    PrintDebug("generic (%s): reading 0x%x bytes from port 0x%x using %s ...", state->name, length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
 
     memset((uint8_t *)src, 0, length);
     PrintDebug(" ignored (return zeroed buffer)\n");
@@ -159,8 +242,14 @@ static int generic_write_port_print_and_ignore(struct guest_info * core, uint16_
                                              uint_t length, void * priv_data) {
     int i;
 
-    PrintDebug("generic: writing 0x%x bytes to port 0x%x ", length, port);
+#ifdef CONFIG_DEBUG_GENERIC
+    struct generic_internal *state = (struct generic_internal *) priv_data;
+#endif
 
+    PrintDebug("generic (%s): writing 0x%x bytes to port 0x%x using %s ", state->name, length, port,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" :
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+    
     memset((uint8_t *)src, 0, length);
     PrintDebug(" ignored - data was: 0x");
 
@@ -175,10 +264,197 @@ static int generic_write_port_print_and_ignore(struct guest_info * core, uint16_
 
 
 
+/*
+ * Memory write hook, passthrough mode.
+ *
+ * priv is the struct vm_device whose private_data is the generic_internal
+ * state.  Depending on state->forward_type the len bytes at src are either
+ * copied to V3_VAddr(gpa) (GENERIC_PHYSICAL) or handed to the host device
+ * (GENERIC_HOST, only when CONFIG_HOST_DEVICE is set).
+ *
+ * Returns len for the physical path, the host device's result for the host
+ * path, and -1 when there is no host device or the forwarding type is not
+ * handled.
+ * NOTE(review): gpa is passed straight to V3_VAddr; presumably the hooked
+ * region is expected to be identity-mapped guest->host -- confirm.
+ */
+static int generic_write_mem_passthrough(struct guest_info * core, 
+                                        addr_t              gpa,
+                                        void              * src,
+                                        uint_t              len,
+                                        void              * priv)
+{
+    struct generic_internal *state = 
+       (struct generic_internal *) ((struct vm_device *) priv)->private_data;
+
+    if (state->forward_type == GENERIC_PHYSICAL) { 
+       memcpy(V3_VAddr((void*)gpa),src,len);
+       return len;
+    }
+
+#ifdef CONFIG_HOST_DEVICE
+    if (state->forward_type == GENERIC_HOST) { 
+       if (!state->host_dev) { 
+           return -1;
+       }
+       return v3_host_dev_write_mem(state->host_dev,gpa,src,len);
+    }
+#endif
+
+    PrintError("generic (%s): unknown forwarding type\n", state->name);
+    return -1;
+}
 
-static int generic_free(struct generic_internal * state) {
-    PrintDebug("generic: deinit_device\n");
+/*
+ * Memory write hook, print-and-passthrough mode: logs the access with
+ * PrintDebug, then delegates to generic_write_mem_passthrough() and returns
+ * its result unchanged.
+ */
+static int generic_write_mem_print_and_passthrough(struct guest_info * core, 
+                                                  addr_t              gpa,
+                                                  void              * src,
+                                                  uint_t              len,
+                                                  void              * priv)
+{
+    // state is only referenced by PrintDebug, which is compiled out (see the
+    // top of this file) unless CONFIG_DEBUG_GENERIC is defined
+#ifdef CONFIG_DEBUG_GENERIC
+    struct vm_device *dev = (struct vm_device *) priv;
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+    PrintDebug("generic (%s): writing %u bytes to GPA 0x%p via %s ... ", state->name,
+              len,(void*)gpa,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+    
+    int rc = generic_write_mem_passthrough(core,gpa,src,len,priv);
+
+    PrintDebug("done\n");
+    
+    return rc;
+}
 
+/*
+ * Memory write hook, ignore mode: discards the data and reports all len
+ * bytes as written.
+ */
+static int generic_write_mem_ignore(struct guest_info * core, 
+                                   addr_t              gpa,
+                                   void              * src,
+                                   uint_t              len,
+                                   void              * priv)
+{
+    return len;
+}
+
+/*
+ * Memory write hook, print-and-ignore mode: logs the access with PrintDebug,
+ * discards the data and reports all len bytes as written.
+ */
+static int generic_write_mem_print_and_ignore(struct guest_info * core, 
+                                             addr_t              gpa,
+                                             void              * src,
+                                             uint_t              len,
+                                             void              * priv)
+{
+    // state is only referenced by PrintDebug, which is compiled out unless
+    // CONFIG_DEBUG_GENERIC is defined
+#ifdef CONFIG_DEBUG_GENERIC
+    struct vm_device *dev = (struct vm_device *) priv;
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+    // Terminate the line: nothing else is printed for this access
+    PrintDebug("generic (%s): ignoring write of %u bytes to GPA 0x%p via %s\n", state->name,
+              len,(void*)gpa,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+    
+    return len;
+}
+
+/*
+ * Memory read hook, passthrough mode.
+ *
+ * priv is the struct vm_device whose private_data is the generic_internal
+ * state.  Depending on state->forward_type, len bytes are either copied from
+ * V3_VAddr(gpa) into dst (GENERIC_PHYSICAL) or requested from the host device
+ * (GENERIC_HOST, only when CONFIG_HOST_DEVICE is set).
+ *
+ * Returns len for the physical path, the host device's result for the host
+ * path, and -1 when there is no host device or the forwarding type is not
+ * handled.
+ * NOTE(review): gpa is passed straight to V3_VAddr; presumably the hooked
+ * region is expected to be identity-mapped guest->host -- confirm.
+ */
+static int generic_read_mem_passthrough(struct guest_info * core, 
+                                       addr_t              gpa,
+                                       void              * dst,
+                                       uint_t              len,
+                                       void              * priv)
+{
+    struct generic_internal *state = 
+       (struct generic_internal *) ((struct vm_device *) priv)->private_data;
+
+    if (state->forward_type == GENERIC_PHYSICAL) { 
+       memcpy(dst,V3_VAddr((void*)gpa),len);
+       return len;
+    }
+
+#ifdef CONFIG_HOST_DEVICE
+    if (state->forward_type == GENERIC_HOST) { 
+       if (!state->host_dev) { 
+           return -1;
+       }
+       return v3_host_dev_read_mem(state->host_dev,gpa,dst,len);
+    }
+#endif
+
+    PrintError("generic (%s): unknown forwarding type\n", state->name);
+    return -1;
+}
+
+/*
+ * Memory read hook, print-and-passthrough mode: logs the access with
+ * PrintDebug, delegates to generic_read_mem_passthrough(), logs the result
+ * and returns it unchanged.
+ */
+static int generic_read_mem_print_and_passthrough(struct guest_info * core, 
+                                                 addr_t              gpa,
+                                                 void              * dst,
+                                                 uint_t              len,
+                                                 void              * priv)
+{
+    // state is only referenced by PrintDebug, which is compiled out unless
+    // CONFIG_DEBUG_GENERIC is defined
+#ifdef CONFIG_DEBUG_GENERIC
+    struct vm_device *dev = (struct vm_device *) priv;
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+    PrintDebug("generic (%s): attempting to read %u bytes from GPA 0x%p via %s ... ", state->name,
+              len,(void*)gpa,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+    
+    int rc = generic_read_mem_passthrough(core,gpa,dst,len,priv);
+
+    PrintDebug("done - read %d bytes\n", rc);
+    
+    return rc;
+}
+
+/*
+ * Memory read hook, ignore mode: silently fills dst with len zero bytes and
+ * reports all len bytes as read.
+ */
+static int generic_read_mem_ignore(struct guest_info * core, 
+                                  addr_t              gpa,
+                                  void              * dst,
+                                  uint_t              len,
+                                  void              * priv)
+{
+    memset((uint8_t *)dst, 0, len);
+    return len;
+}
+
+
+/*
+ * Memory read hook, print-and-ignore mode: logs the access with PrintDebug,
+ * fills dst with len zero bytes and reports all len bytes as read.
+ */
+static int generic_read_mem_print_and_ignore(struct guest_info * core, 
+                                            addr_t              gpa,
+                                            void              * dst,
+                                            uint_t              len,
+                                            void              * priv)
+{
+    // state is only referenced by PrintDebug, which is compiled out unless
+    // CONFIG_DEBUG_GENERIC is defined
+#ifdef CONFIG_DEBUG_GENERIC
+    struct vm_device *dev = (struct vm_device *) priv;
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+#endif
+
+    PrintDebug("generic (%s): ignoring attempt to read %u bytes from GPA 0x%p via %s ... ", state->name,
+              len,(void*)gpa,
+              state->forward_type == GENERIC_PHYSICAL ? "physical" : 
+              state->forward_type == GENERIC_HOST ? "host" : "UNKNOWN");
+
+    memset((uint8_t *)dst, 0, len);
+
+    PrintDebug("returning zeros\n");
+
+    return len;
+}
+
+
+/*
+ * Tears down a generic device instance: closes the host device handle (when
+ * CONFIG_HOST_DEVICE is set and one is open), unhooks every memory region
+ * recorded in state->mem_hook, then frees state.
+ *
+ * Returns 0 on success, -1 as soon as a memory region cannot be unhooked.
+ * NOTE(review): on that early return state is not freed and the remaining
+ * regions stay hooked -- confirm this is what callers expect.
+ */
+static int generic_free(struct generic_internal * state) {
+    uint32_t i; // same type as state->num_mem_hooks, avoids a signed/unsigned compare
+    
+    PrintDebug("generic (%s): deinit_device\n", state->name);
+    
+#ifdef CONFIG_HOST_DEVICE
+    if (state->host_dev) { 
+       v3_host_dev_close(state->host_dev);
+       state->host_dev=0;
+    }
+#endif
+    
+    // Note that the device manager handles unhooking the I/O ports
+    // We need to handle unhooking memory regions    
+    for (i=0;i<state->num_mem_hooks;i++) {
+       if (v3_unhook_mem(state->dev->vm,V3_MEM_CORE_ANY,state->mem_hook[i])<0) { 
+           PrintError("generic (%s): unable to unhook memory starting at 0x%p\n", state->name,(void*)(state->mem_hook[i]));
+           return -1;
+       }
+    }
+            
     V3_Free(state);
     return 0;
 }
@@ -197,73 +473,223 @@ static struct v3_device_ops dev_ops = {
 static int add_port_range(struct vm_device * dev, uint_t start, uint_t end, generic_mode_t mode) {
     uint_t i = 0;
 
-    PrintDebug("generic: Adding Port Range: 0x%x to 0x%x as %s\n", 
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+    PrintDebug("generic (%s): adding port range 0x%x to 0x%x as %s\n", state->name,
               start, end, 
-              (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : "print-and-ignore");
-    
+              (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : 
+              (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" :
+              (mode == GENERIC_PASSTHROUGH) ? "passthrough" :
+              (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN");
+       
     for (i = start; i <= end; i++) { 
-       if (mode == GENERIC_PRINT_AND_PASSTHROUGH) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_print_and_passthrough, 
-                               &generic_write_port_print_and_passthrough) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+       switch (mode) { 
+           case GENERIC_PRINT_AND_PASSTHROUGH:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_print_and_passthrough, 
+                                  &generic_write_port_print_and_passthrough) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+               
+           case GENERIC_PRINT_AND_IGNORE:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_print_and_ignore, 
+                                  &generic_write_port_print_and_ignore) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+           case GENERIC_PASSTHROUGH:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_passthrough, 
+                                  &generic_write_port_passthrough) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+           case  GENERIC_IGNORE:
+               if (v3_dev_hook_io(dev, i, 
+                                  &generic_read_port_ignore, 
+                                  &generic_write_port_ignore) == -1) { 
+                   PrintError("generic (%s): can't hook port 0x%x (already hooked?)\n", state->name, i);
+                   return -1;
+               }
+               break;
+           default:
+               PrintError("generic (%s): huh?\n", state->name);
+               break;
+       }
+    }
+    
+    return 0;
+}
+
+
+static int add_mem_range(struct vm_device * dev, addr_t start, addr_t end, generic_mode_t mode) {
+
+    struct generic_internal *state = (struct generic_internal *) dev->private_data;
+
+    PrintDebug("generic (%s): adding memory range 0x%p to 0x%p as %s\n", state->name,
+              (void*)start, (void*)end, 
+              (mode == GENERIC_PRINT_AND_PASSTHROUGH) ? "print-and-passthrough" : 
+              (mode == GENERIC_PRINT_AND_IGNORE) ? "print-and-ignore" :
+              (mode == GENERIC_PASSTHROUGH) ? "passthrough" :
+              (mode == GENERIC_IGNORE) ? "ignore" : "UNKNOWN");
+       
+    switch (mode) { 
+       case GENERIC_PRINT_AND_PASSTHROUGH:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_print_and_passthrough, 
+                                &generic_write_mem_print_and_passthrough, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } else if (mode == GENERIC_PRINT_AND_IGNORE) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_print_and_ignore, 
-                               &generic_write_port_print_and_ignore) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+           break;
+           
+       case GENERIC_PRINT_AND_IGNORE:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_print_and_ignore, 
+                                &generic_write_mem_print_and_ignore, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } else if (mode == GENERIC_PASSTHROUGH) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_passthrough, 
-                               &generic_write_port_passthrough) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+           break;
+
+       case GENERIC_PASSTHROUGH:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_passthrough, 
+                                &generic_write_mem_passthrough, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } else if (mode == GENERIC_IGNORE) { 
-           if (v3_dev_hook_io(dev, i, 
-                               &generic_read_port_ignore, 
-                               &generic_write_port_ignore) == -1) { 
-               PrintError("generic: can't hook port 0x%x (already hooked?)\n", i);
+           break;
+
+       case  GENERIC_IGNORE:
+           if (v3_hook_full_mem(dev->vm, V3_MEM_CORE_ANY, start, end+1, 
+                                &generic_read_mem_ignore, 
+                                &generic_write_mem_ignore, dev) == -1) { 
+               PrintError("generic (%s): can't hook memory region 0x%p to 0x%p\n", state->name,(void*)start,(void*)end);
                return -1;
            }
-       } 
+           break;
+       default:
+           PrintError("generic (%s): huh?\n",state->name);
+           break;
     }
-    
+
     return 0;
 }
 
 
 
+/*
+   The device can be used to forward to the underlying physical device 
+   or to a host device that has a given url.   Both memory and ports can be forwarded as
+
+        GENERIC_PASSTHROUGH => send writes and reads to physical device or host
+        GENERIC_PRINT_AND_PASSTHROUGH => also print what it's doing
+
+        GENERIC_IGNORE => ignore writes and reads
+        GENERIC_PRINT_AND_IGNORE => also print what it's doing
+
+
+       The purpose of the "PRINT" variants is to make it easy to spy on
+       device interactions (although you will not see DMA or interrupts)
+
 
+   <device class="generic" id="my_id" 
+         empty | forward="physical_device" or forward="host_device" hostdev="url">
+
+  (empty implies physical_device)
+
+     <ports>
+         <start>portno1</start>
+         <end>portno2</end>   => portno1 through portno2 (inclusive)
+         <mode>PRINT_AND_PASSTHROUGH</mode>  (as above)
+     </ports>
+
+     <memory>
+         <start>gpa1</start>
+         <end>gpa2</end>     => memory addresses gpa1 through gpa2 (inclusive); page granularity
+         <mode> ... as above </mode>
+     </memory>
+
+*/
 
 static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     struct generic_internal * state = NULL;
     char * dev_id = v3_cfg_val(cfg, "ID");
+    char * forward = v3_cfg_val(cfg, "forward");
+#ifdef CONFIG_HOST_DEVICE
+    char * host_dev = v3_cfg_val(cfg, "hostdev");
+#endif
     v3_cfg_tree_t * port_cfg = v3_cfg_subtree(cfg, "ports");
+    v3_cfg_tree_t * mem_cfg = v3_cfg_subtree(cfg, "memory");
 
 
     state = (struct generic_internal *)V3_Malloc(sizeof(struct generic_internal));
 
     if (state == NULL) {
-       PrintError("Could not allocate generic state\n");
+       PrintError("generic (%s): could not allocate generic state\n",dev_id);
        return -1;
     }
     
     memset(state, 0, sizeof(struct generic_internal));
+    strncpy(state->name,dev_id,MAX_NAME);
+
+    if (!forward) { 
+       state->forward_type=GENERIC_PHYSICAL;
+    } else {
+       if (!strcasecmp(forward,"physical_device")) { 
+           state->forward_type=GENERIC_PHYSICAL;
+       } else if (!strcasecmp(forward,"host_device")) { 
+#ifdef CONFIG_HOST_DEVICE
+           state->forward_type=GENERIC_HOST;
+#else
+           PrintError("generic (%s): cannot configure host device since host device support is not built in\n", state->name);
+           V3_Free(state);
+           return -1;
+#endif
+       } else {
+           PrintError("generic (%s): unknown forwarding type \"%s\"\n", state->name, forward);
+           V3_Free(state);
+           return -1;
+       }
+    }
     
     struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, state);
 
     if (dev == NULL) {
-       PrintError("Could not attach device %s\n", dev_id);
+       PrintError("generic: could not attach device %s\n", state->name);
        V3_Free(state);
        return -1;
     }
 
-    PrintDebug("generic: init_device\n");
+    state->dev=dev;
+
+
+#ifdef CONFIG_HOST_DEVICE
+    if (state->forward_type==GENERIC_HOST) { 
+       if (!host_dev) { 
+           PrintError("generic (%s): host forwarding requested, but no host device given\n", state->name);
+           v3_remove_device(dev);
+           return -1;
+       } else {
+           state->host_dev = v3_host_dev_open(host_dev,V3_BUS_CLASS_DIRECT,dev,vm);
+           if (!(state->host_dev)) { 
+               PrintError("generic (%s): unable to open host device \"%s\"\n", state->name,host_dev);
+               v3_remove_device(dev);
+               return -1;
+           } else {
+               PrintDebug("generic (%s): successfully attached host device \"%s\"\n", state->name,host_dev);
+           }
+       }
+    }
+#endif
+
+    PrintDebug("generic (%s): init_device\n", state->name);
 
     // scan port list....
     while (port_cfg) {
@@ -271,7 +697,6 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        uint16_t end = atox(v3_cfg_val(port_cfg, "end"));
        char * mode_str = v3_cfg_val(port_cfg, "mode");
        generic_mode_t mode = GENERIC_IGNORE;
-
        if (strcasecmp(mode_str, "print_and_ignore") == 0) {
            mode = GENERIC_PRINT_AND_IGNORE;
        } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) {
@@ -281,13 +706,14 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        } else if (strcasecmp(mode_str, "ignore") == 0) {
            mode = GENERIC_IGNORE;
        } else {
-           PrintError("Invalid Mode %s\n", mode_str);
+           PrintError("generic (%s): invalid mode %s in adding ports\n", state->name, mode_str);
            v3_remove_device(dev);
            return -1;
        }
        
+       
        if (add_port_range(dev, start, end, mode) == -1) {
-           PrintError("Could not add port range %d-%d\n", start, end);
+           PrintError("generic (%s): could not add port range 0x%x to 0x%x\n", state->name, start, end);
            v3_remove_device(dev);
            return -1;
        }
@@ -295,6 +721,46 @@ static int generic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        port_cfg = v3_cfg_next_branch(port_cfg);
     }
 
+    // scan memory list: hook each <memory> branch and record its start GPA so generic_free can unhook it
+    while (mem_cfg) {
+       addr_t  start = atox(v3_cfg_val(mem_cfg, "start"));
+       addr_t end = atox(v3_cfg_val(mem_cfg, "end"));
+       char * mode_str = v3_cfg_val(mem_cfg, "mode");
+       generic_mode_t mode = GENERIC_IGNORE;
+
+       if (strcasecmp(mode_str, "print_and_ignore") == 0) {
+           mode = GENERIC_PRINT_AND_IGNORE;
+       } else if (strcasecmp(mode_str, "print_and_passthrough") == 0) {
+           mode = GENERIC_PRINT_AND_PASSTHROUGH;
+       } else if (strcasecmp(mode_str, "passthrough") == 0) {
+           mode = GENERIC_PASSTHROUGH;
+       } else if (strcasecmp(mode_str, "ignore") == 0) {
+           mode = GENERIC_IGNORE;
+       } else {
+           PrintError("generic (%s): invalid mode %s for adding memory\n", state->name, mode_str);
+           v3_remove_device(dev);
+           return -1;
+       }
+
+       if (state->num_mem_hooks>=MAX_MEM_HOOKS) { 
+           PrintError("generic (%s): cannot add another memory hook (increase MAX_MEM_HOOKS)\n", state->name);
+           v3_remove_device(dev);
+           return -1;
+       }
+       
+       if (add_mem_range(dev, start, end, mode) == -1) {
+           PrintError("generic (%s): could not add memory range 0x%p to 0x%p\n", state->name, (void*)start, (void*)end);
+           v3_remove_device(dev);
+           return -1;
+       }
+       
+       state->mem_hook[state->num_mem_hooks] = start;
+       state->num_mem_hooks++;
+
+       mem_cfg = v3_cfg_next_branch(mem_cfg);  // advance the memory iterator (port_cfg is already exhausted here)
+    }
+    
+    PrintDebug("generic (%s): initialization complete\n", state->name);
 
     return 0;
 }
index bb13a69..07c7d7b 100644 (file)
 #define PrintDebug(fmt, args...)
 #endif
 
+#define TX_QUEUE_SIZE 4096
+#define RX_QUEUE_SIZE 4096
+#define CTRL_QUEUE_SIZE 64
+
+/* The feature bitmap for virtio nic
+  * from Linux */
+#define VIRTIO_NET_F_CSUM       0       /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1       /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC        5       /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO        6       /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 7       /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8       /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN  9       /* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO  10      /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4  11      /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6  12      /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN   13      /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO   14      /* Host can handle UFO in. */
+#define VIRTIO_NET_F_MRG_RXBUF  15      /* Host can merge receive buffers. */
+#define VIRTIO_NET_F_STATUS     16      /* virtio_net_config.status available */
+
+/* Port to get virtio config */
+#define VIRTIO_NET_CONFIG 20  
+
 #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
 
+/* for gso_type in virtio_net_hdr */
+#define VIRTIO_NET_HDR_GSO_NONE         0      
+#define VIRTIO_NET_HDR_GSO_TCPV4        1     /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP          3       /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6        4       /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN          0x80    /* TCP has ECN set */  
 
-struct virtio_net_hdr {
-       uint8_t flags;
-       
-       uint8_t gso_type;
-       uint16_t hdr_len;               /* Ethernet + IP + tcp/udp hdrs */
-       uint16_t gso_size;              /* Bytes to append to hdr_len per frame */
-       uint16_t csum_start;            /* Position to start checksumming from */
-       uint16_t csum_offset;           /* Offset after that to place checksum */
-}__attribute__((packed));
 
+/* for flags in virtio_net_hdr */
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM     1       /* Use csum_start, csum_offset */
 
-struct virtio_net_hdr_mrg_rxbuf {
-       struct virtio_net_hdr hdr;
-       uint16_t num_buffers;   /* Number of merged rx buffers */
-};
 
-       
-#define TX_QUEUE_SIZE 256
-#define RX_QUEUE_SIZE 4096
-#define CTRL_QUEUE_SIZE 64
+/* First element of the scatter-gather list, used with GSO or CSUM features */
+struct virtio_net_hdr
+{
+    uint8_t flags;
+    uint8_t gso_type;
+    uint16_t hdr_len;          /* Ethernet + IP + tcp/udp hdrs */
+    uint16_t gso_size;         /* Bytes to append to hdr_len per frame */
+    uint16_t csum_start;       /* Position to start checksumming from */
+    uint16_t csum_offset;      /* Offset after that to place checksum */
+}__attribute__((packed));
 
-#define VIRTIO_NET_F_MRG_RXBUF 15      /* Host can merge receive buffers. */
-#define VIRTIO_NET_F_MAC       5       /* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO       6       /* Host handles pkts w/ any GSO type */
-#define VIRTIO_NET_F_HOST_TSO4 11      /* Host can handle TSOv4 in. */
-#define VIRTIO_NET_F_HOST_UFO  14      /* Host can handle UFO in. */
 
-/* Port to get virtio config */
-#define VIRTIO_NET_CONFIG 20  
+/* The header to use when the MRG_RXBUF 
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+    struct virtio_net_hdr hdr;
+    uint16_t num_buffers;      /* Number of merged rx buffers */
+};
 
 struct virtio_net_config
 {
@@ -89,6 +113,7 @@ struct virtio_net_state {
     struct virtio_net_config net_cfg;
     struct virtio_config virtio_cfg;
 
+    struct v3_vm_info * vm;
     struct vm_device * dev;
     struct pci_device * pci_dev; 
     int io_range_size;
@@ -98,21 +123,23 @@ struct virtio_net_state {
     struct virtio_queue ctrl_vq;       /* idx 2*/
 
     struct v3_timer * timer;
+    void * poll_thread;
 
-    struct nic_statistics statistics;
+    struct nic_statistics stats;
 
     struct v3_dev_net_ops * net_ops;
     v3_lock_t rx_lock, tx_lock;
 
     uint8_t tx_notify, rx_notify;
     uint32_t tx_pkts, rx_pkts;
-    uint64_t past_ms;
+    uint64_t past_us;
 
     void * backend_data;
     struct virtio_dev_state * virtio_dev;
     struct list_head dev_link;
 };
 
+
 static int virtio_init_state(struct virtio_net_state * virtio) 
 {
     virtio->rx_vq.queue_size = RX_QUEUE_SIZE;
@@ -139,9 +166,10 @@ static int virtio_init_state(struct virtio_net_state * virtio)
 
     virtio->virtio_cfg.pci_isr = 0;
        
-    virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC) | 
-                                                               (1 << VIRTIO_NET_F_HOST_UFO) | 
-                                                               (1 << VIRTIO_NET_F_HOST_TSO4);
+    virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC);
+       //                                 (1 << VIRTIO_NET_F_GSO) | 
+       //                                 (1 << VIRTIO_NET_F_HOST_UFO) | 
+               //                         (1 << VIRTIO_NET_F_HOST_TSO4);
 
     if ((v3_lock_init(&(virtio->rx_lock)) == -1) ||
        (v3_lock_init(&(virtio->tx_lock)) == -1)){
@@ -152,32 +180,36 @@ static int virtio_init_state(struct virtio_net_state * virtio)
 }
 
 static int tx_one_pkt(struct guest_info * core, 
-       struct virtio_net_state * virtio, 
-       struct vring_desc * buf_desc) 
+                     struct virtio_net_state * virtio, 
+                     struct vring_desc * buf_desc) 
 {
     uint8_t * buf = NULL;
     uint32_t len = buf_desc->length;
+    int synchronize = 1; // (virtio->tx_notify == 1)?1:0;
 
     if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
        PrintDebug("Could not translate buffer address\n");
        return -1;
     }
 
-    if(virtio->net_ops->send(buf, len, virtio->backend_data) >= 0){
-       virtio->statistics.tx_pkts ++;
-       virtio->statistics.tx_bytes += len;
+    V3_Net_Print(2, "Virtio-NIC: virtio_tx: size: %d\n", len);
+    if(v3_net_debug >= 4){
+       v3_hexdump(buf, len, NULL, 0);
+    }
 
-       return 0;
+    if(virtio->net_ops->send(buf, len, synchronize, virtio->backend_data) < 0){
+       virtio->stats.tx_dropped ++;
+       return -1;
     }
 
-    virtio->statistics.tx_dropped ++;
+    virtio->stats.tx_pkts ++;
+    virtio->stats.tx_bytes += len;
 
-    return -1;
+    return 0;
 }
 
 
-static int 
-copy_data_to_desc(struct guest_info * core, 
+static inline int copy_data_to_desc(struct guest_info * core, 
                  struct virtio_net_state * virtio_state, 
                  struct vring_desc * desc, 
                  uchar_t * buf, 
@@ -188,7 +220,7 @@ copy_data_to_desc(struct guest_info * core,
     uint8_t * desc_buf = NULL;
 
     if (v3_gpa_to_hva(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) {
-       PrintError("Could not translate buffer address\n");
+       PrintDebug("Could not translate buffer address\n");
        return -1;
     }
     len = (desc->length < buf_len)?(desc->length - offset):buf_len;
@@ -198,7 +230,7 @@ copy_data_to_desc(struct guest_info * core,
 }
 
 
-static int get_desc_count(struct virtio_queue * q, int index) {
+static inline int get_desc_count(struct virtio_queue * q, int index) {
     struct vring_desc * tmp_desc = &(q->desc[index]);
     int cnt = 1;
     
@@ -218,32 +250,10 @@ static inline void disable_cb(struct virtio_queue *queue) {
     queue->used->flags |= VRING_NO_NOTIFY_FLAG;
 }
 
-
-/* interrupt the guest, so the guest core get EXIT to Palacios */
-static inline void notify_guest(struct virtio_net_state * virtio){
-    v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
-}
-
-
-/* guest free some pkts for rx queue */
-static int handle_rx_queue_kick(struct guest_info * core, 
-                         struct virtio_net_state * virtio) 
-{
-    return 0;
-}
-
-
-static int handle_ctrl(struct guest_info * core, 
-                      struct virtio_net_state * virtio) {
-       
-    return 0;
-}
-
 static int handle_pkt_tx(struct guest_info * core, 
                         struct virtio_net_state * virtio_state) 
 {
     struct virtio_queue *q = &(virtio_state->tx_vq);
-    struct virtio_net_hdr *hdr = NULL;
     int txed = 0;
     unsigned long flags;
 
@@ -253,12 +263,16 @@ static int handle_pkt_tx(struct guest_info * core,
 
     flags = v3_lock_irqsave(virtio_state->tx_lock);
     while (q->cur_avail_idx != q->avail->index) {
+       struct virtio_net_hdr *hdr = NULL;
        struct vring_desc * hdr_desc = NULL;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
        int desc_cnt = get_desc_count(q, desc_idx);
-       uint32_t req_len = 0;
-       int i = 0;
+
+       if(desc_cnt > 2){
+           PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt);
+           goto exit_error;
+       }
 
        hdr_desc = &(q->desc[desc_idx]);
        if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
@@ -269,25 +283,15 @@ static int handle_pkt_tx(struct guest_info * core,
        hdr = (struct virtio_net_hdr *)hdr_addr;
        desc_idx = hdr_desc->next;
 
-       if(desc_cnt > 2){
-           PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt);
+       /* here we assumed that one ethernet pkt is not splitted into multiple buffer */        
+       struct vring_desc * buf_desc = &(q->desc[desc_idx]);
+       if (tx_one_pkt(core, virtio_state, buf_desc) == -1) {
+           PrintError("Virtio NIC: Error handling nic operation\n");
            goto exit_error;
        }
-
-       /* here we assumed that one ethernet pkt is not splitted into multiple virtio buffer */
-       for (i = 0; i < desc_cnt - 1; i++) {    
-           struct vring_desc * buf_desc = &(q->desc[desc_idx]);
-           if (tx_one_pkt(core, virtio_state, buf_desc) == -1) {
-               PrintError("Error handling nic operation\n");
-               goto exit_error;
-           }
-
-           req_len += buf_desc->length;
-           desc_idx = buf_desc->next;
-       }
-
+           
        q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
-       q->used->ring[q->used->index % q->queue_size].length = req_len; /* What do we set this to???? */
+       q->used->ring[q->used->index % q->queue_size].length = buf_desc->length; /* What do we set this to???? */
        q->used->index ++;
        
        q->cur_avail_idx ++;
@@ -296,12 +300,17 @@ static int handle_pkt_tx(struct guest_info * core,
     }
 
     v3_unlock_irqrestore(virtio_state->tx_lock, flags);
-       
+
+    //virtio_state->virtio_cfg.pci_isr == 0 && 
     if (txed && !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        v3_pci_raise_irq(virtio_state->virtio_dev->pci_bus, 0, virtio_state->pci_dev);
        virtio_state->virtio_cfg.pci_isr = 0x1;
 
-       virtio_state->statistics.interrupts ++;
+       virtio_state->stats.rx_interrupts ++;
+    }
+
+    if(txed > 0) {
+       V3_Net_Print(2, "Virtio Handle TX: txed pkts: %d\n", txed);
     }
 
     return 0;
@@ -386,6 +395,10 @@ static int virtio_io_write(struct guest_info *core,
                    break;
                case 1:
                    virtio_setup_queue(core, virtio, &virtio->tx_vq, pfn, page_addr);
+                   if(virtio->tx_notify == 0){
+                       disable_cb(&virtio->tx_vq);
+                       V3_THREAD_WAKEUP(virtio->poll_thread);
+                   }
                    break;
                case 2:
                    virtio_setup_queue(core, virtio, &virtio->ctrl_vq, pfn, page_addr);
@@ -408,20 +421,16 @@ static int virtio_io_write(struct guest_info *core,
            {
                uint16_t queue_idx = *(uint16_t *)src;                  
                if (queue_idx == 0){
-                   if(handle_rx_queue_kick(core, virtio) == -1){
-                       PrintError("Could not handle Virtio NIC rx kick\n");
-                       return -1;
-                   }
+                   /* receive queue refill */
+                   virtio->stats.tx_interrupts ++;
                } else if (queue_idx == 1){
                    if (handle_pkt_tx(core, virtio) == -1) {
                        PrintError("Could not handle Virtio NIC tx kick\n");
                        return -1;
                    }
+                   virtio->stats.tx_interrupts ++;
                } else if (queue_idx == 2){
-                   if (handle_ctrl(core, virtio) == -1) {
-                       PrintError("Could not handle Virtio NIC ctrl kick\n");
-                       return -1;
-                   }
+                   /* ctrl */
                } else {
                    PrintError("Wrong queue index %d\n", queue_idx);
                }       
@@ -462,7 +471,7 @@ static int virtio_io_read(struct guest_info *core,
        case HOST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
-               return -1;
+               //return -1;
            }
            *(uint32_t *)dst = virtio->virtio_cfg.host_features;
            break;
@@ -541,13 +550,13 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) {
     struct virtio_net_hdr_mrg_rxbuf hdr;
     uint32_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
     uint32_t data_len;
-    uint32_t offset = 0;
+    //uint32_t offset = 0;
     unsigned long flags;
 
-#ifdef CONFIG_DEBUG_VIRTIO_NET
-    PrintDebug("Virtio-NIC: virtio_rx: size: %d\n", size);     
-    v3_hexdump(buf, size, NULL, 0);
-#endif
+    V3_Net_Print(2, "Virtio-NIC: virtio_rx: size: %d\n", size);
+    if(v3_net_debug >= 4){
+       v3_hexdump(buf, size, NULL, 0);
+    }
 
     flags = v3_lock_irqsave(virtio->rx_lock);
 
@@ -555,29 +564,28 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) {
     memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
 
     if (q->ring_avail_addr == 0) {
-       PrintDebug("Queue is not set\n");
+       V3_Net_Print(2, "Virtio NIC: RX Queue not set\n");
+       virtio->stats.rx_dropped ++;
        goto err_exit;
     }
 
     if (q->cur_avail_idx != q->avail->index){
        addr_t hdr_addr = 0;
-       uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
        uint16_t buf_idx = 0;
+       uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size];
        struct vring_desc * hdr_desc = NULL;
+       struct vring_desc * buf_desc = NULL;
+       uint32_t len;
 
        hdr_desc = &(q->desc[hdr_idx]);
        if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
-           PrintDebug("Could not translate receive buffer address\n");
+           V3_Net_Print(2, "Virtio NIC: Could not translate receive buffer address\n");
+           virtio->stats.rx_dropped ++;
            goto err_exit;
        }
-       hdr.num_buffers = 1;
-       memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
-       if (offset >= data_len) {
-           hdr_desc->flags &= ~VIRTIO_NEXT_FLAG;
-       }
 
-       struct vring_desc * buf_desc = NULL;
-       for (buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) {
+#if 0 /* merged buffer */
+       for(buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) {
            uint32_t len = 0;
            buf_desc = &(q->desc[buf_idx]);
 
@@ -587,33 +595,65 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) {
                buf_desc->flags = VIRTIO_NEXT_FLAG;             
            }
            buf_desc->length = len;
+           hdr.num_buffers ++;
        }
        buf_desc->flags &= ~VIRTIO_NEXT_FLAG;
-       
+       memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+#endif
+
+       hdr.num_buffers = 1;
+       memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf));
+       if (data_len == 0) {
+           hdr_desc->flags &= ~VIRTIO_NEXT_FLAG;
+       }
+
+       buf_idx = hdr_desc->next;
+       buf_desc = &(q->desc[buf_idx]);
+       len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf, data_len, 0);           
+       if (len < data_len) {
+           V3_Net_Print(2, "Virtio NIC: ring buffer len less than pkt size, merged buffer not supported\n");
+           virtio->stats.rx_dropped ++;
+               
+           goto err_exit;
+       }
+       buf_desc->flags &= ~VIRTIO_NEXT_FLAG;
+
        q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
        q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */
        q->used->index++;
        q->cur_avail_idx++;
 
-       virtio->statistics.rx_pkts ++;
-       virtio->statistics.rx_bytes += size;
+       virtio->stats.rx_pkts ++;
+       virtio->stats.rx_bytes += size;
     } else {
-       virtio->statistics.rx_dropped ++;
+       V3_Net_Print(2, "Virtio NIC: Guest RX queue is full\n");
+       virtio->stats.rx_dropped ++;
+
+       /* kick guest to refill the queue */
+       virtio->virtio_cfg.pci_isr = 0x1;       
+       v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev);
+       v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
+       virtio->stats.rx_interrupts ++;
        
        goto err_exit;
     }
 
+    V3_Net_Print(2, "pci_isr %d, virtio flags %d\n",  virtio->virtio_cfg.pci_isr, q->avail->flags);
+    //virtio->virtio_cfg.pci_isr == 0 && 
+
     if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
-       PrintDebug("Raising IRQ %d\n",  virtio->pci_dev->config_header.intr_line);
-       
+       V3_Net_Print(2, "Raising IRQ %d\n",  virtio->pci_dev->config_header.intr_line);
+
+       virtio->virtio_cfg.pci_isr = 0x1;       
        v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev);
-       virtio->virtio_cfg.pci_isr = 0x1;
-       virtio->statistics.interrupts ++;
+
+       virtio->stats.rx_interrupts ++;
     }
 
     v3_unlock_irqrestore(virtio->rx_lock, flags);
 
-    /* notify guest if guest is running */
+    /* notify guest if it is in guest mode */
+    /* ISSUE: What is gonna happen if guest thread is running on the same core as this thread? */
     if(virtio->rx_notify == 1){
        v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0);
     }
@@ -650,12 +690,21 @@ static struct v3_device_ops dev_ops = {
 };
 
 
-static void virtio_nic_poll(struct v3_vm_info * vm, int budget, void * data){
-    struct virtio_net_state * virtio = (struct virtio_net_state *)data;
+static int virtio_tx_flush(void * args){
+    struct virtio_net_state *virtio  = (struct virtio_net_state *)args;
+
+    V3_Print("Virtio TX Poll Thread Starting for %s\n", virtio->vm->name);
 
-    if(virtio->tx_notify == 0){
-       handle_pkt_tx(&(vm->cores[0]), virtio);
+    while(1){
+       if(virtio->tx_notify == 0){
+           handle_pkt_tx(&(virtio->vm->cores[0]), virtio);
+           v3_yield(NULL);
+       }else {
+           V3_THREAD_SLEEP();
+       }
     }
+
+    return 0;
 }
 
 static int register_dev(struct virtio_dev_state * virtio, 
@@ -732,31 +781,33 @@ static int register_dev(struct virtio_dev_state * virtio,
 
 #define RATE_UPPER_THRESHOLD 10  /* 10000 pkts per second, around 100Mbits */
 #define RATE_LOWER_THRESHOLD 1
-#define PROFILE_PERIOD 50 /*50ms*/
+#define PROFILE_PERIOD 10000 /*us*/
 
-/* Timer Functions */
 static void virtio_nic_timer(struct guest_info * core, 
                             uint64_t cpu_cycles, uint64_t cpu_freq, 
                             void * priv_data) {
     struct virtio_net_state * net_state = (struct virtio_net_state *)priv_data;
-    uint64_t period_ms;
+    uint64_t period_us;
+    static int profile_ms = 0;
 
-    period_ms = cpu_cycles/cpu_freq;
-    net_state->past_ms += period_ms;
+    period_us = (1000*cpu_cycles)/cpu_freq;
+    net_state->past_us += period_us;
 
-    if(net_state->past_ms >  PROFILE_PERIOD){ 
+#if 0
+    if(net_state->past_us > PROFILE_PERIOD){ 
        uint32_t tx_rate, rx_rate;
        
-       tx_rate = (net_state->statistics.tx_pkts - net_state->tx_pkts)/net_state->past_ms; /* pkts/per ms */
-       rx_rate = (net_state->statistics.rx_pkts - net_state->rx_pkts)/net_state->past_ms;
+       tx_rate = (net_state->stats.tx_pkts - net_state->tx_pkts)/(net_state->past_us/1000); /* pkts/per ms */
+       rx_rate = (net_state->stats.rx_pkts - net_state->rx_pkts)/(net_state->past_us/1000);
 
-       net_state->tx_pkts = net_state->statistics.tx_pkts;
-       net_state->rx_pkts = net_state->statistics.rx_pkts;
+       net_state->tx_pkts = net_state->stats.tx_pkts;
+       net_state->rx_pkts = net_state->stats.rx_pkts;
 
        if(tx_rate > RATE_UPPER_THRESHOLD && net_state->tx_notify == 1){
            V3_Print("Virtio NIC: Switch TX to VMM driven mode\n");
            disable_cb(&(net_state->tx_vq));
            net_state->tx_notify = 0;
+           V3_THREAD_WAKEUP(net_state->poll_thread);
        }
 
        if(tx_rate < RATE_LOWER_THRESHOLD && net_state->tx_notify == 0){
@@ -766,19 +817,30 @@ static void virtio_nic_timer(struct guest_info * core,
        }
 
        if(rx_rate > RATE_UPPER_THRESHOLD && net_state->rx_notify == 1){
-           PrintDebug("Virtio NIC: Switch RX to VMM None notify mode\n");
+           V3_Print("Virtio NIC: Switch RX to VMM None notify mode\n");
            net_state->rx_notify = 0;
        }
 
        if(rx_rate < RATE_LOWER_THRESHOLD && net_state->rx_notify == 0){
-           PrintDebug("Virtio NIC: Switch RX to VMM notify mode\n");
+           V3_Print("Virtio NIC: Switch RX to VMM notify mode\n");
            net_state->rx_notify = 1;
        }
 
-       net_state->past_ms = 0;
+       net_state->past_us = 0;
     }
-}
+#endif
 
+    profile_ms += period_us/1000;
+    if(profile_ms > 20000){
+       V3_Net_Print(1, "Virtio NIC: TX: Pkt: %lld, Bytes: %lld\n\t\tRX Pkt: %lld. Bytes: %lld\n\t\tDropped: tx %lld, rx %lld\nInterrupts: tx %d, rx %d\nTotal Exit: %lld\n",
+               net_state->stats.tx_pkts, net_state->stats.tx_bytes,
+               net_state->stats.rx_pkts, net_state->stats.rx_bytes,
+               net_state->stats.tx_dropped, net_state->stats.rx_dropped,
+               net_state->stats.tx_interrupts, net_state->stats.rx_interrupts,
+               net_state->vm->cores[0].num_exits);
+       profile_ms = 0;
+    }
+}
 
 static struct v3_timer_ops timer_ops = {
     .update_timer = virtio_nic_timer,
@@ -796,19 +858,21 @@ static int connect_fn(struct v3_vm_info * info,
     memset(net_state, 0, sizeof(struct virtio_net_state));
     register_dev(virtio, net_state);
 
+    net_state->vm = info;
     net_state->net_ops = ops;
     net_state->backend_data = private_data;
     net_state->virtio_dev = virtio;
-    net_state->tx_notify = 1;
-    net_state->rx_notify = 1;
-
+    net_state->tx_notify = 0;
+    net_state->rx_notify = 0;
+       
     net_state->timer = v3_add_timer(&(info->cores[0]),&timer_ops,net_state);
 
     ops->recv = virtio_rx;
-    ops->poll = virtio_nic_poll;
     ops->frontend_data = net_state;
     memcpy(ops->fnt_mac, virtio->mac, ETH_ALEN);
 
+    net_state->poll_thread = V3_CREATE_THREAD(virtio_tx_flush, (void *)net_state, "Virtio_Poll");
+
     return 0;
 }
 
@@ -834,14 +898,7 @@ static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
 
     if (macstr != NULL && !str2mac(macstr, virtio_state->mac)) {
        PrintDebug("Virtio NIC: Mac specified %s\n", macstr);
-       PrintDebug("MAC: %x:%x:%x:%x:%x:%x\n", virtio_state->mac[0],
-                               virtio_state->mac[1],
-                               virtio_state->mac[2],
-                               virtio_state->mac[3],
-                               virtio_state->mac[4],
-                               virtio_state->mac[5]);
     }else {
-       PrintDebug("Virtio NIC: MAC not specified\n");
        random_ethaddr(virtio_state->mac);
     }
 
index b4b7342..87f158d 100644 (file)
@@ -303,7 +303,7 @@ static int do_tx_pkts(struct guest_info * core,
        memcpy(pkt.header, virtio_pkt->pkt, ETHERNET_HEADER_LEN);
        pkt.data = virtio_pkt->pkt;
 
-       v3_vnet_send_pkt(&pkt, NULL);
+       v3_vnet_send_pkt(&pkt, NULL, 1);
        
        q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size];
        q->used->ring[q->used->index % q->queue_size].length = pkt_desc->length; // What do we set this to????
index 9bc28d2..5eec5e6 100644 (file)
@@ -36,6 +36,7 @@ struct nic_bridge_state {
 };
 
 static int bridge_send(uint8_t * buf, uint32_t len, 
+                      int synchronize,
                       void * private_data) {
 
 #ifdef CONFIG_DEBUG_NIC_BRIDGE
index f2c45a3..785d38b 100644 (file)
@@ -472,7 +472,8 @@ static void nvram_update_timer(struct guest_info *vm,
     uint64_t period_us;
 
     
-    period_us = (1000000*cpu_cycles/cpu_freq);
+    // cpu freq in khz
+    period_us = (1000*cpu_cycles/cpu_freq);
 
     update_time(nvram_state,period_us);
 
diff --git a/palacios/src/devices/pci_front.c b/palacios/src/devices/pci_front.c
new file mode 100644 (file)
index 0000000..487ab5c
--- /dev/null
@@ -0,0 +1,824 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Authors: 
+ *    Peter Dinda <pdinda@northwestern.edu>    (PCI front device forwarding to host dev interface)
+ *    Jack Lange <jarusl@cs.northwestern.edu>  (original PCI passthrough to physical hardware)
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+/* 
+  This is a front-end PCI device intended to be used together with the
+  host device interface and a *virtual* PCI device implementation in
+  the host OS.  It makes it possible to project such a virtual device
+  into the guest as a PCI device.  It's based on the PCI passthrough
+  device, which projects *physical* PCI devices into the guest.
+
+  If you need to project a non-PCI host-based virtual or physical
+  device into the guest, you should use the generic device.
+
+*/
+
+/* 
+ * The basic idea is that we do not change the hardware PCI configuration
+ * Instead we modify the guest environment to map onto the physical configuration
+ * 
+ * The pci subsystem handles most of the configuration space, except for the bar registers.
+ * We handle them here, by either letting them go directly to hardware or remapping through virtual hooks
+ * 
+ * Memory Bars are always remapped via the shadow map, 
+ * IO Bars are selectively remapped through hooks if the guest changes them 
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_dev_mgr.h>
+#include <palacios/vmm_sprintf.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vm_guest.h> 
+#include <palacios/vmm_symspy.h>
+
+#include <devices/pci.h>
+#include <devices/pci_types.h>
+
+#include <interfaces/vmm_host_dev.h>
+
+
+#ifndef CONFIG_DEBUG_PCI_FRONT
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+// Our own address in PCI-land
+//
+// The same 32 bits can be accessed either as a raw value or through the
+// bit fields below: 2 reserved bits, a 6 bit register number, a 3 bit
+// function, a 5 bit device, an 8 bit bus, 7 reserved bits and a 1 bit
+// enable flag.
+// NOTE(review): presumably this mirrors the PCI configuration address
+// register layout used by the passthrough device - confirm.
+union pci_addr_reg {
+    uint32_t value;
+    struct {
+       uint_t rsvd1   : 2;
+       uint_t reg     : 6;
+       uint_t func    : 3;
+       uint_t dev     : 5;
+       uint_t bus     : 8;
+       uint_t rsvd2   : 7;
+       uint_t enable  : 1;
+    } __attribute__((packed));
+} __attribute__((packed));
+
+
+// identical to PCI passthrough device
+//
+// Classification of a BAR as recorded in struct pt_bar.  A 64 bit memory
+// region occupies two consecutive BARs, tagged PT_BAR_MEM64_LO and
+// PT_BAR_MEM64_HI.  PT_BAR_NONE is the zero value, so a zeroed pt_bar
+// describes an empty BAR.
+typedef enum { PT_BAR_NONE,
+              PT_BAR_IO, 
+              PT_BAR_MEM32, 
+              PT_BAR_MEM24, 
+              PT_BAR_MEM64_LO, 
+              PT_BAR_MEM64_HI,
+              PT_EXP_ROM } pt_bar_type_t;
+
+// identical to PCI passthrough device
+//
+// Per-BAR bookkeeping: what kind of region the BAR describes, where it
+// currently lives in the guest, how large it is, and the last raw BAR
+// value recorded for it.
+struct pt_bar {
+    uint32_t size;       // number of ports (I/O) or bytes (memory) hooked
+    pt_bar_type_t type;
+
+    /*  We store 64 bit memory bar addresses in the high BAR
+     *  because they are the last to be updated
+     *  This means that the addr field must be 64 bits
+     */
+    uint64_t addr; 
+
+    uint32_t val;        // raw BAR register value last stored by bar_init/bar_write
+};
+
+
+
+
+// Private state of one pci_front device instance.  It is allocated and
+// zeroed in pci_front_init and released in pci_front_free.
+struct pci_front_internal {
+    // this is our local cache of what the host device has
+    union {
+       uint8_t config_space[256];
+       struct pci_config_header real_hdr;
+    } __attribute__((packed));
+    
+    // We do need a representation of the bars
+    // since we need to be made aware when they are written
+    // so that we can change the hooks.
+    //
+    // We assume here that the PCI subsystem, on a bar write
+    // will first send us a config_update, which we forward to
+    // the host dev.   Then it will send us a bar update
+    // which we will use to rehook the device
+    //
+    struct pt_bar bars[6];      // our bars (for update purposes)
+    //
+    // Currently unsupported
+    //
+    //struct pt_bar exp_rom;      // and exp ram areas of the config space, above
+     
+    struct vm_device  *pci_bus;  // what bus we are attached to
+    struct pci_device *pci_dev;  // our representation as a registered PCI device
+
+    // NOTE(review): nothing in this file assigns pci_addr after the
+    // zeroing memset in pci_front_init - confirm whether it should be set.
+    union pci_addr_reg pci_addr; // our pci address
+
+    char name[32];               // device ID from the config, used in log messages
+
+    v3_host_dev_t     host_dev;  // the actual implementation
+};
+
+
+
+/*
+static int push_config(struct pci_front_internal *state, uint8_t *config)
+{
+    if (v3_host_dev_config_write(state->host_dev, 0, config, 256) != 256) { 
+       return -1;
+    } else {
+       return 0;
+    }
+}
+*/
+
+/*
+ * Read 256 bytes of configuration space, starting at offset 0, from the
+ * host device into config.
+ *
+ * Returns 0 if the host device reported exactly 256 bytes, -1 otherwise.
+ */
+static int pull_config(struct pci_front_internal *state, uint8_t *config)
+{
+    return (v3_host_dev_read_config(state->host_dev, 0, config, 256) == 256) ? 0 : -1;
+}
+
+
+/*
+ * Memory hook read handler: forwards a guest read of len bytes at guest
+ * physical address gpa to the host device.
+ *
+ * priv is the vm_device that was registered with the hook.  The return
+ * value is whatever v3_host_dev_read_mem() reports.
+ */
+static int pci_front_read_mem(struct guest_info * core, 
+                             addr_t              gpa,
+                             void              * dst,
+                             uint_t              len,
+                             void              * priv)
+{
+    struct vm_device *vdev = (struct vm_device *) priv;
+    struct pci_front_internal *front = (struct pci_front_internal *) vdev->private_data;
+    int nread;
+    int idx = 0;
+
+    PrintDebug("pci_front (%s): reading 0x%x bytes from gpa 0x%p from host dev 0x%p ...",
+              front->name, len, (void*)gpa, front->host_dev);
+
+    nread = v3_host_dev_read_mem(front->host_dev, gpa, dst, len);
+
+    PrintDebug(" done ... read %d bytes: 0x", nread);
+
+    // dump the returned bytes (PrintDebug is a no-op unless CONFIG_DEBUG_PCI_FRONT is set)
+    while (idx < nread) { 
+       PrintDebug("%x", ((uint8_t *)dst)[idx]);
+       idx++;
+    }
+
+    PrintDebug("\n");
+
+    return nread;
+}
+
+/*
+ * Memory hook write handler: forwards a guest write of len bytes at guest
+ * physical address gpa to the host device.
+ *
+ * priv is the vm_device that was registered with the hook.  The return
+ * value is whatever v3_host_dev_write_mem() reports.
+ */
+static int pci_front_write_mem(struct guest_info * core, 
+                              addr_t              gpa,
+                              void              * src,
+                              uint_t              len,
+                              void              * priv)
+{
+    struct vm_device *vdev = (struct vm_device *) priv;
+    struct pci_front_internal *front = (struct pci_front_internal *) vdev->private_data;
+    int nwritten;
+    int idx = 0;
+
+    PrintDebug("pci_front (%s): writing 0x%x bytes to gpa 0x%p to host dev 0x%p bytes=0x",
+              front->name, len, (void*)gpa, front->host_dev);
+
+    // dump the outgoing bytes (PrintDebug is a no-op unless CONFIG_DEBUG_PCI_FRONT is set)
+    while (idx < len) { 
+       PrintDebug("%x", ((uint8_t *)src)[idx]);
+       idx++;
+    }
+
+    nwritten = v3_host_dev_write_mem(front->host_dev, gpa, src, len);
+
+    PrintDebug(" %d bytes written\n",nwritten);
+    
+    return nwritten;
+}
+
+
+/*
+ * I/O port hook read handler: forwards a guest read of len bytes from
+ * port to the host device.
+ *
+ * priv_data is used directly as the pci_front_internal state.  The return
+ * value is whatever v3_host_dev_read_io() reports.
+ */
+static int pci_front_read_port(struct guest_info * core, 
+                              uint16_t            port, 
+                              void              * dst, 
+                              uint_t              len, 
+                              void              * priv_data) 
+{
+    struct pci_front_internal *front = (struct pci_front_internal *) priv_data;
+    int nread;
+    int idx = 0;
+    
+    PrintDebug("pci_front (%s): reading 0x%x bytes from port 0x%x from host dev 0x%p ...",
+              front->name, len, port, front->host_dev);
+
+    nread = v3_host_dev_read_io(front->host_dev, port, dst, len);
+    
+    PrintDebug(" done ... read %d bytes: 0x", nread);
+
+    // dump the returned bytes (PrintDebug is a no-op unless CONFIG_DEBUG_PCI_FRONT is set)
+    while (idx < nread) { 
+       PrintDebug("%x", ((uint8_t *)dst)[idx]);
+       idx++;
+    }
+
+    PrintDebug("\n");
+
+    return nread;
+}
+
+/*
+ * I/O port hook write handler: forwards a guest write of len bytes to
+ * port to the host device.
+ *
+ * priv_data is used directly as the pci_front_internal state.  The return
+ * value is whatever v3_host_dev_write_io() reports.
+ */
+static int pci_front_write_port(struct guest_info * core, 
+                               uint16_t            port, 
+                               void              * src, 
+                               uint_t              len, 
+                               void              * priv_data) 
+{
+    struct pci_front_internal *front = (struct pci_front_internal *) priv_data;
+    int nwritten;
+    int idx = 0;
+    
+    PrintDebug("pci_front (%s): writing 0x%x bytes to port 0x%x to host dev 0x%p bytes=0x",
+              front->name, len, port, front->host_dev);
+
+    // dump the outgoing bytes (PrintDebug is a no-op unless CONFIG_DEBUG_PCI_FRONT is set)
+    while (idx < len) { 
+       PrintDebug("%x", ((uint8_t *)src)[idx]);
+       idx++;
+    }
+
+    nwritten = v3_host_dev_write_io(front->host_dev, port, src, len);
+
+    PrintDebug(" %d bytes written\n",nwritten);
+    
+    return nwritten;
+}
+
+
+
+//
+// This is called at registration time for the device, once per BAR
+// 
+// We assume that someone has called pull_config to get a local
+// copy of the config data from the host device by this point
+//
+// bar_num      - index of the BAR being initialized (0..5)
+// dst          - receives the BAR value the PCI subsystem should use
+// private_data - the vm_device this front end was registered as
+//
+// Classifies the BAR from the cached config space, records it in
+// state->bars[bar_num] and installs the matching I/O port or memory hooks.
+// Returns 0 on success and -1 if the BAR is invalid or hooking fails.
+//
+// NOTE(review): sizes are derived from (bar_val | mask) rather than from
+// probing the device, so they may not reflect the real region size - confirm
+// against the PCI_*_MASK / PCI_*_BASE definitions in devices/pci_types.h.
+//
+static int pci_bar_init(int bar_num, uint32_t * dst, void * private_data) {
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)(dev->private_data);
+
+
+    const uint32_t bar_base_reg = 4;   // offset in 32bit words to skip to the first bar
+
+    union pci_addr_reg pci_addr = {state->pci_addr.value};  // my address
+
+    uint32_t bar_val = 0;
+    uint32_t max_val = 0;
+
+    struct pt_bar * pbar = NULL;
+
+    // bars[] has exactly six entries; refuse anything else before indexing
+    if ((bar_num < 0) || (bar_num >= 6)) {
+       PrintError("pci_front (%s): pci_bar_init: invalid bar number %d\n", state->name, bar_num);
+       return -1;
+    }
+
+    pbar = &(state->bars[bar_num]);
+
+    pci_addr.reg = bar_base_reg + bar_num;
+
+    PrintDebug("pci_front (%s): pci_bar_init: PCI Address = 0x%x\n", state->name, pci_addr.value);
+
+    // This assumes that pull_config() has been previously called and 
+    // we have a local copy of the host device's configuration space
+    bar_val = *((uint32_t*)(&(state->config_space[(bar_base_reg+bar_num)*4])));
+
+    // Now let's set our copy of the relevant bar accordingly
+    pbar->val = bar_val; 
+    
+    // Now we will configure the hooks relevant to this bar
+
+    // We preset this type when we encounter a MEM64 Low BAR
+    // This is a 64 bit memory region that we turn into a memory hook
+    if (pbar->type == PT_BAR_MEM64_HI) {
+       struct pt_bar * lo_pbar = &(state->bars[bar_num - 1]);
+
+       // The full address is this BAR's value in the upper 32 bits plus
+       // the low half that was parked in the low BAR's addr field.
+       // Without this the region would be hooked at the zeroed address.
+       pbar->addr = PCI_MEM64_BASE_HI(bar_val);
+       pbar->addr <<= 32;
+       pbar->addr += lo_pbar->addr;
+
+       pbar->size += lo_pbar->size;
+
+       PrintDebug("pci_front (%s): pci_bar_init: Adding 64 bit PCI mem region: start=0x%p, end=0x%p as a full hook\n",
+                  state->name, 
+                  (void *)(addr_t)pbar->addr, 
+                  (void *)(addr_t)(pbar->addr + pbar->size));
+
+       if (v3_hook_full_mem(dev->vm,
+                            V3_MEM_CORE_ANY,
+                            pbar->addr,
+                            pbar->addr+pbar->size-1,
+                            pci_front_read_mem,
+                            pci_front_write_mem,
+                            dev)<0) { 
+           
+           PrintError("pci_front (%s): pci_bar_init: failed to hook 64 bit region (0x%p, 0x%p)\n",
+                      state->name, 
+                      (void *)(addr_t)pbar->addr,
+                      (void *)(addr_t)(pbar->addr + pbar->size - 1));
+           return -1;
+       }
+
+    } else if ((bar_val & 0x3) == 0x1) {
+       // This an I/O port region which we will turn into a range of hooks
+
+       int i = 0;
+
+       pbar->type = PT_BAR_IO;
+       pbar->addr = PCI_IO_BASE(bar_val);
+
+       max_val = bar_val | PCI_IO_MASK;
+
+       pbar->size = (uint16_t)~PCI_IO_BASE(max_val) + 1;
+
+       
+       PrintDebug("pci_front (%s): pci_bar_init: hooking ports 0x%x through 0x%x\n",
+                  state->name, (uint32_t)pbar->addr, (uint32_t)pbar->addr + pbar->size - 1);
+
+       for (i = 0; i < pbar->size; i++) {
+           if (v3_dev_hook_io(dev,
+                              pbar->addr + i, 
+                              pci_front_read_port,
+                              pci_front_write_port)<0) {
+               PrintError("pci_front (%s): pci_bar_init: unabled to hook I/O port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
+               return -1;
+           }
+       }
+
+    } else {
+
+       // might be a 32 bit memory region or an empty bar
+
+       max_val = bar_val | PCI_MEM_MASK;
+
+       if (max_val == 0) {
+           // nothing, so just ignore it
+           pbar->type = PT_BAR_NONE;
+       } else {
+
+           // memory region - hook it
+
+           if ((bar_val & 0x6) == 0x0) {
+               // 32 bit memory region
+
+               pbar->type = PT_BAR_MEM32;
+               pbar->addr = PCI_MEM32_BASE(bar_val);
+               pbar->size = ~PCI_MEM32_BASE(max_val) + 1;
+
+               PrintDebug("pci_front (%s): pci_init_bar: adding 32 bit PCI mem region: start=0x%p, end=0x%p\n",
+                          state->name, 
+                          (void *)(addr_t)pbar->addr, 
+                          (void *)(addr_t)(pbar->addr + pbar->size));
+
+               if (v3_hook_full_mem(dev->vm, 
+                                    V3_MEM_CORE_ANY,
+                                    pbar->addr,
+                                    pbar->addr+pbar->size-1,
+                                    pci_front_read_mem,
+                                    pci_front_write_mem,
+                                    dev) < 0 ) { 
+                   PrintError("pci_front (%s): pci_init_bar: unable to hook 32 bit memory region 0x%p to 0x%p\n",
+                              state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
+                   return -1;
+               }
+
+           } else if ((bar_val & 0x6) == 0x2) {
+
+               // 24 bit memory region
+
+               pbar->type = PT_BAR_MEM24;
+               pbar->addr = PCI_MEM24_BASE(bar_val);
+               pbar->size = ~PCI_MEM24_BASE(max_val) + 1;
+
+
+               if (v3_hook_full_mem(dev->vm, 
+                                    V3_MEM_CORE_ANY,
+                                    pbar->addr,
+                                    pbar->addr+pbar->size-1,
+                                    pci_front_read_mem,
+                                    pci_front_write_mem,
+                                    dev) < 0 ) { 
+                   PrintError("pci_front (%s): pci_init_bar: unable to hook 24 bit memory region 0x%p to 0x%p\n",
+                              state->name, (void*)(pbar->addr), (void*)(pbar->addr+pbar->size-1));
+                   return -1;
+               }
+
+           } else if ((bar_val & 0x6) == 0x4) {
+               
+               // partial update of a 64 bit region, no hook done yet
+
+               struct pt_bar * hi_pbar = NULL;
+
+               // A 64 bit region needs a following BAR for its high half;
+               // the last BAR has none, and bars[bar_num + 1] would be
+               // out of bounds
+               if (bar_num >= 5) {
+                   PrintError("pci_front (%s): pci_bar_init: 64 bit memory bar in last bar slot\n",state->name);
+                   return -1;
+               }
+
+               hi_pbar = &(state->bars[bar_num + 1]);
+
+               pbar->type = PT_BAR_MEM64_LO;
+               hi_pbar->type = PT_BAR_MEM64_HI;
+
+               // Set the low bits, only for temporary storage until we calculate the high BAR
+               pbar->addr = PCI_MEM64_BASE_LO(bar_val);
+               pbar->size = ~PCI_MEM64_BASE_LO(max_val) + 1;
+
+               PrintDebug("pci_front (%s): pci_bar_init: partial 64 bit update\n",state->name);
+
+           } else {
+               PrintError("pci_front (%s): pci_bar_init: invalid memory bar type\n",state->name);
+               return -1;
+           }
+
+       }
+    }
+
+
+
+    // Update the pci subsystem versions
+    *dst = bar_val;
+
+    return 0;
+}
+
+
+//
+// If the guest modifies a BAR, we expect that pci.c will do the following,
+// in this order
+//
+//    1. notify us via the config_update callback, which we will feed back
+//       to the host device
+//    2. notify us of the bar change via the following callback 
+//
+// This callback will unhook as needed for the old bar value and rehook
+// as needed for the new bar value
+//
+// bar_num      - index of the BAR that was written
+// src          - the value written; it is adjusted in place (low bits
+//                cleared to match the size, reserved bits restored)
+// private_data - the vm_device this front end was registered as
+//
+// Returns 0 on success and -1 if unhooking or rehooking fails or the BAR
+// has a type this function does not handle.
+//
+static int pci_bar_write(int bar_num, uint32_t * src, void * private_data) {
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+    
+    struct pt_bar * pbar = &(state->bars[bar_num]);
+
+    PrintDebug("pci_front (%s): bar update: bar_num=%d, src=0x%x\n", state->name, bar_num, *src);
+    PrintDebug("pci_front (%s): the current bar has size=%u, type=%d, addr=%p, val=0x%x\n",
+              state->name, pbar->size, pbar->type, (void *)(addr_t)pbar->addr, pbar->val);
+
+
+
+    if (pbar->type == PT_BAR_NONE) {
+       PrintDebug("pci_front (%s): bar update is to empty bar - ignored\n",state->name);
+       return 0;
+    } else if (pbar->type == PT_BAR_IO) {
+       int i = 0;
+
+       // unhook old ports
+       PrintDebug("pci_front (%s): unhooking I/O ports 0x%x through 0x%x\n", 
+                  state->name, 
+                  (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
+       for (i = 0; i < pbar->size; i++) {
+           if (v3_dev_unhook_io(dev, pbar->addr + i) == -1) {
+               PrintError("pci_front (%s): could not unhook previously hooked port.... 0x%x\n", 
+                          state->name, 
+                          (uint32_t)pbar->addr + i);
+               return -1;
+           }
+       }
+
+       PrintDebug("pci_front (%s): setting I/O Port range size=%d\n", state->name, pbar->size);
+
+       // 
+       // Not clear if this cooking is needed... why not trust
+       // the write?  Who cares if it wants to suddenly hook more ports?
+       // 
+
+       // clear the low bits to match the size
+       *src &= ~(pbar->size - 1);
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_IO_MASK);
+
+       pbar->addr = PCI_IO_BASE(*src); 
+
+       PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src);
+
+       PrintDebug("pci_front (%s): rehooking I/O ports 0x%x through 0x%x\n",
+                  state->name, (unsigned)(pbar->addr), (unsigned)(pbar->addr+pbar->size-1));
+
+       for (i = 0; i < pbar->size; i++) {
+           if (v3_dev_hook_io(dev,
+                              pbar->addr + i, 
+                              pci_front_read_port, 
+                              pci_front_write_port)<0) { 
+               PrintError("pci_front (%s): unable to rehook port 0x%x\n",state->name, (unsigned)(pbar->addr+i));
+               return -1;
+           }
+       }
+
+    } else if ((pbar->type == PT_BAR_MEM32) || (pbar->type == PT_BAR_MEM24)) {
+
+       // pci_bar_init hooks 24 bit regions exactly like 32 bit ones, so
+       // both are moved the same way here; only the base extraction and
+       // the width shown in the messages differ
+       int bits = (pbar->type == PT_BAR_MEM32) ? 32 : 24;
+
+       if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+           PrintError("pci_front (%s): unable to unhook %d bit memory region starting at 0x%p\n", 
+                      state->name, bits, (void*)(pbar->addr));
+           return -1;
+       }
+
+       // Again, not sure I need to do this cooking...
+
+       // clear the low bits to match the size
+       *src &= ~(pbar->size - 1);
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_MEM_MASK);
+
+       PrintDebug("pci_front (%s): cooked src=0x%x\n", state->name, *src);
+
+       if (pbar->type == PT_BAR_MEM32) {
+           pbar->addr = PCI_MEM32_BASE(*src);
+       } else {
+           pbar->addr = PCI_MEM24_BASE(*src);
+       }
+
+       PrintDebug("pci_front (%s): rehooking %d bit memory region 0x%p through 0x%p\n",
+                  state->name, bits, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+                  
+       if (v3_hook_full_mem(dev->vm,
+                            V3_MEM_CORE_ANY,
+                            pbar->addr,
+                            pbar->addr+pbar->size-1,
+                            pci_front_read_mem,
+                            pci_front_write_mem,
+                            dev)<0) { 
+           PrintError("pci_front (%s): unable to rehook %d bit memory region 0x%p through 0x%p\n",
+                      state->name, bits, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+           return -1;
+       }
+
+    } else if (pbar->type == PT_BAR_MEM64_LO) {
+       // We only store the written values here, the actual reconfig comes when the high BAR is updated
+
+       // clear the low bits to match the size
+       *src &= ~(pbar->size - 1);
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_MEM_MASK);
+
+       // Temp storage, used when hi bar is written
+       pbar->addr = PCI_MEM64_BASE_LO(*src);
+
+       PrintDebug("pci_front (%s): handled partial update for 64 bit memory region\n",state->name);
+
+    } else if (pbar->type == PT_BAR_MEM64_HI) {
+       struct pt_bar * lo_vbar = &(state->bars[bar_num - 1]);
+
+       if (v3_unhook_mem(dev->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+           PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n", 
+                      state->name, (void*)(pbar->addr));
+           return -1;
+       }
+
+       
+       // We don't set size, because we assume region is less than 4GB
+
+       // Set reserved bits
+       *src |= (pbar->val & ~PCI_MEM64_MASK_HI);
+
+       // high half from this write, low half from the earlier low BAR write
+       pbar->addr = PCI_MEM64_BASE_HI(*src);
+       pbar->addr <<= 32;
+       pbar->addr += lo_vbar->addr;
+
+       PrintDebug("pci_front (%s): rehooking 64 bit memory region 0x%p through 0x%p\n",
+                  state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+                  
+       if (v3_hook_full_mem(dev->vm,
+                            V3_MEM_CORE_ANY,
+                            pbar->addr,
+                            pbar->addr+pbar->size-1,
+                            pci_front_read_mem,
+                            pci_front_write_mem,
+                            dev)<0) { 
+           PrintError("pci_front (%s): unable to rehook 64 bit memory region 0x%p through 0x%p\n",
+                      state->name, (void*)(pbar->addr), (void*)(pbar->addr + pbar->size - 1));
+           return -1;
+       }
+       
+    } else {
+       PrintError("pci_front (%s): unhandled PCI bar type %d\n", state->name, pbar->type);
+       return -1;
+    }
+
+    // remember the cooked value so the next write can restore reserved bits
+    pbar->val = *src;
+    
+    return 0;
+}
+
+
+/*
+ * Config space update callback: forwards a guest write of length bytes at
+ * config space byte offset reg_num to the host device.
+ *
+ * private_data is the vm_device this front end was registered as.
+ * Returns 0 if the host device accepted all bytes, -1 otherwise.
+ *
+ * The byte offset is passed through unchanged.  Routing it through the
+ * 6 bit dword register field of pci_addr_reg dropped its low two bits, so
+ * a byte or word write that was not dword aligned went to the wrong
+ * offset.  state->pci_addr is never set in this file, so for aligned
+ * writes the value sent to the host device is the same as before.
+ */
+static int pci_front_config_update(uint_t reg_num, void * src, uint_t length, void * private_data) 
+{
+    uint_t i;
+    struct vm_device * dev = (struct vm_device *)private_data;
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+
+    PrintDebug("pci_front (%s): configuration update: writing 0x%x bytes at offset 0x%x to host device 0x%p, bytes=0x",
+              state->name, length, reg_num, state->host_dev);
+    
+    for (i = 0; i < length; i++) { 
+       PrintDebug("%x", ((uint8_t *)src)[i]);
+    }
+
+    PrintDebug("\n");
+
+    if (v3_host_dev_write_config(state->host_dev,
+                                reg_num,
+                                src,
+                                length) != length) { 
+       PrintError("pci_front (%s): configuration update: unable to write all bytes\n",state->name);
+       return -1;
+    }
+
+
+    return 0;
+}
+
+
+/*
+ * Remove every memory hook installed for this device's BARs.
+ *
+ * 32 bit, 24 bit and 64 bit (recorded on the high BAR) regions are the
+ * ones pci_bar_init hooks with v3_hook_full_mem, so those are the ones
+ * unhooked here.  I/O port BARs are not touched.
+ *
+ * Returns 0 on success, or -1 as soon as one region cannot be unhooked.
+ */
+static int unhook_all_mem(struct pci_front_internal *state)
+{
+    int bar_num;
+    struct vm_device *bus = state->pci_bus;
+
+
+    for (bar_num=0;bar_num<6;bar_num++) { 
+       struct pt_bar * pbar = &(state->bars[bar_num]);
+
+       PrintDebug("pci_front (%s): unhooking for bar %d\n", state->name, bar_num);
+
+       if ((pbar->type == PT_BAR_MEM32) || (pbar->type == PT_BAR_MEM24)) {
+           // 24 bit regions are hooked like 32 bit ones and must be
+           // released as well, otherwise their hooks outlive the device
+           int bits = (pbar->type == PT_BAR_MEM32) ? 32 : 24;
+
+           if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+               PrintError("pci_front (%s): unable to unhook %d bit memory region starting at 0x%p\n", 
+                          state->name, bits, (void*)(pbar->addr));
+               return -1;
+           }
+       } else  if (pbar->type == PT_BAR_MEM64_HI) {
+
+           if (v3_unhook_mem(bus->vm,V3_MEM_CORE_ANY,pbar->addr)<0) { 
+               PrintError("pci_front (%s): unable to unhook 64 bit memory region starting at 0x%p\n", 
+                          state->name, (void*)(pbar->addr));
+               return -1;
+           }
+       }
+    }
+    
+    return 0;
+}
+
+
+
+/*
+ * Register this front end with the PCI bus it is attached to.
+ *
+ * All six BARs are registered as PCI_BAR_PASSTHROUGH with pci_bar_init and
+ * pci_bar_write as callbacks, and pci_front_config_update receives config
+ * space writes.  The resulting pci_device is stored in state->pci_dev.
+ *
+ * Returns 0 on success, -1 if the PCI subsystem refuses the registration.
+ */
+static int setup_virt_pci_dev(struct v3_vm_info * vm_info, struct vm_device * dev) 
+{
+    struct pci_front_internal * state = (struct pci_front_internal *)dev->private_data;
+    struct pci_device * pci_dev = NULL;
+    struct v3_pci_bar bars[6];
+    int bus_num = 0;
+    int i;
+
+    // do not hand uninitialized stack contents to the PCI subsystem in
+    // the fields we do not set explicitly
+    memset(bars, 0, sizeof(bars));
+
+    for (i = 0; i < 6; i++) {
+       bars[i].type = PCI_BAR_PASSTHROUGH;
+       bars[i].private_data = dev;
+       bars[i].bar_init = pci_bar_init;
+       bars[i].bar_write = pci_bar_write;
+    }
+
+    pci_dev = v3_pci_register_device(state->pci_bus,
+                                    PCI_STD_DEVICE,
+                                    bus_num, -1, 0, 
+                                    state->name, bars,
+                                    pci_front_config_update,
+                                    NULL,      // no support for command updates
+                                    NULL,      // no support for expansion roms              
+                                    dev);
+
+    // report the failure so that pci_front_init can tear the device down
+    if (pci_dev == NULL) {
+       PrintError("pci_front (%s): unable to register device with PCI bus\n", state->name);
+       return -1;
+    }
+
+    state->pci_dev = pci_dev;
+
+
+    // EXPANSION ROMS CURRENTLY UNSUPPORTED
+
+    // COMMANDS CURRENTLY UNSUPPORTED
+
+    return 0;
+}
+
+
+
+//
+// Note: potential bug:  not clear what pointer I get here
+//
+/*
+ * Tear down a pci_front instance: remove its memory hooks, close the host
+ * device if one is open, and release the state structure.
+ *
+ * Returns 0 on success.  If the memory hooks cannot be removed it returns
+ * -1 and leaves the state allocated, since the remaining hooks still
+ * reach it through the device's private data.
+ */
+static int pci_front_free(struct pci_front_internal *state)
+{
+
+    if (unhook_all_mem(state)<0) { 
+       return -1;
+    }
+
+    // the device manager will unhook the i/o ports for us
+
+    if (state->host_dev) { 
+       v3_host_dev_close(state->host_dev);
+       state->host_dev=0;
+    }
+
+    // log before releasing the memory: state->name lives inside state
+    PrintDebug("pci_front (%s): freed\n",state->name);
+
+    V3_Free(state);
+
+    return 0;
+}
+
+
+// Device operations passed to v3_add_device() in pci_front_init; only the
+// free hook is provided.
+static struct v3_device_ops dev_ops = {
+//
+// Note: potential bug:  not clear what pointer I get here
+//
+    .free = (int (*)(void*))pci_front_free,
+};
+
+
+
+
+
+
+
+/*
+ * Device constructor, registered under the name "PCI_FRONT".
+ *
+ * Reads the "ID", "bus" and "hostdev" values from cfg, allocates the
+ * device state, adds the device to the VM, opens the host device named by
+ * the "hostdev" url, pulls its config space into the local cache and
+ * registers the device with the PCI bus.
+ *
+ * Returns 0 on success and -1 on any failure.  Once the device has been
+ * added, failures are cleaned up through v3_remove_device().
+ */
+static int pci_front_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) 
+{
+    struct vm_device * dev;
+    struct vm_device * bus;
+    struct pci_front_internal *state;
+    char *dev_id;
+    char *bus_id;
+    char *url;
+
+    
+    if (!(dev_id = v3_cfg_val(cfg, "ID"))) { 
+       PrintError("pci_front: no id  given!\n");
+       return -1;
+    }
+    
+    if (!(bus_id = v3_cfg_val(cfg, "bus"))) { 
+       PrintError("pci_front (%s): no bus given!\n",dev_id);
+       return -1;
+    }
+    
+    if (!(url = v3_cfg_val(cfg, "hostdev"))) { 
+       PrintError("pci_front (%s): no host device url given!\n",dev_id);
+       return -1;
+    }
+    
+    if (!(bus = v3_find_dev(vm,bus_id))) { 
+       PrintError("pci_front (%s): cannot attach to bus %s\n",dev_id,bus_id);
+       return -1;
+    }
+    
+    if (!(state = V3_Malloc(sizeof(struct pci_front_internal)))) { 
+       PrintError("pci_front (%s): cannot allocate state for device\n",dev_id);
+       return -1;
+    }
+    
+    memset(state, 0, sizeof(struct pci_front_internal));
+    
+    state->pci_bus = bus;
+
+    // Copy at most size-1 bytes: the structure was just zeroed, so the
+    // last byte stays 0 and the name is terminated even when dev_id is
+    // longer than the buffer
+    strncpy(state->name, dev_id, sizeof(state->name) - 1);
+    
+    if (!(dev = v3_add_device(vm, dev_id, &dev_ops, state))) { 
+       PrintError("pci_front (%s): unable to add device\n",state->name);
+       // no device owns the state yet, so it has to be released here
+       V3_Free(state);
+       return -1;
+    }
+    
+    if (!(state->host_dev=v3_host_dev_open(url,V3_BUS_CLASS_PCI,dev,vm))) { 
+       PrintError("pci_front (%s): unable to attach to host device %s\n",state->name, url);
+       v3_remove_device(dev);
+       return -1;
+    }
+    
+    // fetch config space from the host
+    if (pull_config(state,state->config_space)) { 
+       PrintError("pci_front (%s): cannot initially configure device\n",state->name);
+       v3_remove_device(dev);
+       return -1;
+    }
+
+    // setup virtual device for now
+    if (setup_virt_pci_dev(vm,dev)<0) { 
+       PrintError("pci_front (%s): cannot set up virtual pci device\n", state->name);
+       v3_remove_device(dev);
+       return -1;
+    }
+
+    // We do not need to hook anything here since pci will call
+    // us back via the bar_init functions
+
+    PrintDebug("pci_front (%s): inited and ready to be Potemkinized\n",state->name);
+
+    return 0;
+
+}
+
+
+device_register("PCI_FRONT", pci_front_init)
index 834f780..ce28940 100644 (file)
 
 #include "vga_regs.h"
 
+#ifndef CONFIG_DEBUG_VGA
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+#define DEBUG_MEM_DATA    0
+#define DEBUG_DEEP_MEM    0
+#define DEBUG_DEEP_RENDER 0
+
+
 #define MEM_REGION_START 0xa0000
 #define MEM_REGION_END   0xc0000
 #define MEM_REGION_NUM_PAGES (((MEM_REGION_END)-(MEM_REGION_START))/4096)
@@ -272,7 +282,7 @@ struct vga_dac_regs {
     
 
 struct vga_internal {
-    struct vm_device *dev;  
+    struct vm_device *dev; 
     
     bool passthrough;
     bool skip_next_passthrough_out; // for word access 
@@ -320,6 +330,8 @@ struct vga_internal {
 };
 
 
+// How map bytes are turned into pixels: find_shift_mode() picks one of
+// these from the graphics mode register, render_graphics() switches on it
+typedef enum {PLANAR_SHIFT, PACKED_SHIFT, C256_SHIFT} shift_mode_t;
+
 
 static void find_text_char_dim(struct vga_internal *vga, uint32_t *w, uint32_t *h)
 {
@@ -363,6 +375,7 @@ static void find_text_data_start(struct vga_internal *vga, void **data)
 
 }
 
+
 static void find_text_attr_start(struct vga_internal *vga, void **data)
 {
     uint32_t offset;
@@ -444,6 +457,29 @@ static int blinking(struct vga_internal *vga)
 }
 
 
+static void find_graphics_data_starting_offset(struct vga_internal *vga, uint32_t *offset)
+{
+    // The start address is split over two CRT controller registers:
+    // the high register supplies the bits above the low eight
+    uint32_t start = vga->vga_crt_controller.vga_start_address_high;
+
+    start = (start << 8) + vga->vga_crt_controller.vga_start_address_low;
+
+    *offset = start;
+}
+
+
+static void find_shift_mode(struct vga_internal *vga, shift_mode_t *mode)
+{
+    // 256 color mode wins over the shift register mode bit
+    if (vga->vga_graphics_controller.vga_graphics_mode.c256) { 
+       *mode = C256_SHIFT;
+       return;
+    }
+
+    // otherwise the shift register mode bit chooses packed vs planar
+    *mode = vga->vga_graphics_controller.vga_graphics_mode.shift_reg_mode
+       ? PACKED_SHIFT
+       : PLANAR_SHIFT;
+}
+
+
 static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_t *height)
 {
     uint32_t vert_lsb, vert_msb;
@@ -458,34 +494,42 @@ static void find_graphics_res(struct vga_internal *vga, uint32_t *width, uint32_
        + (vga->vga_crt_controller.vga_overflow.vertical_disp_enable_end8);
               
     *height  = ( (vert_msb << 8) + vert_lsb + 1) ; // pixels high (scanlines)
-    
-}
-
-
-static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *width, uint32_t *height)
-{
-
-}
 
-static void render_graphics(struct vga_internal *vga, void *fb)
-{
+    // At this point we have the resolution in dot clocks across and scanlines top-to-bottom
+    // This is usually the resolution in pixels, but it can be monkeyed with
+    // at least in the following ways
 
-    PrintDebug("vga: render_graphics is unimplemented\n");
-    // Multiuplane 16
-    // Packed pixel mono
-    // packed pixel 4 color
-    // packed pixel 256 color
+    // vga sequencer dot clock divide by two 
+    if (vga->vga_sequencer.vga_clocking_mode.dot_clock) { 
+       *width/=2;
+       *height/=2;
+    }
 
-    find_graphics_cursor_pos(0,0,0);
+    // crt_controller.max_row_scan.double_scan => each row twice for 200=>400
+    if (vga->vga_crt_controller.vga_max_row_scan.double_scan) { 
+       *height/=2;
+    }
+    
+    // crt_controller.crt_mode_control.count_by_two => pixels twice as wide as normal
+    if (vga->vga_crt_controller.vga_crt_mode_control.count_by_two) { 
+       *width /= 2;
+    }
 
+    // crt_controller.crt_mode_control.horizontal_retrace_select => pixels twice as tall as normal
+    if (vga->vga_crt_controller.vga_crt_mode_control.horizontal_retrace_select) { 
+       *height /= 2;
+    }
+    
 }
 
-static void render_text_cursor(struct vga_internal *vga, void *fb)
+
+static void find_graphics_cursor_pos(struct vga_internal *vga, uint32_t *x, uint32_t *y)
 {
+    // todo: no cursor lookup yet, the position is always reported as (0,0)
+    *y = 0;
+    *x = 0;
 }
 
 
-
 static void dac_lookup_24bit_color(struct vga_internal *vga,
                                   uint8_t entry,
                                   uint8_t *red,
@@ -503,6 +547,209 @@ static void dac_lookup_24bit_color(struct vga_internal *vga,
 
 }
 
+
+/*
+  Colors work like this:
+
+  4 bit modes:   index is to the internal palette on the attribute controller
+                 that supplies 6 bits, but we need 8 to index the dac
+                2 more (the msbs) are supplied from the color select register
+                 we can optionally overwrite bits 5 and 4 from the color
+                select register as well, depending on a selection bit
+                in the mode control register.   The result of all this is
+                8 bit index for the dac
+
+  8 bit modes:   the attribute controller passes the index straight through
+                 to the DAC.
+
+
+  The DAC translates from the 8 bit index into 6 bits per color channel
+  (18 bit color).   We multiply by 4 to get 24 bit color.
+*/
+
+static void find_24bit_color(struct vga_internal *vga, 
+                            uint8_t val,
+                            uint8_t *red,
+                            uint8_t *green,
+                            uint8_t *blue)
+{
+    // index finally handed to the DAC; in 8 bit mode val goes
+    // right to the DAC unchanged
+    uint8_t dac_index = val;
+
+    if (!vga->vga_attribute_controller.vga_attribute_mode_control.pixel_width) { 
+       // 4 bit mode: start from the internal palette entry
+       struct vga_internal_palette_reg entry = vga->vga_attribute_controller.vga_internal_palette[val%16];
+
+       dac_index = entry.palette_data;
+       
+       // bits 5-4 are replaced from the color select register only
+       // when the mode control register asks for it
+       if (vga->vga_attribute_controller.vga_attribute_mode_control.p54_select) { 
+           dac_index = (dac_index & ~0x30)
+               | (vga->vga_attribute_controller.vga_color_select.sc4 << 4)
+               | (vga->vga_attribute_controller.vga_color_select.sc5 << 5);
+       }
+
+       // bits 7-6 always come from the color select register
+       dac_index = (dac_index & ~0xc0)
+           | (vga->vga_attribute_controller.vga_color_select.sc6 << 6)
+           | (vga->vga_attribute_controller.vga_color_select.sc7 << 7);
+    }
+       
+    dac_lookup_24bit_color(vga,dac_index,red,green,blue);
+}
+       
+/*
+  Render the graphics-mode contents of the four maps into the frame
+  buffer fb, whose geometry is described by vga->target_spec.
+
+  The whole graphics resolution is scanned so that the map offset keeps
+  advancing, but a pixel is only drawn when it lies inside both the
+  graphics resolution and the frame buffer.
+
+  Each map offset yields 8 pixels in the planar and packed shift modes
+  and 4 pixels in the 256 color shift mode.
+*/
+static void render_graphics(struct vga_internal *vga, void *fb)
+{
+
+    struct v3_frame_buffer_spec *spec = &(vga->target_spec);
+
+    uint32_t gw, gh; // graphics w/h
+    uint32_t fw, fh; // fb w/h
+    uint32_t rgw, rgh;  // region we can actually show on the frame buffer
+    
+
+    uint32_t fx, fy;     // pixel position within the frame buffer
+    
+    uint32_t offset;     // offset into the maps
+    uint8_t  m;        // map
+    uint8_t  p;          // pixel in the current map byte  (0..7)
+    uint8_t  npix;       // pixels produced per map offset (4 or 8)
+
+    uint8_t r,g,b;  // looked up colors for entry
+
+    void    *pixel;   // current pixel in the fb
+    uint8_t *red;     // and the channels in the pixel
+    uint8_t *green;   //
+    uint8_t *blue;    //
+
+    uint8_t db[4]; // 4 bytes read at a time
+    uint8_t pb[8]; // 8 pixels assembled at a time
+
+    shift_mode_t sm;   // shift mode
+
+    uint32_t cur_x, cur_y;
+    
+
+    find_graphics_res(vga,&gw,&gh);
+
+    find_shift_mode(vga,&sm);
+
+    find_graphics_cursor_pos(vga,&cur_x,&cur_y);
+
+    find_graphics_data_starting_offset(vga,&offset);
+
+    fw = spec->width;
+    fh = spec->height;
+
+
+    PrintDebug("vga: attempting graphics render (%s): graphics_res=(%u,%u), fb_res=(%u,%u), "
+               "fb=0x%p offset=0x%x\n",
+              sm == PLANAR_SHIFT ? "planar shift" : 
+              sm == PACKED_SHIFT ? "packed shift" : 
+              sm == C256_SHIFT ? "color256 shift" : "UNKNOWN",
+              gw,gh,fw,fh,fb,offset);
+
+    // First we need to clip to what we can actually show
+    rgw = gw < fw ? gw : fw;
+    rgh = gh < fh ? gh : fh;
+
+    if (gw%8) { 
+       PrintError("vga: warning: graphics width is not a multiple of 8\n");
+    }
+
+    // the group size does not change during the scan
+    npix = (sm==C256_SHIFT) ? 4 : 8;
+
+    // Now we scan across by row
+    for (fy=0;fy<gh;fy++) { 
+       // by column
+       for (fx=0;fx<gw; fx += npix , offset++ ) { 
+
+           // groups that start outside the render region are skipped;
+           // the loop increment still advances fx and offset
+           if (fy >= rgh || fx >= rgw) {
+               continue;
+           }
+
+           // fetch the data bytes
+           for (m=0;m<4;m++) { 
+               db[m]=*((uint8_t*)(vga->map[m]+offset));
+           }
+            
+           // assemble all 4 or 8 pixels
+           switch (sm) { 
+               case PLANAR_SHIFT:
+                   // one bit from each of the 4 planes per pixel
+                   for (p=0;p<8;p++) { 
+                       pb[p]= 
+                           (( db[0] >> 7) & 0x1) |
+                           (( db[1] >> 6) & 0x2) |
+                           (( db[2] >> 5) & 0x4) |
+                           (( db[3] >> 4) & 0x8) ;
+                       
+                       for (m=0;m<4;m++) { 
+                           db[m] <<= 1;
+                       }
+                   }
+                   break;
+                   
+               case PACKED_SHIFT:
+                   // first 4 pixels use planes 0 and 2
+                   for (p=0;p<4;p++) { 
+                       pb[p] = 
+                           ((db[2] >> 4) & 0xc) |
+                           ((db[0] >> 6) & 0x3) ;
+                       db[2] <<= 2;
+                       db[0] <<= 2;
+                   }
+                   
+                   // next 4 pixels use planes 1 and 3
+                   // (no break above: both halves must be assembled,
+                   //  otherwise pb[4..7] would be drawn uninitialized)
+                   for (p=4;p<8;p++) { 
+                       pb[p] = 
+                           ((db[3] >> 4) & 0xc) |
+                           ((db[1] >> 6) & 0x3) ;
+                       db[3] <<= 2;
+                       db[1] <<= 2;
+                   }
+                   break;
+
+               case C256_SHIFT:
+                   // this one is either very bizarre or as simple as this
+                   for (p=0;p<4;p++) { 
+                       pb[p] = db[p];
+                   }
+                   break;
+           }
+
+           // draw each pixel
+           for (p=0;p<npix;p++) { 
+
+               // a group can straddle the right edge of the render
+               // region; never write past the end of the row
+               if ((fx + p) >= rgw) { 
+                   break;
+               }
+               
+               // find its color
+               find_24bit_color(vga,pb[p],&r,&g,&b);
+           
+               // find its position in the framebuffer;
+               pixel =  fb + (((fx + p) + (fy*spec->width)) * spec->bytes_per_pixel);
+               red = pixel + spec->red_offset;
+               green = pixel + spec->green_offset;
+               blue = pixel + spec->blue_offset;
+
+               // draw it
+               *red=r;
+               *green=g;
+               *blue=b;
+           }
+       }
+    }
+    
+    PrintDebug("vga: render done\n");
+}
+
+
+static void render_text_cursor(struct vga_internal *vga, void *fb)
+{
+    // not implemented: nothing is drawn for the text cursor
+}
+
+
+
+
 //
 // A variant of this function could render to
 // a text console interface as well
@@ -593,22 +840,22 @@ static void render_text(struct vga_internal *vga, void *fb)
                // foreground
                
                if (!extended_fontset(vga)) { 
-                   fg_entry = ((uint8_t)(a.foreground_intensity_or_font_select)) << 3;
+                   fg_entry = a.foreground_intensity_or_font_select << 3;
                } else {
                    fg_entry = 0;
                }
                fg_entry |= a.fore;
 
-               dac_lookup_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb);
+               find_24bit_color(vga,fg_entry,&fgr,&fgg,&fgb);
 
                if (!blinking(vga)) { 
-                   bg_entry = ((uint8_t)(a.blinking_or_bg_intensity)) << 3;
+                   bg_entry = a.blinking_or_bg_intensity << 3;
                } else {
                    bg_entry = 0;
                }
                bg_entry |= a.back;
                
-               dac_lookup_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb);
+               find_24bit_color(vga,bg_entry,&bgr,&bgg,&bgb);
 
                // Draw the character
                for (l=0; l<ch; l++, font++) {
@@ -710,6 +957,15 @@ static void render_test(struct vga_internal *vga, void *fb)
     }
 }
 
+static void render_black(struct vga_internal *vga, void *fb)
+{
+    // Zero every byte of the target frame buffer
+    struct v3_frame_buffer_spec *spec = &(vga->target_spec);
+
+    memset(fb, 0, spec->height * spec->width * spec->bytes_per_pixel);
+}
+
 static void render_maps(struct vga_internal *vga, void *fb)
 {
 
@@ -757,20 +1013,21 @@ static int render(struct vga_internal *vga)
 
        fb = v3_graphics_console_get_frame_buffer_data_rw(vga->host_cons,&(vga->target_spec));
 
-       // Draw some crap for testing for now
-       if (0) { render_test(vga,fb);}
-       // Draw the maps for debugging
-       if (0) { render_maps(vga,fb);}
-
-       if (vga->vga_graphics_controller.vga_misc.graphics_mode) { 
-           render_graphics(vga,fb);
+       if (!(vga->vga_sequencer.vga_clocking_mode.screen_off)) {
+           if (vga->vga_attribute_controller.vga_attribute_mode_control.graphics) { 
+               render_graphics(vga,fb);
+           } else {
+               render_text(vga,fb);
+               render_text_cursor(vga,fb);
+           }
        } else {
-           render_text(vga,fb);
-           render_text_cursor(vga,fb);
+           render_black(vga,fb);
        }
 
-       render_maps(vga,fb);
+       if (0) { render_test(vga,fb); }
 
+       // always render maps for now 
+       render_maps(vga,fb);
 
        v3_graphics_console_release_frame_buffer_data_rw(vga->host_cons);
     }
@@ -870,7 +1127,7 @@ static int vga_write(struct guest_info * core,
        memcpy(V3_VAddr((void*)guest_addr),src,length);
     }
     
-#if 0
+#if DEBUG_MEM_DATA
     int i;
     PrintDebug("vga: data written was 0x");
     for (i=0;i<length;i++) {
@@ -887,10 +1144,6 @@ static int vga_write(struct guest_info * core,
 
     /* Write mode determine by Graphics Mode Register (Index 05h).writemode */
 
-    // Probably need to add odd/even mode access here for text
-
-    PrintDebug("vga: write is with odd/even = %u\n", vga->vga_sequencer.vga_mem_mode.odd_even);
-
 
     switch (vga->vga_graphics_controller.vga_graphics_mode.write_mode) {
        case 0: {
@@ -916,7 +1169,9 @@ static int vga_write(struct guest_info * core,
            
            offset = find_offset_write(vga, guest_addr);
 
+#if DEBUG_DEEP_MEM
            PrintDebug("vga: mode 0 write, offset=0x%llx, ror=%u, func=%u\n", offset,ror,func);
+#endif
 
            for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) { 
                // now for each map
@@ -925,21 +1180,36 @@ static int vga_write(struct guest_info * core,
                uint8_t bm = vga->vga_graphics_controller.vga_bit_mask;
                uint8_t mm = find_map_write(vga,guest_addr+i);
 
-               PrintDebug("vga: write i=%u, mm=0x%x, offset=0x%x\n",i,(unsigned int)mm,(unsigned int)offset);
+#if DEBUG_DEEP_MEM
+               PrintDebug("vga: write i=%u, mm=0x%x, bm=0x%x sr=0x%x esr=0x%x offset=0x%x\n",i,(unsigned int)mm,(unsigned int)bm, (unsigned int)sr, (unsigned int)esr,(unsigned int)offset);
+#endif
 
-               for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, bm>>=1, mm>>=1) { 
+               for (mapnum=0;mapnum<4;mapnum++, sr>>=1, esr>>=1, mm>>=1) { 
                    vga_map map = vga->map[mapnum];
                    uint8_t data = ((uint8_t *)src)[i];
                    uint8_t latchval = vga->latch[mapnum];
-                       
+                   
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: raw data=0x%x\n",data);
+#endif
                    // rotate data right
-                   data = (data>>ror) | data<<(8-ror);
-
+                   if (ror) { 
+                       data = (data>>ror) | data<<(8-ror);
+                   }
+                   
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after ror=0x%x\n",data);
+#endif
                    // use SR bit if ESR is on for this map
-                   if (esr & 0x1) { 
-                       data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7);  // expand sr bit
+                   if (esr & 0x1) {
+                       data = (sr&0x1) * -1;
+                       
                    }
                    
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after esrr=0x%x\n",data);
+#endif
+                   
                    // Apply function
                    switch (func) { 
                        case 0: // NOP
@@ -954,19 +1224,26 @@ static int vga_write(struct guest_info * core,
                            data ^= latchval;
                            break;
                    }
-                           
-                   // mux between latch and alu output
-                   if (bm & 0x1) { 
-                       // use alu output, which is in data
-                   } else {
-                       // use latch value
-                       data=latchval;
-                   }
+                   
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after func=0x%x\n",data);
+#endif
+                   
+                   // mux between the data byte and the latch byte on
+                   // a per-bit basis
+                   data = (bm & data) | ((~bm) & latchval);
+                   
+
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: data after bm mux=0x%x\n",data);
+#endif
                    
                    // selective write
                    if (mm & 0x1) { 
                        // write to this map
-                       //PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset]));
+#if DEBUG_DEEP_MEM
+                       PrintDebug("vga: write map %u offset 0x%p map=0x%p pointer=0x%p\n",mapnum,(void*)offset,map,&(map[offset]));
+#endif
                        map[offset] = data;
                    } else {
                        // skip this map
@@ -989,7 +1266,9 @@ static int vga_write(struct guest_info * core,
 
            uint64_t offset = find_offset_write(vga,guest_addr);
 
+#if DEBUG_DEEP_MEM
            PrintDebug("vga: mode 1 write, offset=0x%llx\n", offset);
+#endif
 
            for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) { 
 
@@ -1030,7 +1309,9 @@ static int vga_write(struct guest_info * core,
            
            offset = find_offset_write(vga, guest_addr);
 
+#if DEBUG_DEEP_MEM
            PrintDebug("vga: mode 2 write, offset=0x%llx, func=%u\n", offset,func);
+#endif
 
            for (i=0;i<length;i++,offset+=find_increment_write(vga,guest_addr+i)) { 
                // now for each map
@@ -1043,9 +1324,9 @@ static int vga_write(struct guest_info * core,
                    uint8_t latchval = vga->latch[mapnum];
                        
                    // expand relevant bit to 8 bit
-                   // it's basically esr=1, sr=bit from write
-                   data = (uint8_t)(((sint8_t)(((data>>mapnum)&0x1)<<7))>>7);
-                   
+                   // it's basically esr=1, sr=bit from mode 0 write
+                   data = ((data>>mapnum)&0x1) * -1;
+                       
                    // Apply function
                    switch (func) { 
                        case 0: // NOP
@@ -1060,14 +1341,9 @@ static int vga_write(struct guest_info * core,
                            data ^= latchval;
                            break;
                    }
-                           
+
                    // mux between latch and alu output
-                   if (bm & 0x1) { 
-                       // use alu output, which is in data
-                   } else {
-                       // use latch value
-                       data=latchval;
-                   }
+                   data = (bm & data) | ((~bm) & latchval);
                    
                    // selective write
                    if (mm & 0x1) { 
@@ -1107,7 +1383,9 @@ static int vga_write(struct guest_info * core,
                // now for each map
                uint8_t data = ((uint8_t *)src)[i];
 
-               data = (data>>ror) | data<<(8-ror);
+               if (ror) {
+                   data = (data>>ror) | data<<(8-ror);
+               }
 
                uint8_t bm = vga->vga_graphics_controller.vga_bit_mask & data;
                uint8_t sr = vga->vga_graphics_controller.vga_set_reset.val & 0xf;
@@ -1117,16 +1395,11 @@ static int vga_write(struct guest_info * core,
                    vga_map map = vga->map[mapnum];
                    uint8_t latchval = vga->latch[mapnum];
                        
-                   data = (uint8_t)((((sint8_t)(sr&0x1))<<7)>>7);  // expand sr bit
-                   
-                   
+                   // expand SR bit
+                   data = (sr&0x1) * -1;
+
                    // mux between latch and alu output
-                   if (bm & 0x1) { 
-                       // use alu output, which is in data
-                   } else {
-                       // use latch value
-                       data=latchval;
-                   }
+                   data = (bm & data) | ((~bm) & latchval);
                    
                    // selective write
                    if (mm & 0x1) { 
@@ -1212,7 +1485,15 @@ static int vga_read(struct guest_info * core,
                // address bytes select the map
                for (i=0;i<length;i++,offset+=find_increment_read(vga,guest_addr+i)) { 
                    mapnum = (guest_addr+i) % 4;
-                   ((uint8_t*)dst)[i] = vga->latch[mapnum] = *(vga->map[mapnum]+offset);
+#if DEBUG_DEEP_MEM
+                   PrintDebug("vga: mode 0 read, chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum);
+#endif
+                   ((uint8_t*)dst)[i] = *(vga->map[mapnum]+offset);
+
+                   // presumably all the latches are to be reloaded, not just the selected one?
+                   for (mapnum=0;mapnum<4;mapnum++) { 
+                       vga->latch[mapnum] = *(vga->map[mapnum]+offset);
+                   }
                }
            } else {
                mapnum = vga->vga_graphics_controller.vga_read_map_select.map_select;
@@ -1222,6 +1503,10 @@ static int vga_read(struct guest_info * core,
                    PrintError("vga: read to offset=%llu map=%u (%u bytes)\n",offset,mapnum,length);
                }
                
+#if DEBUG_DEEP_MEM
+               PrintDebug("vga: mode 0 read, not-chain4, offset=0x%llx, mapnum=%u\n",offset,mapnum);
+#endif
+
                memcpy(dst,(vga->map[mapnum])+offset,length);
                
                // load the latches with the last item read
@@ -1252,6 +1537,11 @@ static int vga_read(struct guest_info * core,
            uint8_t  bits;
            
            offset = find_offset_read(vga,guest_addr);
+
+#if DEBUG_DEEP_MEM
+           PrintDebug("vga: mode 1 read, offset=0x%llx, cc=0x%x, dc-0x%x\n",offset,cc,dc);
+#endif
+               
            
            for (i=0;i<length;i++,offset++) { 
                vga_map map;
@@ -1289,7 +1579,7 @@ static int vga_read(struct guest_info * core,
     }
 
 
-#if 0
+#if DEBUG_MEM_DATA
     int i;
     PrintDebug("vga: data read is 0x");
     for (i=0;i<length;i++) {
index c9858db..08b65f2 100644 (file)
@@ -632,8 +632,7 @@ page 2-88).
            // 1 = odd/even addressing as in CGMA
            uint8_t shift_reg_mode:1;
            // 1 = shift regs get odd bits from odd maps and even/even
-           uint8_t c256:1;         
-           // 1 = 256 color mode
+           uint8_t c256:1;                 // 1 = 256 color mode
            // 0 = shift_reg_mode controls shift regs
            uint8_t reserved2:1; 
        } __attribute__((packed));
@@ -713,7 +712,7 @@ struct vga_attribute_controller_address_reg {
        uint8_t val;
        struct {
            uint8_t index:5;    // actual address
-           uint8_t internal_palette_address_srouce:1; 
+           uint8_t internal_palette_address_source:1; 
            // 0 => use the internal color palette (load the regs)
            // 1 => use the external color palette
            uint8_t reserved:2; 
@@ -866,27 +865,13 @@ struct vga_attribute_byte {
     union {
        uint8_t val;
        struct {
-           union {
-               uint8_t fore:3;
-               struct { 
-                   uint8_t fore_red:1;
-                   uint8_t fore_green:1;
-                   uint8_t fore_blue:1;
-               } __attribute__((packed));
-           } __attribute__((packed));
+           uint8_t fore:3;   //foreground color
            uint8_t foreground_intensity_or_font_select:1; // depends on char map select reg
            // character map selection is effected
            // when memory_mode.extended meomory=1
            // and the two character map enteries on character_map_select are 
            // different
-           union {
-               uint8_t back:3;
-               struct { 
-                   uint8_t back_red:1;
-                   uint8_t back_green:1;
-                   uint8_t back_blue:1;
-               } __attribute__((packed));
-           } __attribute__((packed));
+           uint8_t back:3;   //background color
            uint8_t blinking_or_bg_intensity:1; 
            // attribute mode control.enableblink = 1 => blink
            // =0 => intensity (16 colors of bg)
index 0fdaaba..05117e5 100644 (file)
@@ -42,7 +42,7 @@ struct vnet_nic_state {
 
 /* called by frontend, send pkt to VNET */
 static int vnet_nic_send(uint8_t * buf, uint32_t len, 
-                        void * private_data) {
+                        int synchronize, void * private_data) {
     struct vnet_nic_state * vnetnic = (struct vnet_nic_state *)private_data;
 
     struct v3_vnet_pkt pkt;
@@ -52,15 +52,13 @@ static int vnet_nic_send(uint8_t * buf, uint32_t len,
     memcpy(pkt.header, buf, ETHERNET_HEADER_LEN);
     pkt.data = buf;
 
-#ifdef CONFIG_DEBUG_VNET_NIC
-    {
-       PrintDebug("VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", 
+    V3_Net_Print(2, "VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", 
                   pkt.size, pkt.src_id, pkt.src_type);
-       v3_hexdump(buf, len, NULL, 0);
+    if(v3_net_debug >= 4){
+       v3_hexdump(buf, len, NULL, 0);
     }
-#endif
 
-    return v3_vnet_send_pkt(&pkt, NULL);;
+    return v3_vnet_send_pkt(&pkt, NULL, synchronize);
 }
 
 
@@ -70,22 +68,13 @@ static int virtio_input(struct v3_vm_info * info,
                        void * private_data){
     struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data;
 
-    PrintDebug("VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", 
+    V3_Net_Print(2, "VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", 
                pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type);
        
     return vnetnic->net_ops.recv(pkt->data, pkt->size,
                                 vnetnic->net_ops.frontend_data);
 }
 
-/* poll data from front-end */
-static void virtio_poll(struct v3_vm_info * info, 
-                       int budget,
-                       void * private_data){
-    struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data;
-
-    vnetnic->net_ops.poll(info, budget, vnetnic->net_ops.frontend_data);
-}
-
 
 static int vnet_nic_free(struct vnet_nic_state * vnetnic) {
 
@@ -102,7 +91,6 @@ static struct v3_device_ops dev_ops = {
 
 static struct v3_vnet_dev_ops vnet_dev_ops = {
     .input = virtio_input,
-    .poll = virtio_poll,
 };
 
 
diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig
new file mode 100644 (file)
index 0000000..c7b7d69
--- /dev/null
@@ -0,0 +1,41 @@
+menu "Extensions"
+
+config EXT_VTIME
+       bool "Enable Time virtualization"
+       default n
+       help
+          Enables the timer virtualization extensions. These hide the cost of 
+          running inside the VMM context.  This can aid the consistency of
+           time between multiple timers, but can cause the guest to run 
+           a good bit slower than the host in VM-intensive parts of the code.
+
+config EXT_VTSC
+       bool "Fully virtualize guest TSC"
+       default n
+       depends on EXT_VTIME
+       help
+           Virtualize the processor time stamp counter in the guest, 
+           generally increasing consistency between various time sources 
+           but also potentially making guest time run slower than real time.
+        
+config EXT_MTRRS
+       bool "Support virtualized MTRRs"
+       default n
+       help
+          Provides a virtualized set of MTRR registers
+
+config EXT_MACH_CHECK
+       bool "Support Machine Check functionality"
+       default n
+       help 
+          Provides a virtualized machine check architecture
+
+
+config EXT_INSPECTOR
+       bool "VM Inspector"
+       default n
+       help
+         Provides the inspection extension
+
+endmenu
diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile
new file mode 100644 (file)
index 0000000..ac19202
--- /dev/null
@@ -0,0 +1,5 @@
+obj-y += null.o
+obj-$(CONFIG_EXT_MTRRS) += ext_mtrr.o
+obj-$(CONFIG_EXT_VTSC) += ext_vtsc.o
+obj-$(CONFIG_EXT_VTIME) += ext_vtime.o
+obj-$(CONFIG_EXT_INSPECTOR) += ext_inspector.o
similarity index 81%
rename from palacios/src/palacios/vmm_inspector.c
rename to palacios/src/extensions/ext_inspector.c
index 2ff611b..7b89a91 100644 (file)
  */
 
 
-#include <palacios/vmm_inspector.h>
+//#include <palacios/vmm_inspector.h>
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 #include <palacios/vmm_sprintf.h>
+#include <palacios/vmm_extensions.h>
+
+#include <palacios/vmm_multitree.h>
+#include <interfaces/inspector.h>
 
 // Note that v3_inspect_node_t is actuall a struct v3_mtree
 // Its set as void for opaque portability
 
+struct v3_inspector_state {
+    struct v3_mtree state_tree;
+
+};
 
-int v3_init_inspector(struct v3_vm_info * vm) {
-    struct v3_inspector_state * state = (struct v3_inspector_state *)&(vm->inspector);
 
+// Per-VM init hook of the inspector extension: allocates the inspector
+// state, sets up the root of the state tree and hands the state back
+// through priv_data.  Returns 0 on success, -1 if allocation fails.
+static int init_inspector(struct v3_vm_info * vm, v3_cfg_tree_t * cfg, void ** priv_data) {
+    struct v3_inspector_state * state = V3_Malloc(sizeof(struct v3_inspector_state));
+
+    // the allocation can fail; do not memset through a NULL pointer
+    if (state == NULL) {
+       PrintError("inspector: cannot allocate state\n");
+       return -1;
+    }
+
     memset(state, 0, sizeof(struct v3_inspector_state));
 
     strncpy(state->state_tree.name, "vm->name", 50);
     state->state_tree.subtree = 1;
 
+    *priv_data = state;
+
     return 0;
 }
 
 
-int  v3_init_inspector_core(struct guest_info * core) {
-    struct v3_inspector_state * vm_state = &(core->vm_info->inspector);
+static int init_inspector_core(struct guest_info * core, void * priv_data) {
+    struct v3_inspector_state * vm_state = priv_data;
     char core_name[50];
 
     snprintf(core_name, 50, "core.%d", core->cpu_id);
@@ -72,7 +82,7 @@ int  v3_init_inspector_core(struct guest_info * core) {
        v3_inspect_64(cr_node, "EFER", (uint64_t *)&(core->ctrl_regs.efer));    
 
 
-       //      struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS");
+       //struct v3_mtree * seg_node = v3_mtree_create_subtree(core_node, "SEGMENTS");
        
 
 
@@ -82,6 +92,23 @@ int  v3_init_inspector_core(struct guest_info * core) {
 }
 
 
+
+
+
+// Extension descriptor for the inspector: only the per-VM and per-core
+// init hooks are supplied, every other hook is left NULL
+static struct v3_extension_impl inspector_impl = {
+    .name = "inspector",
+    .init = init_inspector,
+    .deinit = NULL,
+    .core_init = init_inspector_core,
+    .core_deinit = NULL,
+    .on_entry = NULL,
+    .on_exit = NULL
+};
+
+
+register_extension(&inspector_impl);
+
+
 v3_inspect_node_t * v3_inspect_add_subtree(v3_inspect_node_t * root, char * name) {
     return v3_mtree_create_subtree(root, name);
 }
@@ -122,8 +149,6 @@ int v3_inspect_buf(v3_inspect_node_t * node, char * name,
 
 
 
-
-
 int v3_find_inspection_value(v3_inspect_node_t * node, char * name, 
                           struct v3_inspection_value * value) {
     struct v3_mtree * mt_node = v3_mtree_find_node(node, name);
@@ -152,7 +177,13 @@ struct v3_inspection_value v3_inspection_value(v3_inspect_node_t * node) {
 
 
 v3_inspect_node_t * v3_get_inspection_root(struct v3_vm_info * vm) {
-    return &(vm->inspector.state_tree);
+    struct v3_inspector_state * inspector = v3_get_extension_state(vm, inspector_impl.name);
+
+    if (inspector == NULL) {
+       return NULL;
+    }
+
+    return &(inspector->state_tree);
 }
 
 v3_inspect_node_t * v3_get_inspection_subtree(v3_inspect_node_t * root, char * name) {
@@ -167,3 +198,7 @@ v3_inspect_node_t * v3_inspection_node_next(v3_inspect_node_t * node) {
 v3_inspect_node_t * v3_inspection_first_child(v3_inspect_node_t * root) {
     return v3_mtree_first_child(root);
 }
+
+
+
+
diff --git a/palacios/src/extensions/ext_vtime.c b/palacios/src/extensions/ext_vtime.c
new file mode 100644 (file)
index 0000000..665d6be
--- /dev/null
@@ -0,0 +1,169 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+#include <palacios/vmm.h>
+#include <palacios/vmm_time.h>
+#include <palacios/vm_guest.h>
+
+
+
+
+/* Overview 
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest 
+ * resolution, lowest overhead timer on modern CPUs that it can - the 
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not 
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a 
+ * constant rate TSC, and Palacios relies on this fact.
+ * 
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ *     time in the guest. This is computed using an offset from (1) above.
+ * (3) The actual guest timestamp counter (which can be written by
+ *     writing to the guest TSC MSR - MSR 0x10). This is computed as an
+ *     offset from the monotonic guest TSC in (2) above, and this offset
+ *     is updated when the TSC MSR is written.
+ *
+ * The value used to offset the guest TSC from the host TSC is the *sum* of
+ * the offsets described in (2) and (3) above.
+ * 
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest, 
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ * Future additions:
+ * (1) Add support for temporarily skewing guest time off of where it should
+ *     be to support slack simulation of guests. The idea is that simulators
+ *     set this skew to be the difference between how much time passed for a 
+ *     simulated feature and a real implementation of that feature, making time
+ *     pass at a different rate from real time on this core. The VMM will then
+ *     attempt to move this skew back towards 0 subject to resolution/accuracy
+ *     constraints from various system timers.
+ *   
+ *     The main effort in doing this will be to get accuracy/resolution 
+ *     information from each local timer and to use this to bound how much skew
+ *     is removed on each exit.
+ */
+
+
+
+struct vtime_state {
+    uint32_t guest_cpu_freq;   // can be lower than host CPU freq!
+    uint64_t initial_time;     // Time when VMM started. 
+    sint64_t guest_host_offset;// Offset of monotonic guest time from host time
+};
+
+
+
+
+static int offset_time( struct guest_info * info, sint64_t offset )
+{
+    struct vm_time * time_state = &(info->time_state);
+//    PrintDebug("Adding additional offset of %lld to guest time.\n", offset);
+    time_state->guest_host_offset += offset;
+    return 0;
+}
+
+
+// Control guest time in relation to host time so that the two stay 
+// appropriately synchronized to the extent possible. 
+int v3_adjust_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t host_time, target_host_time;
+    uint64_t guest_time, target_guest_time, old_guest_time;
+    uint64_t guest_elapsed, host_elapsed, desired_elapsed;
+
+    /* Compute the target host time given how much time has *already*
+     * passed in the guest */
+    guest_time = v3_get_guest_time(time_state);
+    guest_elapsed = (guest_time - time_state->initial_time);
+    desired_elapsed = (guest_elapsed * time_state->host_cpu_freq) / time_state->guest_cpu_freq;
+    target_host_time = time_state->initial_time + desired_elapsed;
+
+    /* Now, let the host run while the guest is stopped to make the two
+     * sync up. */
+    host_time = v3_get_host_time(time_state);
+    old_guest_time = v3_get_guest_time(time_state);
+
+    while (target_host_time > host_time) {
+       v3_yield(info);
+       host_time = v3_get_host_time(time_state);
+    }
+
+    guest_time = v3_get_guest_time(time_state);
+
+    // We do *not* assume the guest timer was paused in the VM. If it was,
+    // this offsetting is 0. If it wasn't, we need this.
+   offset_time(info, (sint64_t)old_guest_time - (sint64_t)guest_time);
+
+    /* Now the host may have gotten ahead of the guest because
+     * yielding is a coarse grained thing. Figure out what guest time
+     * we want to be at, and use the offsetting mechanism in
+     * the VMM to make the guest run forward. We limit *how* much we skew 
+     * it forward to prevent the guest time making large jumps, 
+     * however. */
+    host_elapsed = host_time - time_state->initial_time;
+    desired_elapsed = (host_elapsed * time_state->guest_cpu_freq) / time_state->host_cpu_freq;
+    target_guest_time = time_state->initial_time + desired_elapsed;
+
+    if (guest_time < target_guest_time) {
+       uint64_t max_skew, desired_skew, skew;
+
+       if (time_state->enter_time) {
+           max_skew = (time_state->exit_time - time_state->enter_time) / 10;
+       } else {
+           max_skew = 0;
+       }
+
+       desired_skew = target_guest_time - guest_time;
+       skew = desired_skew > max_skew ? max_skew : desired_skew;
+/*     PrintDebug("Guest %llu cycles behind where it should be.\n",
+                  desired_skew);
+       PrintDebug("Limit on forward skew is %llu. Skewing forward %llu.\n",
+                  max_skew, skew); */
+       
+       offset_time(info, skew);
+    }
+    
+    return 0;
+}
+
+
+static int init() {
+    khz = v3_cfg_val(cfg_tree, "khz");
+
+    if (khz) {
+       time_state->guest_cpu_freq = atoi(khz);
+       PrintDebug("Core %d CPU frequency requested at %d khz.\n", 
+                  info->cpu_id, time_state->guest_cpu_freq);
+    } 
+    
+    if ( (khz == NULL) || 
+        (time_state->guest_cpu_freq <= 0)  || 
+        (time_state->guest_cpu_freq > time_state->host_cpu_freq) ) {
+
+       time_state->guest_cpu_freq = time_state->host_cpu_freq;
+    }
+
+
+}
diff --git a/palacios/src/extensions/ext_vtsc.c b/palacios/src/extensions/ext_vtsc.c
new file mode 100644 (file)
index 0000000..96e7ce3
--- /dev/null
@@ -0,0 +1,195 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_time.h>
+#include <palacios/vm_guest.h>
+
+
+// Functions for handling exits on the TSC when fully virtualizing 
+// the timestamp counter.
+#define TSC_MSR     0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
+
+
+struct vtsc_state {
+
+    struct v3_msr tsc_aux;     // Auxiliary MSR for RDTSCP
+
+};
+
+
+
+/* 
+ * Handle full virtualization of the time stamp counter.  We don't
+ * store the actual value of the TSC, only the guest's offset from the
+ * monotonic guest time. If the guest writes to the TSC, we handle this
+ * by changing that offset.
+ *
+ * Possible TODO: Proper hooking of TSC read/writes?
+ */ 
+
+static int rdtsc(struct guest_info * info) {
+    uint64_t tscval = v3_get_guest_tsc(&info->time_state);
+
+    info->vm_regs.rdx = tscval >> 32;
+    info->vm_regs.rax = tscval & 0xffffffffLL;
+
+    return 0;
+}
+
+int v3_handle_rdtsc(struct guest_info * info) {
+    rdtsc(info);
+    
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 2;
+    
+    return 0;
+}
+
+int v3_rdtscp(struct guest_info * info) {
+    int ret;
+    /* First get the MSR value that we need. It's safe to futz with
+     * ra/c/dx here since they're modified by this instruction anyway. */
+    info->vm_regs.rcx = TSC_AUX_MSR; 
+    ret = v3_handle_msr_read(info);
+
+    if (ret != 0) {
+       return ret;
+    }
+
+    info->vm_regs.rcx = info->vm_regs.rax;
+
+    /* Now do the TSC half of the instruction */
+    ret = v3_rdtsc(info);
+
+    if (ret != 0) {
+       return ret;
+    }
+
+    return 0;
+}
+
+
+int v3_handle_rdtscp(struct guest_info * info) {
+  PrintDebug("Handling virtual RDTSCP call.\n");
+
+    v3_rdtscp(info);
+
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    info->rip += 3;
+    
+    return 0;
+}
+
+
+
+
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, 
+                                struct v3_msr *msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    msr_val->lo = time_state->tsc_aux.lo;
+    msr_val->hi = time_state->tsc_aux.hi;
+
+    return 0;
+}
+
+
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, 
+                             struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    time_state->tsc_aux.lo = msr_val.lo;
+    time_state->tsc_aux.hi = msr_val.hi;
+
+    return 0;
+}
+
+
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr *msr_val, void *priv) {
+    uint64_t time = v3_get_guest_tsc(&info->time_state);
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    msr_val->hi = time >> 32;
+    msr_val->lo = time & 0xffffffffLL;
+    
+    return 0;
+}
+
+
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+    time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time; 
+
+    return 0;
+}
+
+
+static int deinit() {
+    v3_unhook_msr(vm, TSC_MSR);
+    v3_unhook_msr(vm, TSC_AUX_MSR);
+}
+
+
+static int init() {
+
+    time_state->tsc_aux.lo = 0;
+    time_state->tsc_aux.hi = 0;
+
+
+
+    PrintDebug("Installing TSC MSR hook.\n");
+    ret = v3_hook_msr(vm, TSC_MSR, 
+                     tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+
+    if (ret != 0) {
+       return ret;
+    }
+
+    PrintDebug("Installing TSC_AUX MSR hook.\n");
+    ret = v3_hook_msr(vm, TSC_AUX_MSR, tsc_aux_msr_read_hook, 
+                     tsc_aux_msr_write_hook, NULL);
+
+    if (ret != 0) {
+       return ret;
+    }
+}
diff --git a/palacios/src/extensions/null.c b/palacios/src/extensions/null.c
new file mode 100644 (file)
index 0000000..1828c44
--- /dev/null
@@ -0,0 +1,6 @@
+/** \file
+ * Do nothing module.
+ *
+ * This file only exists to appease the kbuild gods.
+ */
+
index 064d17f..0002cb2 100644 (file)
@@ -29,12 +29,13 @@ struct v3_host_dev_hooks * host_dev_hooks = 0;
 
 v3_host_dev_t v3_host_dev_open(char *impl,
                               v3_bus_class_t bus,
-                              v3_guest_dev_t gdev)
+                              v3_guest_dev_t gdev,
+                              struct v3_vm_info *vm)
 {                                             
     V3_ASSERT(host_dev_hooks != NULL);
     V3_ASSERT(host_dev_hooks->open != NULL);
 
-    return host_dev_hooks->open(impl,bus,gdev);
+    return host_dev_hooks->open(impl,bus,gdev,vm->host_priv_data);
 }
 
 int v3_host_dev_close(v3_host_dev_t hdev) 
@@ -75,7 +76,7 @@ uint64_t v3_host_dev_read_mem(v3_host_dev_t hdev,
     V3_ASSERT(host_dev_hooks != NULL);
     V3_ASSERT(host_dev_hooks->read_mem != NULL);
     
-    return host_dev_hooks->read_mem(hdev,gpa,dst,len);
+    return host_dev_hooks->read_mem(hdev,(void*)gpa,dst,len);
 }
 
 uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev,
@@ -86,7 +87,7 @@ uint64_t v3_host_dev_write_mem(v3_host_dev_t hdev,
     V3_ASSERT(host_dev_hooks != NULL);
     V3_ASSERT(host_dev_hooks->write_mem != NULL);
     
-    return host_dev_hooks->write_mem(hdev,gpa,src,len);
+    return host_dev_hooks->write_mem(hdev,(void*)gpa,src,len);
 }
 
 uint64_t v3_host_dev_read_config(v3_host_dev_t hdev,
@@ -140,7 +141,7 @@ int v3_host_dev_raise_irq(v3_host_dev_t hostdev,
 
 uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t  hostdev,
                                    v3_guest_dev_t guest_dev,
-                                   addr_t         gpa,
+                                   void *         gpa,
                                    void           *dst,
                                    uint64_t       len)
 {
@@ -154,14 +155,14 @@ uint64_t v3_host_dev_read_guest_mem(v3_host_dev_t  hostdev,
        if (!vm) { 
            return 0;
        } else {
-           return v3_read_gpa_memory(&(vm->cores[0]), gpa, len, dst);
+           return v3_read_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, dst);
        }
     }
 }
 
 uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t  hostdev,
                                     v3_guest_dev_t guest_dev,
-                                    addr_t         gpa,
+                                    void *         gpa,
                                     void           *src,
                                     uint64_t       len)
 {
@@ -175,7 +176,7 @@ uint64_t v3_host_dev_write_guest_mem(v3_host_dev_t  hostdev,
        if (!vm) { 
            return 0;
        } else {
-           return v3_write_gpa_memory(&(vm->cores[0]), gpa, len, src);
+           return v3_write_gpa_memory(&(vm->cores[0]), (addr_t)gpa, len, src);
        }
     }
 }
index c505b60..6a24b89 100644 (file)
@@ -31,7 +31,6 @@ obj-y := \
        vmm_binaries.o \
        vmm_cpuid.o \
        vmm_xml.o \
-       vmm_muxer.o \
        vmm_mem_hook.o \
        vmm_mptable.o \
        vmm_extensions.o \
@@ -39,7 +38,6 @@ obj-y := \
        vmm_multitree.o \
 
 
-obj-$(CONFIG_INSPECTOR) += vmm_inspector.o
 
 
 obj-$(CONFIG_XED) +=   vmm_xed.o
@@ -59,6 +57,7 @@ obj-$(CONFIG_VMX) +=          vmx.o \
                        vmx_io.o \
                        vmx_lowlevel.o \
                        vmx_msr.o \
+                       vmx_hw_info.o \
                        vmcs.o \
                        vmx_ctrl_regs.o \
                        vmx_assist.o
index b8cc549..4ab0134 100644 (file)
@@ -28,7 +28,6 @@
 #include <palacios/vm_guest_mem.h>
 #include <palacios/vmm_lowlevel.h>
 #include <palacios/vmm_sprintf.h>
-#include <palacios/vmm_muxer.h>
 #include <palacios/vmm_xed.h>
 #include <palacios/vmm_direct_paging.h>
 
@@ -526,13 +525,7 @@ static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_dat
 int v3_init_vm(struct v3_vm_info * vm) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
 
-    if (v3_get_foreground_vm() == NULL) {
-       v3_set_foreground_vm(vm);
-    }
 
-#ifdef CONFIG_INSPECTOR
-    v3_init_inspector(vm);
-#endif
 
 #ifdef CONFIG_TELEMETRY
     v3_init_telemetry(vm);
@@ -661,9 +654,7 @@ int v3_init_core(struct guest_info * core) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(V3_Get_CPU());
     struct v3_vm_info * vm = core->vm_info;
 
-#ifdef CONFIG_INSPECTOR
-    v3_init_inspector_core(core);
-#endif
+
 
     /*
      * Initialize the subsystem data strutures
index bf4c30e..ca6c601 100644 (file)
@@ -73,9 +73,9 @@ int v3_gpa_to_hpa(struct guest_info * info, addr_t gpa, addr_t * hpa) {
     }
     
     if (reg->flags.alloced == 0) {
-       PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n", 
-                  (void *)gpa);
-       v3_print_mem_map(info->vm_info);
+       //PrintError("In GPA->HPA: Tried to translate physical address of non allocated page (addr=%p)\n", 
+       //         (void *)gpa);
+    //v3_print_mem_map(info->vm_info);
        return -1;
     }
        
@@ -133,8 +133,8 @@ int v3_gpa_to_hva(struct guest_info * guest_info, addr_t gpa, addr_t * hva) {
     *hva = 0;
 
     if (v3_gpa_to_hpa(guest_info, gpa, &hpa) != 0) {
-       PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n", 
-                  (void *)gpa);
+       //      PrintError("In GPA->HVA: Invalid GPA(%p)->HPA lookup\n", 
+       //         (void *)gpa);
        return -1;
     }
   
index 0b874fd..f8fe322 100644 (file)
@@ -624,6 +624,7 @@ static void print_exec_ctrls() {
 #ifdef __V3_32BIT__
     print_vmcs_field(VMCS_IO_BITMAP_A_ADDR_HIGH);
 #endif
+
     print_vmcs_field(VMCS_IO_BITMAP_B_ADDR);
 #ifdef __V3_32BIT__
     print_vmcs_field(VMCS_IO_BITMAP_B_ADDR_HIGH);
@@ -762,162 +763,29 @@ void v3_print_vmcs() {
 
 /*
  * Returns the field length in bytes
+ *   It doesn't get much uglier than this... Thanks Intel
  */
 int v3_vmcs_get_field_len(vmcs_field_t field) {
-    switch(field)  {
-       /* 16 bit Control Fields */
-        case VMCS_GUEST_ES_SELECTOR:
-        case VMCS_GUEST_CS_SELECTOR:
-        case VMCS_GUEST_SS_SELECTOR:
-        case VMCS_GUEST_DS_SELECTOR:
-        case VMCS_GUEST_FS_SELECTOR:
-        case VMCS_GUEST_GS_SELECTOR:
-        case VMCS_GUEST_LDTR_SELECTOR:
-        case VMCS_GUEST_TR_SELECTOR:
-        case VMCS_HOST_ES_SELECTOR:
-        case VMCS_HOST_CS_SELECTOR:
-        case VMCS_HOST_SS_SELECTOR:
-        case VMCS_HOST_DS_SELECTOR:
-        case VMCS_HOST_FS_SELECTOR:
-        case VMCS_HOST_GS_SELECTOR:
-        case VMCS_HOST_TR_SELECTOR:
-            return 2;
-
-       /* 32 bit Control Fields */
-        case VMCS_PIN_CTRLS:
-        case VMCS_PROC_CTRLS:
-       case VMCS_SEC_PROC_CTRLS:
-        case VMCS_EXCP_BITMAP:
-        case VMCS_PG_FAULT_ERR_MASK:
-        case VMCS_PG_FAULT_ERR_MATCH:
-        case VMCS_CR3_TGT_CNT:
-        case VMCS_EXIT_CTRLS:
-        case VMCS_EXIT_MSR_STORE_CNT:
-        case VMCS_EXIT_MSR_LOAD_CNT:
-        case VMCS_ENTRY_CTRLS:
-        case VMCS_ENTRY_MSR_LOAD_CNT:
-        case VMCS_ENTRY_INT_INFO:
-        case VMCS_ENTRY_EXCP_ERR:
-        case VMCS_ENTRY_INSTR_LEN:
-        case VMCS_TPR_THRESHOLD:
-        case VMCS_INSTR_ERR:
-        case VMCS_EXIT_REASON:
-        case VMCS_EXIT_INT_INFO:
-        case VMCS_EXIT_INT_ERR:
-        case VMCS_IDT_VECTOR_INFO:
-        case VMCS_IDT_VECTOR_ERR:
-        case VMCS_EXIT_INSTR_LEN:
-        case VMCS_EXIT_INSTR_INFO:
-        case VMCS_GUEST_ES_LIMIT:
-        case VMCS_GUEST_CS_LIMIT:
-        case VMCS_GUEST_SS_LIMIT:
-        case VMCS_GUEST_DS_LIMIT:
-        case VMCS_GUEST_FS_LIMIT:
-        case VMCS_GUEST_GS_LIMIT:
-        case VMCS_GUEST_LDTR_LIMIT:
-        case VMCS_GUEST_TR_LIMIT:
-        case VMCS_GUEST_GDTR_LIMIT:
-        case VMCS_GUEST_IDTR_LIMIT:
-        case VMCS_GUEST_ES_ACCESS:
-        case VMCS_GUEST_CS_ACCESS:
-        case VMCS_GUEST_SS_ACCESS:
-        case VMCS_GUEST_DS_ACCESS:
-        case VMCS_GUEST_FS_ACCESS:
-        case VMCS_GUEST_GS_ACCESS:
-        case VMCS_GUEST_LDTR_ACCESS:
-        case VMCS_GUEST_TR_ACCESS:
-        case VMCS_GUEST_INT_STATE:
-        case VMCS_GUEST_ACTIVITY_STATE:
-        case VMCS_GUEST_SMBASE:
-        case VMCS_GUEST_SYSENTER_CS:
-        case VMCS_HOST_SYSENTER_CS:
-            return 4;
+    struct vmcs_field_encoding * enc = (struct vmcs_field_encoding *)&field;
 
-
-       /* high bits of variable width fields
-        * We can probably just delete most of these....
-        */
-        case VMCS_IO_BITMAP_A_ADDR_HIGH:
-        case VMCS_IO_BITMAP_B_ADDR_HIGH:
-        case VMCS_MSR_BITMAP_HIGH:
-        case VMCS_EXIT_MSR_STORE_ADDR_HIGH:
-        case VMCS_EXIT_MSR_LOAD_ADDR_HIGH:
-        case VMCS_ENTRY_MSR_LOAD_ADDR_HIGH:
-        case VMCS_EXEC_PTR_HIGH:
-        case VMCS_TSC_OFFSET_HIGH:
-        case VMCS_VAPIC_ADDR_HIGH:
-       case VMCS_APIC_ACCESS_ADDR_HIGH:
-        case VMCS_LINK_PTR_HIGH:
-        case VMCS_GUEST_DBG_CTL_HIGH:
-        case VMCS_GUEST_PERF_GLOBAL_CTRL_HIGH:
-       case VMCS_HOST_PERF_GLOBAL_CTRL_HIGH:
-       case VMCS_GUEST_EFER_HIGH:
+    switch (enc->width)  {
+       case 0:
+            return 2;
+       case 1: {
+           if (enc->access_type == 1) {
+               return 4;
+           } else {
+#ifdef __V3_64BIT__
+               return 8;
+#else
+               return 4;
+#endif
+           }
+       }
+       case 2:
             return 4;
-
-            /* Natural Width Control Fields */
-        case VMCS_IO_BITMAP_A_ADDR:
-        case VMCS_IO_BITMAP_B_ADDR:
-        case VMCS_MSR_BITMAP:
-        case VMCS_EXIT_MSR_STORE_ADDR:
-        case VMCS_EXIT_MSR_LOAD_ADDR:
-        case VMCS_ENTRY_MSR_LOAD_ADDR:
-        case VMCS_EXEC_PTR:
-        case VMCS_TSC_OFFSET:
-        case VMCS_VAPIC_ADDR:
-       case VMCS_APIC_ACCESS_ADDR:
-        case VMCS_LINK_PTR:
-        case VMCS_GUEST_DBG_CTL:
-        case VMCS_GUEST_PERF_GLOBAL_CTRL:
-       case VMCS_HOST_PERF_GLOBAL_CTRL:
-        case VMCS_CR0_MASK:
-        case VMCS_CR4_MASK:
-        case VMCS_CR0_READ_SHDW:
-        case VMCS_CR4_READ_SHDW:
-        case VMCS_CR3_TGT_VAL_0:
-        case VMCS_CR3_TGT_VAL_1:
-        case VMCS_CR3_TGT_VAL_2:
-        case VMCS_CR3_TGT_VAL_3:
-        case VMCS_EXIT_QUAL:
-        case VMCS_IO_RCX:
-        case VMCS_IO_RSI:
-        case VMCS_IO_RDI:
-        case VMCS_IO_RIP:
-        case VMCS_GUEST_LINEAR_ADDR:
-        case VMCS_GUEST_CR0:
-        case VMCS_GUEST_CR3:
-        case VMCS_GUEST_CR4:
-        case VMCS_GUEST_ES_BASE:
-        case VMCS_GUEST_CS_BASE:
-        case VMCS_GUEST_SS_BASE:
-        case VMCS_GUEST_DS_BASE:
-        case VMCS_GUEST_FS_BASE:
-        case VMCS_GUEST_GS_BASE:
-        case VMCS_GUEST_LDTR_BASE:
-        case VMCS_GUEST_TR_BASE:
-        case VMCS_GUEST_GDTR_BASE:
-        case VMCS_GUEST_IDTR_BASE:
-        case VMCS_GUEST_DR7:
-        case VMCS_GUEST_RSP:
-        case VMCS_GUEST_RIP:
-        case VMCS_GUEST_RFLAGS:
-        case VMCS_GUEST_PENDING_DBG_EXCP:
-        case VMCS_GUEST_SYSENTER_ESP:
-        case VMCS_GUEST_SYSENTER_EIP:
-        case VMCS_HOST_CR0:
-        case VMCS_HOST_CR3:
-        case VMCS_HOST_CR4:
-        case VMCS_HOST_FS_BASE:
-        case VMCS_HOST_GS_BASE:
-        case VMCS_HOST_TR_BASE:
-        case VMCS_HOST_GDTR_BASE:
-        case VMCS_HOST_IDTR_BASE:
-        case VMCS_HOST_SYSENTER_ESP:
-        case VMCS_HOST_SYSENTER_EIP:
-        case VMCS_HOST_RSP:
-        case VMCS_HOST_RIP:
-       case VMCS_GUEST_EFER:
+       case 3:
             return sizeof(addr_t);
-
         default:
            PrintError("Invalid VMCS field: 0x%x\n", field);
             return -1;
index ea24ca3..cb38aab 100644 (file)
@@ -183,14 +183,13 @@ v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
 struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
     struct v3_vm_info * vm = v3_config_guest(cfg, priv_data);
 
-    V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
-
-
     if (vm == NULL) {
        PrintError("Could not configure guest\n");
        return NULL;
     }
 
+    V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
+
     if (name == NULL) {
        name = "[V3_VM]";
     } else if (strlen(name) >= 128) {
@@ -374,8 +373,6 @@ int v3_stop_vm(struct v3_vm_info * vm) {
            break;
        }
 
-       V3_Print("Yielding\n");
-
        v3_yield(NULL);
     }
     
similarity index 51%
rename from palacios/include/palacios/vmm_muxer.h
rename to palacios/src/palacios/vmm_barrier.c
index 1c50789..1115ce0 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the Palacios Virtual Machine Monitor developed
  * by the V3VEE Project with funding from the United States National 
  * Science Foundation and the Department of Energy.  
@@ -7,33 +7,15 @@
  * and the University of New Mexico.  You can find out more at 
  * http://www.v3vee.org
  *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
  * All rights reserved.
  *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  */
 
-#ifndef __VMM_MUXER_H__
-#define __VMM_MUXER_H__
 
-#ifdef __V3VEE__
-
-
-struct v3_vm_info;
-
-
-
-struct v3_vm_info * v3_get_foreground_vm();
-void v3_set_foreground_vm(struct v3_vm_info * vm);
-
-
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm));
-
-
-#endif
-
-#endif
+#include <util/vmm_barrier.h>
index b1d747e..25d8b23 100644 (file)
@@ -410,6 +410,11 @@ static int post_config_core(struct guest_info * info, v3_cfg_tree_t * cfg) {
 
     info->core_run_state = CORE_STOPPED;
  
+    if (v3_init_core_extensions(info) == -1) {
+        PrintError("Error intializing extension core states\n");
+        return -1;
+    }
+
     if (info->vm_info->vm_class == V3_PC_VM) {
        if (post_config_pc_core(info, cfg) == -1) {
            PrintError("PC Post configuration failure\n");
@@ -552,6 +557,7 @@ int v3_free_config(struct v3_vm_info * vm) {
 
 
 
+
 static int setup_memory_map(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     v3_cfg_tree_t * mem_region = v3_cfg_subtree(v3_cfg_subtree(cfg, "memmap"), "region");
 
index 9fcf197..0c7ef4c 100644 (file)
@@ -77,13 +77,21 @@ static int post_config_pc(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     }
 
 
-    if (vm->num_cores > 1) {
+    if (vm->num_cores>1 && !v3_find_dev(vm,"apic")) { 
+       PrintError("palacios: VM has more than one core, but no device named \"apic\"!\n");
+       return -1;
+    } 
+    
+    if (v3_find_dev(vm,"apic")) { 
+       if (!v3_find_dev(vm,"ioapic")) { 
+           PrintError("palacios: VM cores have apics, but there is no device named \"ioapic\"!\n");
+       }
        if (v3_inject_mptable(vm) == -1) { 
            PrintError("Failed to inject mptable during configuration\n");
            return -1;
        }
     }
-
+    
     return 0;
 }
 
index a093bf7..ce7c244 100644 (file)
@@ -7,11 +7,10 @@
  * and the University of New Mexico.  You can find out more at 
  * http://www.v3vee.org
  *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
  * All rights reserved.
  *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 #include <palacios/vmm_lowlevel.h>
 #include <palacios/vm_guest.h>
 
+struct masked_cpuid {
+    uint32_t rax_mask;
+    uint32_t rbx_mask;
+    uint32_t rcx_mask;
+    uint32_t rdx_mask;
+
+    uint32_t rax;
+    uint32_t rbx;
+    uint32_t rcx;
+    uint32_t rdx;
+};
+
 
 void v3_init_cpuid_map(struct v3_vm_info * vm) {
     vm->cpuid_map.map.rb_node = NULL;
+
+    // Setup default cpuid entries
+
+
+    // Disable XSAVE (cpuid 0x01, ECX bit 26)
+    v3_cpuid_add_fields(vm, 0x01, 0, 0, 0, 0, (1 << 26), 0, 0, 0);
+
 }
 
+
+
+
 int v3_deinit_cpuid_map(struct v3_vm_info * vm) {
     struct rb_node * node = v3_rb_first(&(vm->cpuid_map.map));
     struct v3_cpuid_hook * hook = NULL;
@@ -104,6 +125,105 @@ static struct v3_cpuid_hook * get_cpuid_hook(struct v3_vm_info * vm, uint32_t cp
 }
 
 
+
+static int mask_hook(struct guest_info * core, uint32_t cpuid, 
+             uint32_t * eax, uint32_t * ebx, 
+             uint32_t * ecx, uint32_t * edx,
+             void * priv_data) {
+    struct masked_cpuid * mask = (struct masked_cpuid *)priv_data;
+
+    v3_cpuid(cpuid, eax, ebx, ecx, edx);
+
+    *eax &= ~(mask->rax_mask);
+    *eax |= mask->rax;
+
+    *ebx &= ~(mask->rbx_mask);
+    *ebx |= mask->rbx;
+
+    *ecx &= ~(mask->rcx_mask);
+    *ecx |= mask->rcx;
+
+    *edx &= ~(mask->rdx_mask);
+    *edx |= mask->rdx;
+
+    return 0;
+}
+
+int v3_cpuid_add_fields(struct v3_vm_info * vm, uint32_t cpuid, 
+                       uint32_t rax_mask, uint32_t rax,
+                       uint32_t rbx_mask, uint32_t rbx, 
+                       uint32_t rcx_mask, uint32_t rcx, 
+                       uint32_t rdx_mask, uint32_t rdx) {
+    struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid);
+
+    if (hook == NULL) {
+       struct masked_cpuid * mask = V3_Malloc(sizeof(struct masked_cpuid));
+       memset(mask, 0, sizeof(struct masked_cpuid));
+       
+       mask->rax_mask = rax_mask;
+       mask->rax = rax;
+       mask->rbx_mask = rbx_mask;
+       mask->rbx = rbx;
+       mask->rcx_mask = rcx_mask;
+       mask->rcx = rcx;
+       mask->rdx_mask = rdx_mask;
+       mask->rdx = rdx;
+
+       if (v3_hook_cpuid(vm, cpuid, mask_hook, mask) == -1) {
+           PrintError("Error hooking cpuid %d\n", cpuid);
+           return -1;
+       }
+    } else {
+       struct masked_cpuid * mask = NULL;
+       uint32_t tmp_val = 0;
+
+       if (hook->hook_fn != mask_hook) {
+           PrintError("trying to add fields to a fully hooked cpuid (%d)\n", cpuid);
+           return -1;
+       }
+       
+       mask = (struct masked_cpuid *)(hook->private_data);
+
+       if ((mask->rax_mask & rax_mask) ||
+           (mask->rbx_mask & rbx_mask) || 
+           (mask->rcx_mask & rcx_mask) || 
+           (mask->rdx_mask & rdx_mask)) {
+           PrintError("Trying to add fields that have already been masked\n");
+           return -1;
+       }
+
+       if ((~rax_mask & rax) || (~rbx_mask & rbx) ||
+           (~rcx_mask & rcx) || (~rdx_mask & rdx)) {
+           PrintError("Invalid cpuid reg value (mask overrun)\n");
+           return -1;
+       }
+
+       mask->rax_mask |= rax_mask;
+       mask->rbx_mask |= rbx_mask;
+       mask->rcx_mask |= rcx_mask;
+       mask->rdx_mask |= rdx_mask;
+       
+       mask->rax |= rax;
+       tmp_val = (~rax_mask | rax);
+       mask->rax &= tmp_val;
+
+       mask->rbx |= rbx;
+       tmp_val = (~rbx_mask | rbx);
+       mask->rbx &= tmp_val;
+
+       mask->rcx |= rcx;
+       tmp_val = (~rcx_mask | rcx);
+       mask->rcx &= tmp_val;
+
+       mask->rdx |= rdx;
+       tmp_val = (~rdx_mask | rdx);
+       mask->rdx &= tmp_val;
+
+    }
+
+    return 0;
+}
+
 int v3_unhook_cpuid(struct v3_vm_info * vm, uint32_t cpuid) {
     struct v3_cpuid_hook * hook = get_cpuid_hook(vm, cpuid);
 
@@ -185,3 +305,8 @@ int v3_handle_cpuid(struct guest_info * info) {
 
     return 0;
 }
+
+
+
+
+
index 15a56d6..c05e09d 100644 (file)
@@ -314,7 +314,7 @@ static int run_str_op(struct guest_info * core, struct x86_instr * instr,
     struct rflags * flags_reg = (struct rflags *)&(core->ctrl_regs.rflags);
 
 
-    PrintError("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx);
+    PrintDebug("Emulation_len=%d, tmp_rcx=%d\n", emulation_length, (uint_t)tmp_rcx);
 
 
     if (instr->op_type == V3_OP_MOVS) {
index de1cfa0..1db9dc6 100644 (file)
@@ -69,6 +69,8 @@ int V3_init_extensions() {
 }
 
 
+
+
 int V3_deinit_extensions() {
     v3_free_htable(ext_table, 0, 0);
     return 0;
@@ -85,6 +87,15 @@ int v3_init_ext_manager(struct v3_vm_info * vm) {
     return 0;
 }
 
+
+int v3_deinit_ext_manager(struct v3_vm_info * vm)  {   /* Placeholder for tearing down the per-VM extension manager */
+
+       PrintError("I should really do something here... \n");   /* no cleanup is implemented yet; 'vm' is not used */
+       return -1;   /* unconditionally returns -1 */
+}
+
+
+
 int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t * cfg) {
     struct v3_extension_impl * impl = NULL;
     struct v3_extension * ext = NULL;
@@ -125,3 +136,34 @@ int v3_add_extension(struct v3_vm_info * vm, const char * name, v3_cfg_tree_t *
     
     return 0;
 }
+
+/*
+ * Run the per-core init callback of every extension attached to the VM
+ * that owns 'core'.
+ *
+ * Extensions without an implementation or without a core_init callback are
+ * skipped.  Returns 0 when all callbacks succeed, or -1 as soon as one
+ * callback returns -1.
+ */
+int v3_init_core_extensions(struct guest_info * core) {
+    struct v3_extension * cur = NULL;
+
+    list_for_each_entry(cur, &(core->vm_info->extensions.extensions), node) {
+       /* Nothing to do for extensions that have no per-core hook */
+       if (!(cur->impl) || !(cur->impl->core_init)) {
+           continue;
+       }
+
+       if (cur->impl->core_init(core, cur->priv_data) != -1) {
+           continue;
+       }
+
+       PrintError("Error configuring per core extension %s on core %d\n", 
+                  cur->impl->name, core->cpu_id);
+       return -1;
+    }
+
+    return 0;
+}
+
+
+
+
+/*
+ * Look up the private state of the extension called 'name' on 'vm'.
+ *
+ * Returns the priv_data pointer of the first attached extension whose
+ * implementation name is exactly 'name', or NULL when 'name' is NULL or no
+ * such extension is attached.
+ */
+void * v3_get_extension_state(struct v3_vm_info * vm, const char * name) {
+    struct v3_extension * ext = NULL;
+
+    if (name == NULL) {
+       return NULL;
+    }
+
+    list_for_each_entry(ext, &(vm->extensions.extensions), node) {
+       /*
+        * Compare one character beyond the registered name so that its
+        * terminator takes part in the comparison: an extension called
+        * "foo" must not answer a request for "foobar".
+        */
+       if ((ext->impl) &&
+           (strncmp(ext->impl->name, name, strlen(ext->impl->name) + 1) == 0)) {
+           return ext->priv_data;
+       }
+    }
+
+    return NULL;
+}
index 432b9fb..3e6d09b 100644 (file)
@@ -20,7 +20,6 @@
 #include <palacios/vmm.h>
 #include <palacios/vmm_host_events.h>
 #include <palacios/vm_guest.h>
-#include <palacios/vmm_muxer.h>
 
 int v3_init_host_events(struct v3_vm_info * vm) {
     struct v3_host_events * host_evts = &(vm->host_event_hooks);
@@ -125,9 +124,6 @@ int v3_deliver_keyboard_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -150,9 +146,6 @@ int v3_deliver_mouse_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -175,9 +168,6 @@ int v3_deliver_timer_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -199,9 +189,6 @@ int v3_deliver_serial_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -225,9 +212,6 @@ int v3_deliver_console_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
@@ -250,9 +234,6 @@ int v3_deliver_packet_event(struct v3_vm_info * vm,
     struct v3_host_events * host_evts = NULL;
     struct v3_host_event_hook * hook = NULL;
 
-    if (vm == NULL) {
-       vm = v3_get_foreground_vm();
-    }
 
     host_evts = &(vm->host_event_hooks);
 
diff --git a/palacios/src/palacios/vmm_muxer.c b/palacios/src/palacios/vmm_muxer.c
deleted file mode 100644 (file)
index 01e8169..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-/* 
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National 
- * Science Foundation and the Department of Energy.  
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico.  You can find out more at 
- * http://www.v3vee.org
- *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
- * All rights reserved.
- *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
- *
- * This is free software.  You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm.h>
-#include <palacios/vmm_muxer.h>
-#include <palacios/vmm_list.h>
-
-
-
-static struct v3_vm_info * foreground_vm = NULL;
-
-// list of notification callbacks
-static LIST_HEAD(cb_list);
-
-
-struct mux_callback {
-    struct list_head cb_node;
-
-    int (*focus_change)(struct v3_vm_info * old_vm, struct v3_vm_info * new_vm);
-};
-
-
-struct v3_vm_info * v3_get_foreground_vm() {
-    return foreground_vm;
-}
-
-
-void v3_set_foreground_vm(struct v3_vm_info * vm) {
-    struct mux_callback * tmp_cb;
-
-    list_for_each_entry(tmp_cb, &(cb_list), cb_node) {
-       tmp_cb->focus_change(foreground_vm, vm);
-    }
-
-    foreground_vm = vm;
-}
-
-
-int v3_add_mux_notification(int (*focus_change)(struct v3_vm_info * old_vm, 
-                                               struct v3_vm_info * new_vm)) {
-
-    struct mux_callback * cb = (struct mux_callback *)V3_Malloc(sizeof(struct mux_callback));
-
-    cb->focus_change = focus_change;
-    
-    list_add(&(cb->cb_node), &cb_list);
-
-    return 0;
-}
index b06ff73..03cfb6d 100644 (file)
 
 #include <palacios/vmm_queue.h>
 
-void v3_init_queue(struct gen_queue * queue) {
+void v3_init_queue(struct v3_queue * queue) {   /* Put a caller-provided queue into the empty state */
     queue->num_entries = 0;   /* starts with no entries */
     INIT_LIST_HEAD(&(queue->entries));   /* empty entry list */
     v3_lock_init(&queue->lock);   /* this lock is taken by v3_enqueue and v3_dequeue */
 }
 
-struct gen_queue * v3_create_queue() {
-    struct gen_queue * tmp_queue = V3_Malloc(sizeof(struct gen_queue));
+/*
+ * Allocate a new queue and initialize it to the empty state.
+ * Returns the queue, or NULL if the allocation failed.
+ */
+struct v3_queue * v3_create_queue() {
+    struct v3_queue * tmp_queue = V3_Malloc(sizeof(struct v3_queue));
+
+    /* v3_init_queue dereferences its argument, so stop here on a failed allocation */
+    if (tmp_queue == NULL) {
+       return NULL;
+    }
+
     v3_init_queue(tmp_queue);
     return tmp_queue;
 }
 
-void v3_enqueue(struct gen_queue * queue, addr_t entry) {
-    struct queue_entry * q_entry = V3_Malloc(sizeof(struct queue_entry));
+void v3_enqueue(struct v3_queue * queue, addr_t entry) {
+    struct v3_queue_entry * q_entry = V3_Malloc(sizeof(struct v3_queue_entry));
 
     v3_lock(queue->lock);
     q_entry->entry = entry;
@@ -42,13 +42,13 @@ void v3_enqueue(struct gen_queue * queue, addr_t entry) {
 }
 
 
-addr_t v3_dequeue(struct gen_queue * queue) {
+addr_t v3_dequeue(struct v3_queue * queue) {
     addr_t entry_val = 0;
 
     v3_lock(queue->lock);
     if (!list_empty(&(queue->entries))) {
        struct list_head * q_entry = queue->entries.next;
-       struct queue_entry * tmp_entry = list_entry(q_entry, struct queue_entry, entry_list);
+       struct v3_queue_entry * tmp_entry = list_entry(q_entry, struct v3_queue_entry, entry_list);
 
        entry_val = tmp_entry->entry;
        list_del(q_entry);
index e0e0ac7..4b54d71 100644 (file)
@@ -31,6 +31,8 @@
 #define PrintDebug(fmt, args...)
 #endif
 
+int v3_net_debug = 0;
+
 struct eth_hdr {
     uint8_t dst_mac[ETH_ALEN];
     uint8_t src_mac[ETH_ALEN];
@@ -45,11 +47,6 @@ struct vnet_dev {
     struct v3_vnet_dev_ops dev_ops;
     void * private_data;
 
-    int active;
-
-    uint64_t bytes_tx, bytes_rx;
-    uint32_t pkts_tx, pkt_rx;
-    
     struct list_head node;
 } __attribute__((packed));
 
@@ -60,7 +57,6 @@ struct vnet_brg_dev {
 
     uint8_t type;
 
-    int active;
     void * private_data;
 } __attribute__((packed));
 
@@ -85,6 +81,20 @@ struct route_list {
 } __attribute__((packed));
 
 
+struct queue_entry{   /* One slot of the VNET packet ring */
+    uint8_t use;   /* set to 1 by vnet_pkt_enqueue once the slot is filled, reset to 0 by vnet_tx_flush after sending */
+    struct v3_vnet_pkt pkt;   /* copy of the queued packet descriptor */
+    uint8_t data[ETHERNET_PACKET_LEN];   /* slot-local copy of the packet payload */
+};
+
+#define VNET_QUEUE_SIZE 10240   /* number of slots in the packet ring */
+struct vnet_queue {
+       struct queue_entry buf[VNET_QUEUE_SIZE];   /* ring storage, indexed modulo VNET_QUEUE_SIZE */
+       int head, tail;   /* head: next slot vnet_tx_flush takes; tail: next slot vnet_pkt_enqueue claims */
+       int count;   /* slots claimed by enqueue and not yet taken by the flush thread */
+       v3_lock_t lock;   /* held while head, tail and count are updated */
+};
+
 static struct {
     struct list_head routes;
     struct list_head devs;
@@ -97,10 +107,13 @@ static struct {
     v3_lock_t lock;
     struct vnet_stat stats;
 
-    struct hashtable * route_cache;
-} vnet_state;
+    void * pkt_flush_thread;
 
+    struct vnet_queue pkt_q;
 
+    struct hashtable * route_cache;
+} vnet_state;
+       
 
 #ifdef CONFIG_DEBUG_VNET
 static inline void mac_to_string(uint8_t * mac, char * buf) {
@@ -182,7 +195,8 @@ static int clear_hash_cache() {
     return 0;
 }
 
-static int look_into_cache(const struct v3_vnet_pkt * pkt, struct route_list ** routes) {
+static int look_into_cache(const struct v3_vnet_pkt * pkt, 
+                          struct route_list ** routes) {
     *routes = (struct route_list *)v3_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf));
    
     return 0;
@@ -306,8 +320,8 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
     int max_rank = 0;
     struct list_head match_list;
     struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
-//    uint8_t src_type = pkt->src_type;
-  //  uint32_t src_link = pkt->src_id;
+    //    uint8_t src_type = pkt->src_type;
+    //  uint32_t src_link = pkt->src_id;
 
 #ifdef CONFIG_DEBUG_VNET
     {
@@ -425,19 +439,18 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
 }
 
 
-int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
+int vnet_tx_one_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
     struct route_list * matched_routes = NULL;
     unsigned long flags;
     int i;
 
-#ifdef CONFIG_DEBUG_VNET
-   {
-       int cpu = V3_Get_CPU();
-       PrintDebug("VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
+    int cpu = V3_Get_CPU();
+    V3_Net_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
                  cpu, pkt->size, pkt->src_id, 
                  pkt->src_type, pkt->dst_id, pkt->dst_type);
-   }
-#endif
+    if(v3_net_debug >= 4){
+           v3_hexdump(pkt->data, pkt->size, NULL, 0);
+    }
 
     flags = v3_lock_irqsave(vnet_state.lock);
 
@@ -466,30 +479,30 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
     for (i = 0; i < matched_routes->num_routes; i++) {
        struct vnet_route_info * route = matched_routes->routes[i];
        
-        if (route->route_def.dst_type == LINK_EDGE) {
-           struct vnet_brg_dev *bridge = vnet_state.bridge;
-            pkt->dst_type = LINK_EDGE;
-            pkt->dst_id = route->route_def.dst_id;
+       if (route->route_def.dst_type == LINK_EDGE) {
+           struct vnet_brg_dev * bridge = vnet_state.bridge;
+           pkt->dst_type = LINK_EDGE;
+           pkt->dst_id = route->route_def.dst_id;
 
-           if (bridge == NULL || (bridge->active == 0)) {
-               PrintDebug("VNET/P Core: No active bridge to sent data to\n");
+           if (bridge == NULL) {
+               V3_Net_Print(2, "VNET/P Core: No active bridge to sent data to\n");
                 continue;
            }
 
            if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){
-                PrintDebug("VNET/P Core: Packet not sent properly to bridge\n");
+                V3_Net_Print(2, "VNET/P Core: Packet not sent properly to bridge\n");
                 continue;
            }         
            vnet_state.stats.tx_bytes += pkt->size;
            vnet_state.stats.tx_pkts ++;
         } else if (route->route_def.dst_type == LINK_INTERFACE) {
-            if (route->dst_dev == NULL || route->dst_dev->active == 0){
-               PrintDebug("VNET/P Core: No active device to sent data to\n");
+            if (route->dst_dev == NULL){
+                 V3_Net_Print(2, "VNET/P Core: No active device to sent data to\n");
                continue;
             }
 
            if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) {
-                PrintDebug("VNET/P Core: Packet not sent properly\n");
+                V3_Net_Print(2, "VNET/P Core: Packet not sent properly\n");
                 continue;
            }
            vnet_state.stats.tx_bytes += pkt->size;
@@ -502,6 +515,50 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
     return 0;
 }
 
+
+/*
+ * Copy a packet into the next free slot of the VNET packet ring so that the
+ * flush thread (vnet_tx_flush) can transmit it later.
+ *
+ * Both the packet descriptor and its payload are copied into the slot.
+ *
+ * Returns 0 once the packet has been copied, or -1 if the payload does not
+ * fit into a slot or the ring is full.
+ */
+static int vnet_pkt_enqueue(struct v3_vnet_pkt * pkt){
+    unsigned long flags;
+    struct queue_entry * entry;
+    struct vnet_queue * q = &(vnet_state.pkt_q);
+
+    /* A slot only provides ETHERNET_PACKET_LEN bytes of payload storage */
+    if (pkt->size > ETHERNET_PACKET_LEN) {
+       PrintError("VNET/P Core: Packet too large to enqueue (size %d)\n", pkt->size);
+       return -1;
+    }
+
+    flags = v3_lock_irqsave(q->lock);
+
+    if (q->count >= VNET_QUEUE_SIZE){
+       V3_Net_Print(1, "VNET Queue overflow!\n");
+       v3_unlock_irqrestore(q->lock, flags);
+       return -1;
+    }
+       
+    /* Claim the tail slot while holding the lock; it is filled after the lock is dropped */
+    q->count ++;
+    entry = &(q->buf[q->tail++]);
+    q->tail %= VNET_QUEUE_SIZE;
+       
+    v3_unlock_irqrestore(q->lock, flags);
+
+    /* this is ugly, but should be very unlikely: wait until the flush thread has released the slot */
+    while(entry->use);
+
+    /*
+     * Copy the descriptor first and only then point its data field at the
+     * slot's own buffer; doing it the other way around lets the descriptor
+     * copy overwrite the pointer with the caller's buffer address.
+     */
+    memcpy(&(entry->pkt), pkt, sizeof(struct v3_vnet_pkt));
+    entry->pkt.data = entry->data;
+    memcpy(entry->data, pkt->data, pkt->size);
+
+    /* Publish the slot to the flush thread */
+    entry->use = 1;
+
+    return 0;
+}
+
+
+/*
+ * Hand a packet to the VNET core.
+ *
+ * synchronize != 0: the packet is routed and delivered right away through
+ *                   vnet_tx_one_pkt.
+ * synchronize == 0: the packet is copied into the packet queue and sent
+ *                   later by the flush thread.
+ *
+ * private_data is not used by this function.  Sending is best effort: the
+ * function returns 0 even if the packet could not be delivered or queued.
+ */
+int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize) {
+    if(synchronize){
+       vnet_tx_one_pkt(pkt, NULL);
+    }else if (vnet_pkt_enqueue(pkt) == 0) {
+       /* Only report the packet as queued when the enqueue actually succeeded */
+       V3_Net_Print(2, "VNET/P Core: Put pkt into Queue: pkt size %d\n", pkt->size);
+    }
+       
+    return 0;
+}
+
 int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, 
                    struct v3_vnet_dev_ops *ops,
                    void * priv_data){
@@ -517,11 +574,9 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
    
     memcpy(new_dev->mac_addr, mac, 6);
     new_dev->dev_ops.input = ops->input;
-    new_dev->dev_ops.poll = ops->poll;
     new_dev->private_data = priv_data;
     new_dev->vm = vm;
     new_dev->dev_id = 0;
-    new_dev->active = 1;
 
     flags = v3_lock_irqsave(vnet_state.lock);
 
@@ -544,7 +599,6 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
 }
 
 
-
 int v3_vnet_del_dev(int dev_id){
     struct vnet_dev * dev = NULL;
     unsigned long flags;
@@ -566,6 +620,7 @@ int v3_vnet_del_dev(int dev_id){
     return 0;
 }
 
+
 int v3_vnet_stat(struct vnet_stat * stats){
        
     stats->rx_bytes = vnet_state.stats.rx_bytes;
@@ -604,12 +659,10 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
     struct vnet_brg_dev * tmp_bridge = NULL;    
     
     flags = v3_lock_irqsave(vnet_state.lock);
-
     if (vnet_state.bridge == NULL) {
        bridge_free = 1;
        vnet_state.bridge = (void *)1;
     }
-
     v3_unlock_irqrestore(vnet_state.lock, flags);
 
     if (bridge_free == 0) {
@@ -629,7 +682,6 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
     tmp_bridge->brg_ops.input = ops->input;
     tmp_bridge->brg_ops.poll = ops->poll;
     tmp_bridge->private_data = priv_data;
-    tmp_bridge->active = 1;
     tmp_bridge->type = type;
        
     /* make this atomic to avoid possible race conditions */
@@ -641,20 +693,39 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm,
 }
 
 
-void v3_vnet_do_poll(struct v3_vm_info * vm){
-    struct vnet_dev * dev = NULL;
+/*
+ * Body of the VNET packet flush thread: takes queued packets from the packet
+ * ring one at a time and transmits them with vnet_tx_one_pkt.  The loop never
+ * terminates; when the ring is empty the thread yields.
+ */
+static int vnet_tx_flush(void *args){
+    unsigned long flags;
+    struct queue_entry * entry;
+    struct vnet_queue * q = &(vnet_state.pkt_q);
 
-    /* TODO: run this on separate threads
-      * round-robin schedule, with maximal budget for each poll
-      */
-    list_for_each_entry(dev, &(vnet_state.devs), node) {
-           if(dev->dev_ops.poll != NULL){
-               dev->dev_ops.poll(vm, -1, dev->private_data);
-           }
+    V3_Print("VNET/P Handing Pkt Thread Starting ....\n");
+
+    //V3_THREAD_SLEEP();
+    /* we need thread sleep/wakeup in Palacios */
+    while(1){
+       flags = v3_lock_irqsave(q->lock);
+
+       if (q->count <= 0){
+           v3_unlock_irqrestore(q->lock, flags);
+           v3_yield(NULL);
+           //V3_THREAD_SLEEP();
+       }else {
+           /* Take the head slot while holding the lock */
+           q->count --;
+           entry = &(q->buf[q->head++]);
+           q->head %= VNET_QUEUE_SIZE;
+
+           v3_unlock_irqrestore(q->lock, flags);
+
+           /* this is ugly, but should be very unlikely: wait until the producer has finished filling the slot */
+           while(!entry->use);
+           vnet_tx_one_pkt(&(entry->pkt), NULL);
+
+           /* Log before releasing the slot: once use is 0 the producer may overwrite entry->pkt */
+           V3_Net_Print(2, "vnet_tx_flush: pkt (size %d)\n", entry->pkt.size);   
+           entry->use = 0;
+       }
     }
 }
 
-
 int v3_init_vnet() {
     memset(&vnet_state, 0, sizeof(vnet_state));
        
@@ -669,12 +740,15 @@ int v3_init_vnet() {
     }
 
     vnet_state.route_cache = v3_create_htable(0, &hash_fn, &hash_eq);
-
     if (vnet_state.route_cache == NULL) {
         PrintError("VNET/P Core: Fails to initiate route cache\n");
         return -1;
     }
 
+    v3_lock_init(&(vnet_state.pkt_q.lock));
+
+    vnet_state.pkt_flush_thread = V3_CREATE_THREAD(vnet_tx_flush, NULL, "VNET_Pkts");
+
     PrintDebug("VNET/P Core is initiated\n");
 
     return 0;
index 9f3d7ac..80fbfde 100644 (file)
@@ -465,7 +465,7 @@ int v3_decode(struct guest_info * info, addr_t instr_ptr, struct x86_instr * ins
            }
        }
 
-       V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
+//     V3_Print("Operand 0 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
 
 
        if (xed_operand_read(op)) {
@@ -555,7 +555,7 @@ int v3_decode(struct guest_info * info, addr_t instr_ptr, struct x86_instr * ins
            }
        }
 
-       V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
+//     V3_Print("Operand 1 mode: %s\n", xed_operand_action_enum_t2str(xed_operand_rw(op)));
 
        if (xed_operand_read(op)) {
            v3_op->read = 1;
index eb79fa3..4326788 100644 (file)
@@ -34,6 +34,7 @@
 #include <palacios/vmx_io.h>
 #include <palacios/vmx_msr.h>
 
+#include <palacios/vmx_hw_info.h>
 
 #ifndef CONFIG_DEBUG_VMX
 #undef PrintDebug
 #endif
 
 
-static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+/* These fields contain the hardware feature sets supported by the local CPU */
+static struct vmx_hw_info hw_info;
+
+
 static addr_t active_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
+static addr_t host_vmcs_ptrs[CONFIG_MAX_CPUS] = { [0 ... CONFIG_MAX_CPUS - 1] = 0};
 
 extern int v3_vmx_launch(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
 extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, struct v3_ctrl_regs * ctrl_regs);
@@ -50,7 +55,7 @@ extern int v3_vmx_resume(struct v3_gprs * vm_regs, struct guest_info * info, str
 static int inline check_vmcs_write(vmcs_field_t field, addr_t val) {
     int ret = 0;
 
-    ret = vmcs_write(field,val);
+    ret = vmcs_write(field, val);
 
     if (ret != VMX_SUCCESS) {
         PrintError("VMWRITE error on %s!: %d\n", v3_vmcs_field_to_str(field), ret);
@@ -76,7 +81,6 @@ static int inline check_vmcs_read(vmcs_field_t field, void * val) {
 
 
 static addr_t allocate_vmcs() {
-    reg_ex_t msr;
     struct vmcs_data * vmcs_page = NULL;
 
     PrintDebug("Allocating page\n");
@@ -84,10 +88,8 @@ static addr_t allocate_vmcs() {
     vmcs_page = (struct vmcs_data *)V3_VAddr(V3_AllocPages(1));
     memset(vmcs_page, 0, 4096);
 
-    v3_get_msr(VMX_BASIC_MSR, &(msr.e_reg.high), &(msr.e_reg.low));
-    
-    vmcs_page->revision = ((struct vmx_basic_msr*)&msr)->revision;
-    PrintDebug("VMX Revision: 0x%x\n",vmcs_page->revision);
+    vmcs_page->revision = hw_info.basic_info.revision;
+    PrintDebug("VMX Revision: 0x%x\n", vmcs_page->revision);
 
     return (addr_t)V3_PAddr((void *)vmcs_page);
 }
@@ -388,7 +390,7 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
     // reenable global interrupts for vm state initialization now
     // that the vm state is initialized. If another VM kicks us off, 
     // it'll update our vmx state so that we know to reload ourself
-    v3_disable_ints();
+    v3_enable_ints();
 
     return 0;
 }
@@ -641,6 +643,13 @@ int v3_vmx_enter(struct guest_info * info) {
     // disable global interrupts for vm state transition
     v3_disable_ints();
 
+
+    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
+       vmcs_load(vmx_info->vmcs_ptr_phys);
+       active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
+    }
+
+
     v3_vmx_restore_vmcs(info);
 
 
@@ -666,10 +675,6 @@ int v3_vmx_enter(struct guest_info * info) {
     check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
     check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
 
-    if (active_vmcs_ptrs[V3_Get_CPU()] != vmx_info->vmcs_ptr_phys) {
-       vmcs_load(vmx_info->vmcs_ptr_phys);
-       active_vmcs_ptrs[V3_Get_CPU()] = vmx_info->vmcs_ptr_phys;
-    }
 
     if (vmx_info->state == VMX_UNLAUNCHED) {
        vmx_info->state = VMX_LAUNCHED;
@@ -726,10 +731,15 @@ int v3_vmx_enter(struct guest_info * info) {
     update_irq_exit_state(info);
 #endif
 
-    // Handle any exits needed still in the atomic section
-    if (v3_handle_atomic_vmx_exit(info, &exit_info) == -1) {
-       PrintError("Error in atomic VMX exit handler\n");
-       return -1;
+    if (exit_info.exit_reason == VMEXIT_INTR_WINDOW) {
+       // This is a special case whose only job is to inject an interrupt
+       vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
+        vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
+        vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
+
+#ifdef CONFIG_DEBUG_INTERRUPTS
+        PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
+#endif
     }
 
     // reenable global interrupts after vm exit
@@ -807,6 +817,12 @@ int v3_start_vmx_guest(struct guest_info * info) {
 }
 
 
+
+
+#define VMX_FEATURE_CONTROL_MSR     0x0000003a
+#define CPUID_VMX_FEATURES 0x00000005  /* LOCK and VMXON */
+#define CPUID_1_ECX_VTXFLAG 0x00000020
+
 int v3_is_vmx_capable() {
     v3_msr_t feature_msr;
     uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0;
@@ -820,7 +836,7 @@ int v3_is_vmx_capable() {
        
         PrintDebug("MSRREGlow: 0x%.8x\n", feature_msr.lo);
 
-        if ((feature_msr.lo & FEATURE_CONTROL_VALID) != FEATURE_CONTROL_VALID) {
+        if ((feature_msr.lo & CPUID_VMX_FEATURES) != CPUID_VMX_FEATURES) {
             PrintDebug("VMX is locked -- enable in the BIOS\n");
             return 0;
         }
@@ -833,82 +849,23 @@ int v3_is_vmx_capable() {
     return 1;
 }
 
-static int has_vmx_nested_paging() {
-    return 0;
-}
 
 
 
-void v3_init_vmx_cpu(int cpu_id) {
-    extern v3_cpu_arch_t v3_cpu_types[];
-    struct v3_msr tmp_msr;
-    uint64_t ret = 0;
 
-    v3_get_msr(VMX_CR4_FIXED0_MSR, &(tmp_msr.hi), &(tmp_msr.lo));
 
-#ifdef __V3_64BIT__
-    __asm__ __volatile__ (
-                         "movq %%cr4, %%rbx;"
-                         "orq  $0x00002000, %%rbx;"
-                         "movq %%rbx, %0;"
-                         : "=m"(ret) 
-                         :
-                         : "%rbx"
-                         );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-                             "movq %0, %%cr4;"
-                             :
-                             : "q"(ret)
-                             );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
-    }
+void v3_init_vmx_cpu(int cpu_id) {
+    extern v3_cpu_arch_t v3_cpu_types[];
 
-    __asm__ __volatile__ (
-                         "movq %%cr0, %%rbx; "
-                         "orq  $0x00000020,%%rbx; "
-                         "movq %%rbx, %%cr0;"
-                         :
-                         :
-                         : "%rbx"
-                         );
-#elif __V3_32BIT__
-    __asm__ __volatile__ (
-                         "movl %%cr4, %%ecx;"
-                         "orl  $0x00002000, %%ecx;"
-                         "movl %%ecx, %0;"
-                         : "=m"(ret) 
-                         :
-                         : "%ecx"
-                         );
-
-    if ((~ret & tmp_msr.value) == 0) {
-        __asm__ __volatile__ (
-                             "movl %0, %%cr4;"
-                             :
-                             : "q"(ret)
-                             );
-    } else {
-        PrintError("Invalid CR4 Settings!\n");
-        return;
+    if (cpu_id == 0) {
+       if (v3_init_vmx_hw(&hw_info) == -1) {
+           PrintError("Could not initialize VMX hardware features on cpu %d\n", cpu_id);
+           return;
+       }
     }
 
-    __asm__ __volatile__ (
-                         "movl %%cr0, %%ecx; "
-                         "orl  $0x00000020,%%ecx; "
-                         "movl %%ecx, %%cr0;"
-                         :
-                         :
-                         : "%ecx"
-                         );
 
-#endif
-
-    //
-    // Should check and return Error here.... 
+    enable_vmx();
 
 
     // Setup VMXON Region
@@ -916,7 +873,7 @@ void v3_init_vmx_cpu(int cpu_id) {
 
     PrintDebug("VMXON pointer: 0x%p\n", (void *)host_vmcs_ptrs[cpu_id]);
 
-    if (v3_enable_vmx(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
+    if (vmx_on(host_vmcs_ptrs[cpu_id]) == VMX_SUCCESS) {
         PrintDebug("VMX Enabled\n");
     } else {
         PrintError("VMX initialization failure\n");
@@ -924,11 +881,8 @@ void v3_init_vmx_cpu(int cpu_id) {
     }
     
 
-    if (has_vmx_nested_paging() == 1) {
-        v3_cpu_types[cpu_id] = V3_VMX_EPT_CPU;
-    } else {
-        v3_cpu_types[cpu_id] = V3_VMX_CPU;
-    }
+    v3_cpu_types[cpu_id] = V3_VMX_CPU;
+
 
 }
 
diff --git a/palacios/src/palacios/vmx_ept.c b/palacios/src/palacios/vmx_ept.c
new file mode 100644 (file)
index 0000000..42ca942
--- /dev/null
@@ -0,0 +1,19 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jacklange@cs.pitt.edu> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jacklange@cs.pitt.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
index 4066bf2..e5da762 100644 (file)
 #endif
 
 /* At this point the GPRs are already copied into the guest_info state */
-int v3_handle_atomic_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
-    struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data);
-
-    switch (exit_info->exit_reason) {
-        case VMEXIT_INTR_WINDOW:
-           // This is here because we touch the VMCS 
-           vmcs_read(VMCS_PROC_CTRLS, &(vmx_info->pri_proc_ctrls.value));
-            vmx_info->pri_proc_ctrls.int_wndw_exit = 0;
-            vmcs_write(VMCS_PROC_CTRLS, vmx_info->pri_proc_ctrls.value);
-
-#ifdef CONFIG_DEBUG_INTERRUPTS
-            PrintDebug("Interrupts available again! (RIP=%llx)\n", info->rip);
-#endif
-            break;
-    }
-    return 0;
-}
-
-/* At this point the GPRs are already copied into the guest_info state */
 int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_info) {
     /*
       PrintError("Handling VMEXIT: %s (%u), %lu (0x%lx)\n", 
diff --git a/palacios/src/palacios/vmx_hw_info.c b/palacios/src/palacios/vmx_hw_info.c
new file mode 100644 (file)
index 0000000..3220e52
--- /dev/null
@@ -0,0 +1,113 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2011, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2011, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vmm_lowlevel.h>
+#include <palacios/vmx_hw_info.h>
+#include <palacios/vmm_msr.h>
+
+// Intel VMX Feature MSRs
+
+
+
+/*
+ * Fill 'field' with the capabilities of one VMX control word.
+ *
+ * def_val always comes from the low dword of old_msr.  req_val and req_mask
+ * are computed from true_msr when basic_info.def1_maybe_0 is set, and from
+ * old_msr otherwise.  Always returns 0.
+ */
+static int get_ex_ctrl_caps(struct vmx_hw_info * hw_info, struct vmx_ctrl_field * field, 
+                 uint32_t old_msr, uint32_t true_msr) {
+    uint32_t must_be_1 = 0;  /* low dword:  bit is 1 => MB1 */
+    uint32_t may_be_1 = 0;   /* high dword: bit is 0 => MBZ */
+
+    v3_get_msr(old_msr, &may_be_1, &must_be_1);
+    field->def_val = must_be_1;
+
+    /* Switch to the TRUE capability MSR when the CPU reports one */
+    if (hw_info->basic_info.def1_maybe_0) {
+       v3_get_msr(true_msr, &may_be_1, &must_be_1);
+    }
+
+    field->req_val = must_be_1;
+    /* A bit belongs to the mask when both dwords agree on it */
+    field->req_mask = ~(may_be_1 ^ must_be_1);
+
+    return 0;
+}
+
+
+/*
+ * Fill 'field' from a single VMX capability MSR: def_val and req_val are the
+ * low dword, req_mask holds the bits on which both dwords agree.
+ * Always returns 0.
+ */
+static int get_ctrl_caps(struct vmx_ctrl_field * field, uint32_t msr) {
+    uint32_t may_be_1 = 0;  /* high dword: bit is 0 => MBZ */
+    uint32_t must_be_1 = 0; /* low dword:  bit is 1 => MB1 */
+
+    v3_get_msr(msr, &may_be_1, &must_be_1);
+
+    field->req_mask = ~(may_be_1 ^ must_be_1);
+    field->req_val = must_be_1;
+    field->def_val = must_be_1;
+
+    return 0;
+}
+
+
+
+static int get_cr_fields(struct vmx_cr_field * field, uint32_t fixed_1_msr, uint32_t fixed_0_msr) {
+    struct v3_msr mbz; /* Value of fixed_1_msr: bit is 0 => MBZ */
+    struct v3_msr mb1; /* Value of fixed_0_msr: bit is 1 => MB1 */
+
+    v3_get_msr(fixed_1_msr, &(mbz.hi), &(mbz.lo));
+    v3_get_msr(fixed_0_msr, &(mb1.hi), &(mb1.lo));
+     
+    field->def_val = mb1.value; /* default and required values are the must-be-one bits */
+    field->req_val = mb1.value;
+    field->req_mask = ~(mbz.value ^ mb1.value); /* bits on which both MSRs agree */
+
+    return 0;
+}
+
+
+
+
+
+/*
+ * Read the VMX capability MSRs of the current CPU into 'hw_info'.
+ *
+ * hw_info is zeroed first, then filled with the basic, misc and EPT/VPID
+ * MSR values, the pin/proc/exit/entry control capabilities, the secondary
+ * processor controls (only when bit 31 of the primary processor controls is
+ * optional or required to be 1), and the CR0/CR4 fixed-bit information.
+ *
+ * Always returns 0.
+ */
+int v3_init_vmx_hw(struct vmx_hw_info * hw_info) {
+    //  extern v3_cpu_arch_t v3_cpu_types[];
+
+    memset(hw_info, 0, sizeof(struct vmx_hw_info));
+
+    v3_get_msr(VMX_BASIC_MSR, &(hw_info->basic_info.hi), &(hw_info->basic_info.lo));
+    v3_get_msr(VMX_MISC_MSR, &(hw_info->misc_info.hi), &(hw_info->misc_info.lo));
+    v3_get_msr(VMX_EPT_VPID_CAP_MSR, &(hw_info->ept_info.hi), &(hw_info->ept_info.lo));
+
+    PrintError("BASIC_MSR: Lo: %x, Hi: %x\n", hw_info->basic_info.lo, hw_info->basic_info.hi);
+
+    get_ex_ctrl_caps(hw_info, &(hw_info->pin_ctrls), VMX_PINBASED_CTLS_MSR, VMX_TRUE_PINBASED_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->proc_ctrls), VMX_PROCBASED_CTLS_MSR, VMX_TRUE_PROCBASED_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->exit_ctrls), VMX_EXIT_CTLS_MSR, VMX_TRUE_EXIT_CTLS_MSR);
+    get_ex_ctrl_caps(hw_info, &(hw_info->entry_ctrls), VMX_ENTRY_CTLS_MSR, VMX_TRUE_ENTRY_CTLS_MSR);
+
+    /* Get secondary PROCBASED controls if secondary controls are available (optional or required) */
+    /* Intel Manual 3B. Sect. G.3.3 */
+    /* The masked value is either 0 or 0x80000000, so test for non-zero rather than for 1 */
+    if ( ((hw_info->proc_ctrls.req_mask & 0x80000000) == 0) || 
+        ((hw_info->proc_ctrls.req_val & 0x80000000) != 0) ) {
+       get_ctrl_caps(&(hw_info->proc_ctrls_2), VMX_PROCBASED_CTLS2_MSR);
+    }
+    
+    get_cr_fields(&(hw_info->cr0), VMX_CR0_FIXED1_MSR, VMX_CR0_FIXED0_MSR);
+    get_cr_fields(&(hw_info->cr4), VMX_CR4_FIXED1_MSR, VMX_CR4_FIXED0_MSR);
+
+    return 0;
+}