Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Merge branch 'devel' of palacios@newskysaw.cs.northwestern.edu:/home/palacios/palacio...
Lei Xia [Wed, 3 Nov 2010 20:03:52 +0000 (15:03 -0500)]
Conflicts:

palacios/src/palacios/vmm.c

52 files changed:
Kconfig
Makefile
palacios/include/devices/apic.h
palacios/include/devices/icc_bus.h [deleted file]
palacios/include/devices/vnet.h [deleted file]
palacios/include/palacios/vm_guest.h
palacios/include/palacios/vmcs_gen.h [deleted file]
palacios/include/palacios/vmm.h
palacios/include/palacios/vmm_console.h
palacios/include/palacios/vmm_file.h
palacios/include/palacios/vmm_mem.h
palacios/include/palacios/vmm_time.h
palacios/include/palacios/vmm_types.h
palacios/src/devices/8254.c
palacios/src/devices/Kconfig
palacios/src/devices/Makefile
palacios/src/devices/apic.c
palacios/src/devices/curses_cons.c
palacios/src/devices/icc_bus.c [deleted file]
palacios/src/devices/io_apic.c
palacios/src/palacios/mmu/Makefile
palacios/src/palacios/mmu/vmm_shdw_pg_cache.c [new file with mode: 0644]
palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h [new file with mode: 0644]
palacios/src/palacios/mmu/vmm_shdw_pg_swapbypass.c
palacios/src/palacios/mmu/vmm_shdw_pg_tlb_32.h
palacios/src/palacios/mmu/vmm_shdw_pg_tlb_64.h
palacios/src/palacios/svm.c
palacios/src/palacios/svm_handler.c
palacios/src/palacios/vm_guest.c
palacios/src/palacios/vmm.c
palacios/src/palacios/vmm_config.c
palacios/src/palacios/vmm_config_class.h
palacios/src/palacios/vmm_direct_paging_64.h
palacios/src/palacios/vmm_halt.c
palacios/src/palacios/vmm_mem.c
palacios/src/palacios/vmm_msr.c
palacios/src/palacios/vmm_shadow_paging.c
palacios/src/palacios/vmm_time.c
palacios/src/palacios/vmx.c
palacios/src/palacios/vmx_handler.c
test/geekos_test_vm/build/Makefile
test/geekos_test_vm/build/depend.mak
test/geekos_test_vm/include/geekos/segment.h
test/geekos_test_vm/src/common/fmtout.c
test/geekos_test_vm/src/geekos/bget.c
test/geekos_test_vm/src/geekos/main.c
test/geekos_test_vm/src/geekos/malloc.c
test/geekos_test_vm/src/geekos/mem.c
test/geekos_test_vm/src/geekos/screen.c
test/geekos_test_vm/src/geekos/serial.c
test/geekos_test_vm/src/geekos/vm_cons.c
utils/guest_creator/default.xml

diff --git a/Kconfig b/Kconfig
index c4aacfb..0910a5f 100644 (file)
--- a/Kconfig
+++ b/Kconfig
@@ -188,7 +188,6 @@ endmenu
 source "Kconfig.stdlibs"
 
 
-
 menu "Virtual Paging"
 
 config SHADOW_PAGING
@@ -213,9 +212,29 @@ config DEBUG_SHDW_PG_VTLB
        help
          Enables debugging messages for VTLB implementation
 
+config SHADOW_PAGING_CACHE1
+       bool "Shadow Page Cache (1)"
+       default y
+       depends on SHADOW_PAGING
+       help 
+          Enables caching implementation for shadow paging
+
+
 endmenu
 
 
+menu "Time Management"
+
+config TIME_VIRTUALIZE_TSC
+       bool "Virtualize guest TSC"
+       default n
+       help
+           Virtualize the processor time stamp counter in the guest, 
+           generally increasing consistency between various time sources 
+           but also potentially making guest time run slower than real time.
+
+endmenu
+
 menu "Symbiotic Functions"
 
 config SYMBIOTIC
@@ -323,7 +342,12 @@ config DEBUG_INTERRUPTS
        help 
          This turns on debugging for the interrupt system
 
-
+config DEBUG_TIME
+       bool "Timing"
+       default n
+       depends on DEBUG_ON
+       help
+         This turns on debugging of system time virtualization
 
 config DEBUG_IO
        bool "IO"
index bb5c155..9d77f82 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -818,7 +818,7 @@ endif
 ALLSOURCE_ARCHS := $(ARCH)
 
 define all-sources
-       ( find $(__srctree)/palacios $(RCS_FIND_IGNORE) \
+       ( find $(__srctree)palacios $(RCS_FIND_IGNORE) \
               \( -name lib \) -prune -o \
               -name '*.[chS]' -print; )
 endef
index e4452e7..40415e6 100644 (file)
 #include <palacios/vmm_dev_mgr.h>
 
 
+typedef enum {IPI_FIXED = 0,
+             IPI_LOWEST_PRIO = 1,
+             IPI_SMI = 2,
+             IPI_NMI = 4,
+             IPI_INIT = 5,
+             IPI_EXINT = 7 } ipi_mode_t; 
+
+
+struct v3_gen_ipi {
+    uint8_t vector;
+    ipi_mode_t mode;
+
+    uint8_t logical      : 1;
+    uint8_t trigger_mode : 1;
+    uint8_t dst_shorthand : 2;
+
+    uint8_t dst;
+} __attribute__((packed));
+
+int v3_apic_send_ipi(struct v3_vm_info * vm, struct vm_device * dev, 
+                    struct v3_gen_ipi * ipi);
+
+int v3_apic_raise_intr(struct v3_vm_info * vm, struct vm_device * apic_dev, 
+                      uint32_t irq, uint32_t dst);
+
 
-int v3_apic_raise_intr(struct guest_info * info, struct vm_device * apic_dev, int intr_num);
 
 
 #endif // ! __V3VEE__
diff --git a/palacios/include/devices/icc_bus.h b/palacios/include/devices/icc_bus.h
deleted file mode 100644 (file)
index 4084340..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico.  You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
- * All rights reserved.
- *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
- *
- * This is free software.  You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#ifndef ICC_BUS_H_
-#define ICC_BUS_H_
-
-
-struct v3_icc_ops {
-    int (*raise_intr)(struct guest_info * core, int intr_num, void * private_data);
-    int (*should_deliver_flat)(struct guest_info * core, uint8_t mda, void * private_data);
-    int (*should_deliver_cluster)(struct guest_info * core, uint8_t mda, void * private_data);
-};
-
-
-/**
- *
- */
-int v3_icc_register_apic(struct guest_info *core, struct vm_device * icc_bus, uint8_t apic_phys_id, struct v3_icc_ops * ops, void * priv_data);
-int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, uint8_t apic_phys_id);
-
-/**
- * Send an inter-processor interrupt (IPI) from one local APIC to another local APIC.
- *
- * @param icc_bus  - The ICC bus that routes IPIs.
- * @param apic_src - The source APIC id.
- * @param apic_num - The remote APIC number.
- * @param icr      - A copy of the APIC's ICR.  (LAPIC-style ICR, clone from redir table for ioapics)
- * @param dfr      - A copy of the APIC's DFR   (LAPIC-style DFR)
- & @param extirq   - irq for external interrupts (e.g., from 8259)
- */
-int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t apic_src, uint64_t icr, uint32_t dfr, uint32_t ext_irq);
-
-
-#if 0
-/**
- * Send an IRQinter-processor interrupt (IPI) from one local APIC to another local APIC.
- *
- * @param icc_bus  - The ICC bus that routes IPIs.
- * @param apic_src - The source APIC id.
- * @param apic_num - The remote APIC number.
- * @param icrlo    - The low 32 bites of the APIC's ICR.
- */
-int v3_icc_send_irq(struct vm_device * icc_bus, uint32_t ioapic_src, uint8_t apic_num, uint8_t irq);
-
-#endif
-
-
-#endif /* ICC_BUS_H_ */
diff --git a/palacios/include/devices/vnet.h b/palacios/include/devices/vnet.h
deleted file mode 100644 (file)
index 91cde55..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-/* 
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National 
- * Science Foundation and the Department of Energy.  
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico.  You can find out more at 
- * http://www.v3vee.org
- *
- * Copyright (c) 2009, Lei Xia <lxia@northwestern.edu> 
- * Copyright (c) 2009, Yuan Tang <ytang@northwestern.edu> 
- * Copyright (c) 2009, Jack Lange <jarusl@cs.northwestern.edu> 
- * Copyright (c) 2009, Peter Dinda <pdinda@northwestern.edu
- * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
- * All rights reserved.
- *
- * Author: Lei Xia <lxia@northwestern.edu>
- *               Yuan Tang <ytang@northwestern.edu>
- *               Jack Lange <jarusl@cs.northwestern.edu> 
- *               Peter Dinda <pdinda@northwestern.edu
- *
- * This is free software.  You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#ifndef __VNET_H__
-#define __VNET_H__
-
-#include <palacios/vmm.h>
-#include <palacios/vmm_string.h>
-#include <palacios/vmm_types.h>
-#include <palacios/vmm_queue.h>
-#include <palacios/vmm_socket.h>
-#include <palacios/vmm_hashtable.h>
-
-
-#define ETHERNET_HEADER_LEN 14
-#define ETHERNET_DATA_MIN   46
-#define ETHERNET_DATA_MAX   1500
-#define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_DATA_MAX)
-
-//the routing entry
-struct routing {
-    char src_mac[6];
-    char dest_mac[6];
-
-    int src_mac_qual;
-    int dest_mac_qual;
-
-    int dest;
-    int type; //EDGE_TYPE|INTERFACE_TYPE
-    int src;
-    int src_type;
-
-    int use;
-
-    int next;
-    int prev;
-};
-
- //This is the structure that stores the topology 
-struct topology {
-    SOCK link_sock;
-
-    unsigned long dest;
-
-    // Port for UDP
-    unsigned short remote_port;
-
-    // LINK OR GATEWAY
-    // int link_class;
-
-    int use;
-    int type; //TCP=0, UDP=1,VTP=2, can be extended so on
-
-    int next;
-    int prev;
-};
-
-struct sock_list {
-    SOCK sock;
-
-    int next;
-    int prev;
-};
-
-
-#define GENERAL_NIC 0
-
-struct vnet_if_device {
-    char name[50];
-    struct ethAddr device_addr;
-    
-    int (*input)(uchar_t * pkt, uint_t size);
-    
-    void * data;
-};
-
-
-struct device_list {
-    struct vnet_if_device *device;
-
-    int use;
-    int type;
-
-    int next;
-    int prev;
-};
-
-// 14 (ethernet frame) + 20 bytes
-struct HEADERS {
-    char ethernetdest[6];
-    char ethernetsrc[6];
-    unsigned char ethernettype[2]; // indicates layer 3 protocol type
-    char ip[20];
-};
-
-#define FOREACH(iter, list, start) for (iter = start; iter != -1; iter = list[iter].next)
-#define FOREACH_SOCK(iter, socks, start) FOREACH(iter, socks, start)
-#define FOREACH_LINK(iter, links, start) FOREACH(iter, links, start)
-#define FOREACH_ROUTE(iter, routes, start) FOREACH(iter, routes, start)
-#define FOREACH_DEVICE(iter, devices, start) FOREACH(iter, devices, start)
-
-
-int V3_Send_pkt(uchar_t *buf, int length);
-int V3_Register_pkt_event(int (*netif_input)(uchar_t * pkt, uint_t size));
-
-
-int vnet_send_pkt(char *buf, int length);
-int vnet_register_pkt_event(char *dev_name, int (*netif_input)(uchar_t * pkt, uint_t size), void *data);
-
-int vnet_pkt_process();
-
-void vnet_init();
-
-#endif
-
-
index 322f390..d9ccd54 100644 (file)
@@ -71,7 +71,8 @@ struct guest_info {
        uint32_t flags;
        struct {
            uint8_t use_large_pages        : 1;    /* Enable virtual page tables to use large pages */
-           uint32_t rsvd                  : 31;
+           uint8_t use_giant_pages        : 1;    /* Enable virtual page tables to use giant (1GB) pages */
+           uint32_t rsvd                  : 30;
        } __attribute__((packed));
     } __attribute__((packed));
 
@@ -113,9 +114,13 @@ struct guest_info {
     struct v3_sym_core_state sym_core_state;
 #endif
 
+    /* Per-core config tree data. */
+    v3_cfg_tree_t * core_cfg_data;
 
     struct v3_vm_info * vm_info;
 
+    v3_core_operating_mode_t core_run_state;
+
     /* the logical cpu on which this core runs */
     uint32_t cpu_id;
 };
@@ -168,9 +173,13 @@ struct v3_vm_info {
 
     uint64_t yield_cycle_period;  
 
+
+    void * host_priv_data;
+
     int num_cores;
-    struct guest_info cores[0];
 
+    // JRL: This MUST be the last entry...
+    struct guest_info cores[0];
 };
 
 int v3_init_vm(struct v3_vm_info * vm);
diff --git a/palacios/include/palacios/vmcs_gen.h b/palacios/include/palacios/vmcs_gen.h
deleted file mode 100644 (file)
index a78990b..0000000
+++ /dev/null
@@ -1,809 +0,0 @@
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National 
- * Science Foundation and the Department of Energy.  
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico.  You can find out more at 
- * http://www.v3vee.org
- *
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
- * All rights reserved.
- *
- * Author: Automatically Generated File
- *
- * This is free software.  You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#ifndef __VMCS_GEN__
-#define __VMCS_GEN__
-
-#ifdef __V3VEE__
-
-
-#include <palacios/vmcs.h>
-#include <palacios/vmm.h>
-
-void    Set_VMCS_GUEST_ES_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_ES_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_ES_SELECTOR();
-
-
-void    Set_VMCS_GUEST_CS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_CS_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_CS_SELECTOR();
-
-
-void    Set_VMCS_GUEST_SS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_SS_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_SS_SELECTOR();
-
-
-void    Set_VMCS_GUEST_DS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_DS_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_DS_SELECTOR();
-
-
-void    Set_VMCS_GUEST_FS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_FS_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_FS_SELECTOR();
-
-
-void    Set_VMCS_GUEST_GS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_GS_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_GS_SELECTOR();
-
-
-void    Set_VMCS_GUEST_LDTR_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_LDTR_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_LDTR_SELECTOR();
-
-
-void    Set_VMCS_GUEST_TR_SELECTOR(uint_t val);
-uint_t  Get_VMCS_GUEST_TR_SELECTOR();
-
-void    PrintTrace_VMCS_GUEST_TR_SELECTOR();
-
-
-void    Set_VMCS_HOST_ES_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_ES_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_ES_SELECTOR();
-
-
-void    Set_VMCS_HOST_CS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_CS_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_CS_SELECTOR();
-
-
-void    Set_VMCS_HOST_SS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_SS_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_SS_SELECTOR();
-
-
-void    Set_VMCS_HOST_DS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_DS_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_DS_SELECTOR();
-
-
-void    Set_VMCS_HOST_FS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_FS_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_FS_SELECTOR();
-
-
-void    Set_VMCS_HOST_GS_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_GS_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_GS_SELECTOR();
-
-
-void    Set_VMCS_HOST_TR_SELECTOR(uint_t val);
-uint_t  Get_VMCS_HOST_TR_SELECTOR();
-
-void    PrintTrace_VMCS_HOST_TR_SELECTOR();
-
-
-void    Set_IO_BITMAP_A_ADDR(uint_t val);
-uint_t  Get_IO_BITMAP_A_ADDR();
-
-void    PrintTrace_IO_BITMAP_A_ADDR();
-
-
-void    Set_IO_BITMAP_A_ADDR_HIGH(uint_t val);
-uint_t  Get_IO_BITMAP_A_ADDR_HIGH();
-
-void    PrintTrace_IO_BITMAP_A_ADDR_HIGH();
-
-
-void    Set_IO_BITMAP_B_ADDR(uint_t val);
-uint_t  Get_IO_BITMAP_B_ADDR();
-
-void    PrintTrace_IO_BITMAP_B_ADDR();
-
-
-void    Set_IO_BITMAP_B_ADDR_HIGH(uint_t val);
-uint_t  Get_IO_BITMAP_B_ADDR_HIGH();
-
-void    PrintTrace_IO_BITMAP_B_ADDR_HIGH();
-
-
-void    Set_MSR_BITMAPS(uint_t val);
-uint_t  Get_MSR_BITMAPS();
-
-void    PrintTrace_MSR_BITMAPS();
-
-
-void    Set_MSR_BITMAPS_HIGH(uint_t val);
-uint_t  Get_MSR_BITMAPS_HIGH();
-
-void    PrintTrace_MSR_BITMAPS_HIGH();
-
-
-void    Set_VM_EXIT_MSR_STORE_ADDR(uint_t val);
-uint_t  Get_VM_EXIT_MSR_STORE_ADDR();
-
-void    PrintTrace_VM_EXIT_MSR_STORE_ADDR();
-
-
-void    Set_VM_EXIT_MSR_STORE_ADDR_HIGH(uint_t val);
-uint_t  Get_VM_EXIT_MSR_STORE_ADDR_HIGH();
-
-void    PrintTrace_VM_EXIT_MSR_STORE_ADDR_HIGH();
-
-
-void    Set_VM_EXIT_MSR_LOAD_ADDR(uint_t val);
-uint_t  Get_VM_EXIT_MSR_LOAD_ADDR();
-
-void    PrintTrace_VM_EXIT_MSR_LOAD_ADDR();
-
-
-void    Set_VM_EXIT_MSR_LOAD_ADDR_HIGH(uint_t val);
-uint_t  Get_VM_EXIT_MSR_LOAD_ADDR_HIGH();
-
-void    PrintTrace_VM_EXIT_MSR_LOAD_ADDR_HIGH();
-
-
-void    Set_VM_ENTRY_MSR_LOAD_ADDR(uint_t val);
-uint_t  Get_VM_ENTRY_MSR_LOAD_ADDR();
-
-void    PrintTrace_VM_ENTRY_MSR_LOAD_ADDR();
-
-
-void    Set_VM_ENTRY_MSR_LOAD_ADDR_HIGH(uint_t val);
-uint_t  Get_VM_ENTRY_MSR_LOAD_ADDR_HIGH();
-
-void    PrintTrace_VM_ENTRY_MSR_LOAD_ADDR_HIGH();
-
-
-void    Set_VMCS_EXEC_PTR(uint_t val);
-uint_t  Get_VMCS_EXEC_PTR();
-
-void    PrintTrace_VMCS_EXEC_PTR();
-
-
-void    Set_VMCS_EXEC_PTR_HIGH(uint_t val);
-uint_t  Get_VMCS_EXEC_PTR_HIGH();
-
-void    PrintTrace_VMCS_EXEC_PTR_HIGH();
-
-
-void    Set_TSC_OFFSET(uint_t val);
-uint_t  Get_TSC_OFFSET();
-
-void    PrintTrace_TSC_OFFSET();
-
-
-void    Set_TSC_OFFSET_HIGH(uint_t val);
-uint_t  Get_TSC_OFFSET_HIGH();
-
-void    PrintTrace_TSC_OFFSET_HIGH();
-
-
-void    Set_VIRT_APIC_PAGE_ADDR(uint_t val);
-uint_t  Get_VIRT_APIC_PAGE_ADDR();
-
-void    PrintTrace_VIRT_APIC_PAGE_ADDR();
-
-
-void    Set_VIRT_APIC_PAGE_ADDR_HIGH(uint_t val);
-uint_t  Get_VIRT_APIC_PAGE_ADDR_HIGH();
-
-void    PrintTrace_VIRT_APIC_PAGE_ADDR_HIGH();
-
-
-void    Set_VMCS_LINK_PTR(uint_t val);
-uint_t  Get_VMCS_LINK_PTR();
-
-void    PrintTrace_VMCS_LINK_PTR();
-
-
-void    Set_VMCS_LINK_PTR_HIGH(uint_t val);
-uint_t  Get_VMCS_LINK_PTR_HIGH();
-
-void    PrintTrace_VMCS_LINK_PTR_HIGH();
-
-
-void    Set_GUEST_IA32_DEBUGCTL(uint_t val);
-uint_t  Get_GUEST_IA32_DEBUGCTL();
-
-void    PrintTrace_GUEST_IA32_DEBUGCTL();
-
-
-void    Set_GUEST_IA32_DEBUGCTL_HIGH(uint_t val);
-uint_t  Get_GUEST_IA32_DEBUGCTL_HIGH();
-
-void    PrintTrace_GUEST_IA32_DEBUGCTL_HIGH();
-
-
-void    Set_PIN_VM_EXEC_CTRLS(uint_t val);
-uint_t  Get_PIN_VM_EXEC_CTRLS();
-
-void    PrintTrace_PIN_VM_EXEC_CTRLS();
-
-
-void    Set_PROC_VM_EXEC_CTRLS(uint_t val);
-uint_t  Get_PROC_VM_EXEC_CTRLS();
-
-void    PrintTrace_PROC_VM_EXEC_CTRLS();
-
-
-void    Set_EXCEPTION_BITMAP(uint_t val);
-uint_t  Get_EXCEPTION_BITMAP();
-
-void    PrintTrace_EXCEPTION_BITMAP();
-
-
-void    Set_PAGE_FAULT_ERROR_MASK(uint_t val);
-uint_t  Get_PAGE_FAULT_ERROR_MASK();
-
-void    PrintTrace_PAGE_FAULT_ERROR_MASK();
-
-
-void    Set_PAGE_FAULT_ERROR_MATCH(uint_t val);
-uint_t  Get_PAGE_FAULT_ERROR_MATCH();
-
-void    PrintTrace_PAGE_FAULT_ERROR_MATCH();
-
-
-void    Set_CR3_TARGET_COUNT(uint_t val);
-uint_t  Get_CR3_TARGET_COUNT();
-
-void    PrintTrace_CR3_TARGET_COUNT();
-
-
-void    Set_VM_EXIT_CTRLS(uint_t val);
-uint_t  Get_VM_EXIT_CTRLS();
-
-void    PrintTrace_VM_EXIT_CTRLS();
-
-
-void    Set_VM_EXIT_MSR_STORE_COUNT(uint_t val);
-uint_t  Get_VM_EXIT_MSR_STORE_COUNT();
-
-void    PrintTrace_VM_EXIT_MSR_STORE_COUNT();
-
-
-void    Set_VM_EXIT_MSR_LOAD_COUNT(uint_t val);
-uint_t  Get_VM_EXIT_MSR_LOAD_COUNT();
-
-void    PrintTrace_VM_EXIT_MSR_LOAD_COUNT();
-
-
-void    Set_VM_ENTRY_CTRLS(uint_t val);
-uint_t  Get_VM_ENTRY_CTRLS();
-
-void    PrintTrace_VM_ENTRY_CTRLS();
-
-
-void    Set_VM_ENTRY_MSR_LOAD_COUNT(uint_t val);
-uint_t  Get_VM_ENTRY_MSR_LOAD_COUNT();
-
-void    PrintTrace_VM_ENTRY_MSR_LOAD_COUNT();
-
-
-void    Set_VM_ENTRY_INT_INFO_FIELD(uint_t val);
-uint_t  Get_VM_ENTRY_INT_INFO_FIELD();
-
-void    PrintTrace_VM_ENTRY_INT_INFO_FIELD();
-
-
-void    Set_VM_ENTRY_EXCEPTION_ERROR(uint_t val);
-uint_t  Get_VM_ENTRY_EXCEPTION_ERROR();
-
-void    PrintTrace_VM_ENTRY_EXCEPTION_ERROR();
-
-
-void    Set_VM_ENTRY_INSTR_LENGTH(uint_t val);
-uint_t  Get_VM_ENTRY_INSTR_LENGTH();
-
-void    PrintTrace_VM_ENTRY_INSTR_LENGTH();
-
-
-void    Set_TPR_THRESHOLD(uint_t val);
-uint_t  Get_TPR_THRESHOLD();
-
-void    PrintTrace_TPR_THRESHOLD();
-
-
-void    Set_VM_INSTR_ERROR(uint_t val);
-uint_t  Get_VM_INSTR_ERROR();
-
-void    PrintTrace_VM_INSTR_ERROR();
-
-
-void    Set_EXIT_REASON(uint_t val);
-uint_t  Get_EXIT_REASON();
-
-void    PrintTrace_EXIT_REASON();
-
-
-void    Set_VM_EXIT_INT_INFO(uint_t val);
-uint_t  Get_VM_EXIT_INT_INFO();
-
-void    PrintTrace_VM_EXIT_INT_INFO();
-
-
-void    Set_VM_EXIT_INT_ERROR(uint_t val);
-uint_t  Get_VM_EXIT_INT_ERROR();
-
-void    PrintTrace_VM_EXIT_INT_ERROR();
-
-
-void    Set_IDT_VECTOR_INFO(uint_t val);
-uint_t  Get_IDT_VECTOR_INFO();
-
-void    PrintTrace_IDT_VECTOR_INFO();
-
-
-void    Set_IDT_VECTOR_ERROR(uint_t val);
-uint_t  Get_IDT_VECTOR_ERROR();
-
-void    PrintTrace_IDT_VECTOR_ERROR();
-
-
-void    Set_VM_EXIT_INSTR_LENGTH(uint_t val);
-uint_t  Get_VM_EXIT_INSTR_LENGTH();
-
-void    PrintTrace_VM_EXIT_INSTR_LENGTH();
-
-
-void    Set_VMX_INSTR_INFO(uint_t val);
-uint_t  Get_VMX_INSTR_INFO();
-
-void    PrintTrace_VMX_INSTR_INFO();
-
-
-void    Set_GUEST_ES_LIMIT(uint_t val);
-uint_t  Get_GUEST_ES_LIMIT();
-
-void    PrintTrace_GUEST_ES_LIMIT();
-
-
-void    Set_GUEST_CS_LIMIT(uint_t val);
-uint_t  Get_GUEST_CS_LIMIT();
-
-void    PrintTrace_GUEST_CS_LIMIT();
-
-
-void    Set_GUEST_SS_LIMIT(uint_t val);
-uint_t  Get_GUEST_SS_LIMIT();
-
-void    PrintTrace_GUEST_SS_LIMIT();
-
-
-void    Set_GUEST_DS_LIMIT(uint_t val);
-uint_t  Get_GUEST_DS_LIMIT();
-
-void    PrintTrace_GUEST_DS_LIMIT();
-
-
-void    Set_GUEST_FS_LIMIT(uint_t val);
-uint_t  Get_GUEST_FS_LIMIT();
-
-void    PrintTrace_GUEST_FS_LIMIT();
-
-
-void    Set_GUEST_GS_LIMIT(uint_t val);
-uint_t  Get_GUEST_GS_LIMIT();
-
-void    PrintTrace_GUEST_GS_LIMIT();
-
-
-void    Set_GUEST_LDTR_LIMIT(uint_t val);
-uint_t  Get_GUEST_LDTR_LIMIT();
-
-void    PrintTrace_GUEST_LDTR_LIMIT();
-
-
-void    Set_GUEST_TR_LIMIT(uint_t val);
-uint_t  Get_GUEST_TR_LIMIT();
-
-void    PrintTrace_GUEST_TR_LIMIT();
-
-
-void    Set_GUEST_GDTR_LIMIT(uint_t val);
-uint_t  Get_GUEST_GDTR_LIMIT();
-
-void    PrintTrace_GUEST_GDTR_LIMIT();
-
-
-void    Set_GUEST_IDTR_LIMIT(uint_t val);
-uint_t  Get_GUEST_IDTR_LIMIT();
-
-void    PrintTrace_GUEST_IDTR_LIMIT();
-
-
-void    Set_GUEST_ES_ACCESS(uint_t val);
-uint_t  Get_GUEST_ES_ACCESS();
-
-void    PrintTrace_GUEST_ES_ACCESS();
-
-
-void    Set_GUEST_CS_ACCESS(uint_t val);
-uint_t  Get_GUEST_CS_ACCESS();
-
-void    PrintTrace_GUEST_CS_ACCESS();
-
-
-void    Set_GUEST_SS_ACCESS(uint_t val);
-uint_t  Get_GUEST_SS_ACCESS();
-
-void    PrintTrace_GUEST_SS_ACCESS();
-
-
-void    Set_GUEST_DS_ACCESS(uint_t val);
-uint_t  Get_GUEST_DS_ACCESS();
-
-void    PrintTrace_GUEST_DS_ACCESS();
-
-
-void    Set_GUEST_FS_ACCESS(uint_t val);
-uint_t  Get_GUEST_FS_ACCESS();
-
-void    PrintTrace_GUEST_FS_ACCESS();
-
-
-void    Set_GUEST_GS_ACCESS(uint_t val);
-uint_t  Get_GUEST_GS_ACCESS();
-
-void    PrintTrace_GUEST_GS_ACCESS();
-
-
-void    Set_GUEST_LDTR_ACCESS(uint_t val);
-uint_t  Get_GUEST_LDTR_ACCESS();
-
-void    PrintTrace_GUEST_LDTR_ACCESS();
-
-
-void    Set_GUEST_TR_ACCESS(uint_t val);
-uint_t  Get_GUEST_TR_ACCESS();
-
-void    PrintTrace_GUEST_TR_ACCESS();
-
-
-void    Set_GUEST_INT_STATE(uint_t val);
-uint_t  Get_GUEST_INT_STATE();
-
-void    PrintTrace_GUEST_INT_STATE();
-
-
-void    Set_GUEST_ACTIVITY_STATE(uint_t val);
-uint_t  Get_GUEST_ACTIVITY_STATE();
-
-void    PrintTrace_GUEST_ACTIVITY_STATE();
-
-
-void    Set_GUEST_SMBASE(uint_t val);
-uint_t  Get_GUEST_SMBASE();
-
-void    PrintTrace_GUEST_SMBASE();
-
-
-void    Set_GUEST_IA32_SYSENTER_CS(uint_t val);
-uint_t  Get_GUEST_IA32_SYSENTER_CS();
-
-void    PrintTrace_GUEST_IA32_SYSENTER_CS();
-
-
-void    Set_HOST_IA32_SYSENTER_CS(uint_t val);
-uint_t  Get_HOST_IA32_SYSENTER_CS();
-
-void    PrintTrace_HOST_IA32_SYSENTER_CS();
-
-
-void    Set_CR0_GUEST_HOST_MASK(uint_t val);
-uint_t  Get_CR0_GUEST_HOST_MASK();
-
-void    PrintTrace_CR0_GUEST_HOST_MASK();
-
-
-void    Set_CR4_GUEST_HOST_MASK(uint_t val);
-uint_t  Get_CR4_GUEST_HOST_MASK();
-
-void    PrintTrace_CR4_GUEST_HOST_MASK();
-
-
-void    Set_CR0_READ_SHADOW(uint_t val);
-uint_t  Get_CR0_READ_SHADOW();
-
-void    PrintTrace_CR0_READ_SHADOW();
-
-
-void    Set_CR4_READ_SHADOW(uint_t val);
-uint_t  Get_CR4_READ_SHADOW();
-
-void    PrintTrace_CR4_READ_SHADOW();
-
-
-void    Set_CR3_TARGET_VALUE_0(uint_t val);
-uint_t  Get_CR3_TARGET_VALUE_0();
-
-void    PrintTrace_CR3_TARGET_VALUE_0();
-
-
-void    Set_CR3_TARGET_VALUE_1(uint_t val);
-uint_t  Get_CR3_TARGET_VALUE_1();
-
-void    PrintTrace_CR3_TARGET_VALUE_1();
-
-
-void    Set_CR3_TARGET_VALUE_2(uint_t val);
-uint_t  Get_CR3_TARGET_VALUE_2();
-
-void    PrintTrace_CR3_TARGET_VALUE_2();
-
-
-void    Set_CR3_TARGET_VALUE_3(uint_t val);
-uint_t  Get_CR3_TARGET_VALUE_3();
-
-void    PrintTrace_CR3_TARGET_VALUE_3();
-
-
-void    Set_EXIT_QUALIFICATION(uint_t val);
-uint_t  Get_EXIT_QUALIFICATION();
-
-void    PrintTrace_EXIT_QUALIFICATION();
-
-
-void    Set_IO_RCX(uint_t val);
-uint_t  Get_IO_RCX();
-
-void    PrintTrace_IO_RCX();
-
-
-void    Set_IO_RSI(uint_t val);
-uint_t  Get_IO_RSI();
-
-void    PrintTrace_IO_RSI();
-
-
-void    Set_IO_RDI(uint_t val);
-uint_t  Get_IO_RDI();
-
-void    PrintTrace_IO_RDI();
-
-
-void    Set_IO_RIP(uint_t val);
-uint_t  Get_IO_RIP();
-
-void    PrintTrace_IO_RIP();
-
-
-void    Set_GUEST_LINEAR_ADDR(uint_t val);
-uint_t  Get_GUEST_LINEAR_ADDR();
-
-void    PrintTrace_GUEST_LINEAR_ADDR();
-
-
-void    Set_GUEST_CR0(uint_t val);
-uint_t  Get_GUEST_CR0();
-
-void    PrintTrace_GUEST_CR0();
-
-
-void    Set_GUEST_CR3(uint_t val);
-uint_t  Get_GUEST_CR3();
-
-void    PrintTrace_GUEST_CR3();
-
-
-void    Set_GUEST_CR4(uint_t val);
-uint_t  Get_GUEST_CR4();
-
-void    PrintTrace_GUEST_CR4();
-
-
-void    Set_GUEST_ES_BASE(uint_t val);
-uint_t  Get_GUEST_ES_BASE();
-
-void    PrintTrace_GUEST_ES_BASE();
-
-
-void    Set_GUEST_CS_BASE(uint_t val);
-uint_t  Get_GUEST_CS_BASE();
-
-void    PrintTrace_GUEST_CS_BASE();
-
-
-void    Set_GUEST_SS_BASE(uint_t val);
-uint_t  Get_GUEST_SS_BASE();
-
-void    PrintTrace_GUEST_SS_BASE();
-
-
-void    Set_GUEST_DS_BASE(uint_t val);
-uint_t  Get_GUEST_DS_BASE();
-
-void    PrintTrace_GUEST_DS_BASE();
-
-
-void    Set_GUEST_FS_BASE(uint_t val);
-uint_t  Get_GUEST_FS_BASE();
-
-void    PrintTrace_GUEST_FS_BASE();
-
-
-void    Set_GUEST_GS_BASE(uint_t val);
-uint_t  Get_GUEST_GS_BASE();
-
-void    PrintTrace_GUEST_GS_BASE();
-
-
-void    Set_GUEST_LDTR_BASE(uint_t val);
-uint_t  Get_GUEST_LDTR_BASE();
-
-void    PrintTrace_GUEST_LDTR_BASE();
-
-
-void    Set_GUEST_TR_BASE(uint_t val);
-uint_t  Get_GUEST_TR_BASE();
-
-void    PrintTrace_GUEST_TR_BASE();
-
-
-void    Set_GUEST_GDTR_BASE(uint_t val);
-uint_t  Get_GUEST_GDTR_BASE();
-
-void    PrintTrace_GUEST_GDTR_BASE();
-
-
-void    Set_GUEST_IDTR_BASE(uint_t val);
-uint_t  Get_GUEST_IDTR_BASE();
-
-void    PrintTrace_GUEST_IDTR_BASE();
-
-
-void    Set_GUEST_DR7(uint_t val);
-uint_t  Get_GUEST_DR7();
-
-void    PrintTrace_GUEST_DR7();
-
-
-void    Set_GUEST_RSP(uint_t val);
-uint_t  Get_GUEST_RSP();
-
-void    PrintTrace_GUEST_RSP();
-
-
-void    Set_GUEST_RIP(uint_t val);
-uint_t  Get_GUEST_RIP();
-
-void    PrintTrace_GUEST_RIP();
-
-
-void    Set_GUEST_RFLAGS(uint_t val);
-uint_t  Get_GUEST_RFLAGS();
-
-void    PrintTrace_GUEST_RFLAGS();
-
-
-void    Set_GUEST_PENDING_DEBUG_EXCS(uint_t val);
-uint_t  Get_GUEST_PENDING_DEBUG_EXCS();
-
-void    PrintTrace_GUEST_PENDING_DEBUG_EXCS();
-
-
-void    Set_GUEST_IA32_SYSENTER_ESP(uint_t val);
-uint_t  Get_GUEST_IA32_SYSENTER_ESP();
-
-void    PrintTrace_GUEST_IA32_SYSENTER_ESP();
-
-
-void    Set_GUEST_IA32_SYSENTER_EIP(uint_t val);
-uint_t  Get_GUEST_IA32_SYSENTER_EIP();
-
-void    PrintTrace_GUEST_IA32_SYSENTER_EIP();
-
-
-void    Set_HOST_CR0(uint_t val);
-uint_t  Get_HOST_CR0();
-
-void    PrintTrace_HOST_CR0();
-
-
-void    Set_HOST_CR3(uint_t val);
-uint_t  Get_HOST_CR3();
-
-void    PrintTrace_HOST_CR3();
-
-
-void    Set_HOST_CR4(uint_t val);
-uint_t  Get_HOST_CR4();
-
-void    PrintTrace_HOST_CR4();
-
-
-void    Set_HOST_FS_BASE(uint_t val);
-uint_t  Get_HOST_FS_BASE();
-
-void    PrintTrace_HOST_FS_BASE();
-
-
-void    Set_HOST_GS_BASE(uint_t val);
-uint_t  Get_HOST_GS_BASE();
-
-void    PrintTrace_HOST_GS_BASE();
-
-
-void    Set_HOST_TR_BASE(uint_t val);
-uint_t  Get_HOST_TR_BASE();
-
-void    PrintTrace_HOST_TR_BASE();
-
-
-void    Set_HOST_GDTR_BASE(uint_t val);
-uint_t  Get_HOST_GDTR_BASE();
-
-void    PrintTrace_HOST_GDTR_BASE();
-
-
-void    Set_HOST_IDTR_BASE(uint_t val);
-uint_t  Get_HOST_IDTR_BASE();
-
-void    PrintTrace_HOST_IDTR_BASE();
-
-
-void    Set_HOST_IA32_SYSENTER_ESP(uint_t val);
-uint_t  Get_HOST_IA32_SYSENTER_ESP();
-
-void    PrintTrace_HOST_IA32_SYSENTER_ESP();
-
-
-void    Set_HOST_IA32_SYSENTER_EIP(uint_t val);
-uint_t  Get_HOST_IA32_SYSENTER_EIP();
-
-void    PrintTrace_HOST_IA32_SYSENTER_EIP();
-
-
-void    Set_HOST_RSP(uint_t val);
-uint_t  Get_HOST_RSP();
-
-void    PrintTrace_HOST_RSP();
-
-
-void    Set_HOST_RIP(uint_t val);
-uint_t  Get_HOST_RIP();
-
-void    PrintTrace_HOST_RIP();
-
-void PrintTrace_VMCS_ALL();
-
-
-#endif // !__V3VEE
-
-#endif
-
-
-
index 7a466a9..a5b47dc 100644 (file)
@@ -160,6 +160,9 @@ struct guest_info;
     } while (0)
 
 
+
+
+
 #define V3_Hook_Interrupt(vm, irq) ({                                  \
            int ret = 0;                                                \
            extern struct v3_os_hooks * os_hooks;                       \
@@ -188,6 +191,17 @@ struct guest_info;
     } while (0)
 
 
+
+#define V3_CREATE_THREAD_ON_CPU(cpu, fn, arg, name) ({                 \
+           void * thread = NULL;                                       \
+           extern struct v3_os_hooks * os_hooks;                       \
+           if ((os_hooks) && (os_hooks)->start_thread_on_cpu) {        \
+               thread = (os_hooks)->start_thread_on_cpu(cpu, fn, arg, name); \
+           }                                                           \
+           thread;                                                     \
+       })
+
+
 #define V3_ACK_IRQ(irq)                                                \
     do {                                                       \
        extern struct v3_os_hooks * os_hooks;                   \
@@ -211,7 +225,7 @@ struct guest_info;
            while(1);                                                   \
        }                                                               \
     } while(0)                                                         \
-
+       
 
 
 
@@ -300,7 +314,7 @@ struct v3_interrupt {
 void Init_V3(struct v3_os_hooks * hooks,  int num_cpus);
 
 
-struct v3_vm_info * v3_create_vm(void * cfg);
+struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data);
 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask);
 
 
index 2b38c0d..52a7d5f 100644 (file)
 
 #ifdef __V3VEE__
 
-#define V3_TtyOpen(path, mode)                                         \
+#define V3_TtyOpen(vm, path, mode)                                     \
     ({                                                                 \
-       extern struct v3_console_hooks *console_hooks;                          \
-       ((console_hooks) && (console_hooks)->tty_open) ?                                \
-           (console_hooks)->tty_open((path), (mode)) : NULL;           \
+       extern struct v3_console_hooks * console_hooks;                 \
+       ((console_hooks) && (console_hooks)->tty_open) ?                \
+           (console_hooks)->tty_open((path), (mode), (vm)->host_priv_data) : NULL; \
     })
 
 #define V3_TtyCursorSet(tty, x, y)                                     \
     ({                                                                 \
-       extern struct v3_console_hooks *console_hooks;                          \
-       ((console_hooks) && (console_hooks)->tty_cursor_set) ?                  \
-           (console_hooks)->tty_cursor_set((tty), (x), (y)) : -1;              \
+       extern struct v3_console_hooks * console_hooks;                 \
+       ((console_hooks) && (console_hooks)->tty_cursor_set) ?          \
+           (console_hooks)->tty_cursor_set((tty), (x), (y)) : -1;      \
     })
 
 #define V3_TtyCharacterSet(tty, x, y, c, style)                                \
     ({                                                                 \
-       extern struct v3_console_hooks *console_hooks;                          \
-       ((console_hooks) && (console_hooks)->tty_character_set) ?                       \
+       extern struct v3_console_hooks * console_hooks;                 \
+       ((console_hooks) && (console_hooks)->tty_character_set) ?       \
            (console_hooks)->tty_character_set((tty), (x), (y), (c), (style)) : -1; \
     })
 
 #define V3_TtyScroll(tty, lines)                                       \
     ({                                                                 \
-       extern struct v3_console_hooks *console_hooks;                          \
-       ((console_hooks) && (console_hooks)->tty_scroll) ?                      \
+       extern struct v3_console_hooks * console_hooks;                 \
+       ((console_hooks) && (console_hooks)->tty_scroll) ?              \
            (console_hooks)->tty_scroll((tty), (lines)) : -1;           \
     })
 
 #define V3_TtyUpdate(tty)                                              \
     ({                                                                 \
-       extern struct v3_console_hooks *console_hooks;                          \
-       ((console_hooks) && (console_hooks)->tty_update) ?                      \
-           (console_hooks)->tty_update((tty)) : -1;                            \
+       extern struct v3_console_hooks * console_hooks;                 \
+       ((console_hooks) && (console_hooks)->tty_update) ?              \
+           (console_hooks)->tty_update((tty)) : -1;                    \
     })
 
 #endif
 
 struct v3_console_hooks {
     /* open console device, mode is a combination of TTY_OPEN_MODE_* flags */
-    void *(*tty_open)(const char *path, int mode);
+    void *(*tty_open)(const char * path, int mode, void * priv_data);
 
     /* set cursor position */
-    int (*tty_cursor_set)(void *tty, int x, int y);
+    int (*tty_cursor_set)(void * tty, int x, int y);
 
     /* output character c with specified style at (x, y) */
-    int (*tty_character_set)(void *tty, int x, int y, char c, unsigned char style);
+    int (*tty_character_set)(void * tty, int x, int y, char c, unsigned char style);
 
     /* scroll the console down the specified number of lines */
-    int (*tty_scroll)(void *tty, int lines);
+    int (*tty_scroll)(void * tty, int lines);
 
     /* force update of console display; all updates by above functions
      * may be defferred until the next tty_update call 
      */
-    int (*tty_update)(void *tty);
+    int (*tty_update)(void * tty);
 };
 
 
index 20718c3..b523475 100644 (file)
 
 #define V3_FileOpen(path, mode)                                                \
     ({                                                                 \
-       extern struct v3_file_hooks *file_hooks;                                \
-       ((file_hooks) && (file_hooks)->file_open) ?                             \
-           (file_hooks)->file_open((path), (mode)) : -1 ;              \
+       extern struct v3_file_hooks * file_hooks;                       \
+       ((file_hooks) && (file_hooks)->file_open) ?                     \
+           (file_hooks)->file_open((path), (mode)) : -1;               \
     })
 
 #define V3_FileClose(fd)                                               \
     ({                                                                 \
-       extern struct v3_file_hooks *file_hooks;                                \
-       ((file_hooks) && (file_hooks)->file_close) ?                            \
-           (file_hooks)->file_close((fd))  :  -1 ;     \
+       extern struct v3_file_hooks * file_hooks;                       \
+       ((file_hooks) && (file_hooks)->file_close) ?                    \
+           (file_hooks)->file_close((fd))  :  -1;                      \
     })
 
-#define V3_FileSize(fd)                                                \
+#define V3_FileSize(fd)                                                        \
     ({                                                                 \
-       extern struct v3_file_hooks *file_hooks;                                \
-       ((file_hooks) && (file_hooks)->file_size) ?                             \
-           (file_hooks)->file_size((fd))  : -1 ;       \
+       extern struct v3_file_hooks * file_hooks;                       \
+       ((file_hooks) && (file_hooks)->file_size) ?                     \
+           (file_hooks)->file_size((fd))  : -1;                        \
     })
 
-#define V3_FileRead(fd,start,buf,len)                                  \
+#define V3_FileRead(fd, start, buf, len)                               \
     ({                                                                 \
-       extern struct v3_file_hooks *file_hooks;                                \
-       ((file_hooks) && (file_hooks)->file_read) ?                             \
-           (file_hooks)->file_read((fd),(start),(buf),(len)) : -1 ;  \
+       extern struct v3_file_hooks * file_hooks;                       \
+       ((file_hooks) && (file_hooks)->file_read) ?                     \
+           (file_hooks)->file_read((fd), (buf), (len), (start)) : -1;  \
     })
 
 #define V3_FileWrite(fd,start,buf,len)                                 \
     ({                                                                 \
-       extern struct v3_file_hooks *file_hooks;                                \
-       ((file_hooks) && (file_hooks)->file_write) ?                            \
-           (file_hooks)->file_write((fd),(start),(buf),(len)) : -1 ;  \
+       extern struct v3_file_hooks * file_hooks;                       \
+       ((file_hooks) && (file_hooks)->file_write) ?                    \
+           (file_hooks)->file_write((fd), (buf), (len), (start)) : -1; \
     })
 
 
 
 struct v3_file_hooks {
 
-    int (*file_open)(const char *path, int mode);
+    int (*file_open)(const char * path, int mode);
     int (*file_close)(int fd);
 
     long long (*file_size)(int fd);
 
     // blocking reads and writes
-    long long  (*file_read)(int fd,  long long start, void *buffer, long long length);
-    long long  (*file_write)(int fd, long long start, void *buffer, long long length);
+    long long (*file_read)(int fd, void * buffer, long long length, long long offset);
+    long long (*file_write)(int fd, void * buffer, long long length, long long offset);
 
 };
 
index 32d6ae6..58b5dd3 100644 (file)
@@ -103,14 +103,13 @@ int v3_add_shadow_mem(struct v3_vm_info * vm, uint16_t core_id,
 
 
 struct v3_mem_region * v3_get_mem_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr);
-struct v3_mem_region * v3_get_next_mem_region(struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr);
 
 
+uint32_t v3_get_max_page_size(struct guest_info * core, addr_t fault_addr, v3_cpu_mode_t mode);
+
 
 void v3_print_mem_map(struct v3_vm_info * vm);
 
-uint32_t v3_get_max_page_size(struct guest_info * core, addr_t fault_addr, uint32_t req_size);
-uint32_t v3_compute_page_alignment(addr_t addr);
 
 
 #endif // ! __V3VEE__
index 120838d..74c7584 100644 (file)
 
 #include <palacios/vmm_types.h>
 #include <palacios/vmm_list.h>
+#include <palacios/vmm_msr.h>
+#include <palacios/vmm_util.h>
 
 struct guest_info;
 
 struct vm_time {
-    uint32_t cpu_freq; // in kHZ
-
-    // Total number of guest run time cycles
-    uint64_t guest_tsc;
-
-    // Cache value to help calculate the guest_tsc
-    uint64_t cached_host_tsc;
-
-    // The number of cycles pending for notification to the timers
-    //ullong_t pending_cycles;
-
-    // Installed Timers 
+    uint32_t host_cpu_freq;    // in kHZ 
+    uint32_t guest_cpu_freq;   // can be lower than host CPU freq!
+         
+    sint64_t guest_host_offset;// Offset of monotonic guest time from host time
+    sint64_t tsc_guest_offset; // Offset of guest TSC from monotonic guest time
+    
+    uint64_t last_update;      // Last time (in monotonic guest time) the 
+                               // timers were updated
+
+    uint64_t initial_time;     // Time when VMM started. 
+    
+    struct v3_msr tsc_aux;     // Auxiliary MSR for RDTSCP
+
+    // Installed Timers slaved off of the guest monotonic TSC
     uint_t num_timers;
     struct list_head timers;
 };
 
-
-
-
 struct vm_timer_ops {
-    void (*update_time)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data);
+    void (*update_timer)(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * priv_data);
     void (*advance_timer)(struct guest_info * info, void * private_data);
 };
 
@@ -59,18 +60,48 @@ struct vm_timer {
     struct list_head timer_link;
 };
 
+// Basic functions for handling passage of time in palacios
+void v3_init_time(struct guest_info * info);
+int v3_start_time(struct guest_info * info);
+int v3_adjust_time(struct guest_info * info);
 
-
-
+// Basic functions for attaching timers to the passage of time
 int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data);
 int v3_remove_timer(struct guest_info * info, struct vm_timer * timer);
+void v3_update_timers(struct guest_info * info);
 
-void v3_advance_time(struct guest_info * info);
+// Functions to return the different notions of time in Palacios.
+static inline uint64_t v3_get_host_time(struct vm_time *t) {   // NOTE: t is not used by this function
+    uint64_t tmp;
+    rdtscll(tmp);   // presumably fills tmp with the host timestamp counter (rdtscll is defined elsewhere) - confirm
+    return tmp;
+}
+
+// Returns *monotonic* guest time: the host time shifted by t->guest_host_offset.
+static inline uint64_t v3_get_guest_time(struct vm_time *t) {
+    return v3_get_host_time(t) + t->guest_host_offset;
+}
+
+// Returns the TSC value seen by the guest: monotonic guest time plus t->tsc_guest_offset.
+static inline uint64_t v3_get_guest_tsc(struct vm_time *t) {
+    return v3_get_guest_time(t) + t->tsc_guest_offset;
+}
+
+// Returns offset of guest TSC from host TSC, i.e. guest_host_offset + tsc_guest_offset.
+static inline sint64_t v3_tsc_host_offset(struct vm_time *time_state) {
+    return time_state->guest_host_offset + time_state->tsc_guest_offset;
+}
+
+// Functions for handling exits on the TSC when fully virtualizing 
+// the timestamp counter.
+#define TSC_MSR     0x10
+#define TSC_AUX_MSR 0xC0000103
+
+int v3_handle_rdtscp(struct guest_info *info);
+int v3_handle_rdtsc(struct guest_info *info);
 
-void v3_update_time(struct guest_info * info, ullong_t cycles);
 
 
-void v3_init_time(struct guest_info * info);
 
 #endif // !__V3VEE__
 
index fc4fd5f..021d0e8 100644 (file)
 
 typedef enum {SHADOW_PAGING, NESTED_PAGING} v3_paging_mode_t;
 typedef enum {VM_RUNNING, VM_STOPPED, VM_SUSPENDED, VM_ERROR, VM_EMULATING} v3_vm_operating_mode_t;
+typedef enum {CORE_RUNNING, CORE_STOPPED} v3_core_operating_mode_t;
 
 typedef enum {PAGING_4KB, PAGING_2MB} v3_paging_size_t;
 
-typedef enum {INIT, SIPI, REAL, /*UNREAL,*/ PROTECTED, PROTECTED_PAE, LONG, LONG_32_COMPAT, LONG_16_COMPAT} v3_cpu_mode_t;
+typedef enum {REAL, /*UNREAL,*/ PROTECTED, PROTECTED_PAE, LONG, LONG_32_COMPAT, LONG_16_COMPAT} v3_cpu_mode_t;
 typedef enum {PHYSICAL_MEM, VIRTUAL_MEM} v3_mem_mode_t;
 
 
index 51c3f04..ecb2fa0 100644 (file)
@@ -236,7 +236,7 @@ static int handle_crystal_tics(struct vm_device * dev, struct channel * ch, uint
 
 #include <palacios/vm_guest.h>
 
-static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) {
+static void pit_update_timer(struct guest_info * info, ullong_t cpu_cycles, ullong_t cpu_freq, void * private_data) {
     struct vm_device * dev = (struct vm_device *)private_data;
     struct pit * state = (struct pit *)dev->private_data;
     //  ullong_t tmp_ctr = state->pit_counter;
@@ -313,14 +313,6 @@ static void pit_update_time(struct guest_info * info, ullong_t cpu_cycles, ullon
     return;
 }
 
-
-static void pit_advance_time(struct guest_info * core, void * private_data) {
-
-    v3_raise_irq(core->vm_info, 0);
-}
-
-
-
 /* This should call out to handle_SQR_WAVE_write, etc...
  */
 static int handle_channel_write(struct channel * ch, char val) {
@@ -624,8 +616,7 @@ static int pit_write_command(struct guest_info * core, ushort_t port, void * src
 
 
 static struct vm_timer_ops timer_ops = {
-    .update_time = pit_update_time,
-    .advance_timer = pit_advance_time,
+    .update_timer = pit_update_timer,
 };
 
 
index 8d62f77..456d717 100644 (file)
@@ -3,7 +3,7 @@ menu "Virtual Devices"
 config APIC
        bool "APIC" 
        default y
-       depends on ICC_BUS && EXPERIMENTAL
+       depends on EXPERIMENTAL
        help 
          Includes the Virtual APIC device
 
@@ -19,7 +19,7 @@ config DEBUG_APIC
 
 config IO_APIC
        bool "IOAPIC"
-       depends on ICC_BUS && EXPERIMENTAL
+       depends on EXPERIMENTAL
        default y
        help 
          Includes the Virtual IO APIC
@@ -32,20 +32,6 @@ config DEBUG_IO_APIC
          Enable debugging for the IO APIC
 
 
-config ICC_BUS
-       bool "ICC BUS"
-       default y
-       depends on EXPERIMENTAL
-       help 
-         The ICC Bus for APIC/IOAPIC communication
-
-config DEBUG_ICC_BUS
-       bool "ICC BUS Debugging"
-       default n
-       depends on ICC_BUS && DEBUG_ON
-       help
-         Enable debugging for the ICC BUS
-
 
 config BOCHS_DEBUG
        bool "Bochs Debug Console Device"
index a90ac91..434caee 100644 (file)
@@ -1,6 +1,5 @@
 obj-$(CONFIG_APIC) += apic.o
 obj-$(CONFIG_IO_APIC) += io_apic.o
-obj-$(CONFIG_ICC_BUS) += icc_bus.o
 obj-$(CONFIG_PIT) += 8254.o
 obj-$(CONFIG_PIC) += 8259a.o
 obj-$(CONFIG_BOCHS_DEBUG) += bochs_debug.o
index 763b538..c051bc8 100644 (file)
 
 #include <devices/apic.h>
 #include <devices/apic_regs.h>
-#include <devices/icc_bus.h>
 #include <palacios/vmm.h>
 #include <palacios/vmm_msr.h>
 #include <palacios/vmm_sprintf.h>
 #include <palacios/vm_guest.h>
+#include <palacios/vmm_types.h>
 
 
 #ifndef CONFIG_DEBUG_APIC
 #define PrintDebug(fmt, args...)
 #endif
 
+#ifdef CONFIG_DEBUG_APIC
+static char *shorthand_str[] = {   // debug names, indexed by icr->dst_shorthand in route_ipi's PrintDebug
+    "(no shorthand)",
+    "(self)",
+    "(all)",
+    "(all-but-me)",
+};
+
+static char *deliverymode_str[] = {   // debug names, indexed by icr->del_mode in route_ipi's PrintDebug
+    "(fixed)",
+    "(lowest priority)",
+    "(SMI)",
+    "(reserved)",
+    "(NMI)",
+    "(INIT)",
+    "(Start Up)",
+    "(ExtInt)",
+};
+#endif
 
 typedef enum { APIC_TMR_INT, APIC_THERM_INT, APIC_PERF_INT, 
               APIC_LINT0_INT, APIC_LINT1_INT, APIC_ERR_INT } apic_irq_type_t;
@@ -116,10 +135,6 @@ typedef enum { APIC_TMR_INT, APIC_THERM_INT, APIC_PERF_INT,
 #define EXT_INT_LOC_VEC_TBL_OFFSET2       0x520   // 0x500 - 0x530
 #define EXT_INT_LOC_VEC_TBL_OFFSET3       0x530   // 0x500 - 0x530
 
-
-
-
-
 struct apic_msr {
     union {
        uint64_t value;
@@ -128,13 +143,20 @@ struct apic_msr {
            uint8_t bootstrap_cpu : 1;
            uint8_t rsvd2         : 2;
            uint8_t apic_enable   : 1;
-           uint64_t base_addr   : 40;
-           uint32_t rsvd3         : 12;
+           uint64_t base_addr    : 40;
+           uint32_t rsvd3        : 12;
        } __attribute__((packed));
     } __attribute__((packed));
 } __attribute__((packed));
 
 
+// Per-APIC startup progress, advanced by INIT and Start-Up IPIs in deliver_ipi().
+typedef enum {INIT_ST,   // set by init_apic_state(); the only state in which an INIT IPI is accepted
+             SIPI,       // INIT accepted; the only state in which a Start-Up IPI is accepted
+             STARTED} ipi_state_t;   // Start-Up IPI delivered
+
+struct apic_dev_state;
+
 struct apic_state {
     addr_t base_addr;
 
@@ -176,14 +198,16 @@ struct apic_state {
     uint32_t rem_rd_data;
 
 
+    ipi_state_t ipi_state;
+
     uint8_t int_req_reg[32];
     uint8_t int_svc_reg[32];
     uint8_t int_en_reg[32];
     uint8_t trig_mode_reg[32];
-  
-    uint32_t eoi;
 
-    struct vm_device * icc_bus;
+    struct guest_info * core;
+
+    uint32_t eoi;
 
     v3_lock_t  lock;
 };
@@ -191,11 +215,18 @@ struct apic_state {
 
 
 
+struct apic_dev_state {   // passed as priv_data to the APIC MSR and memory-hook handlers
+    int num_apics;   // upper bound used when iterating or indexing apics[]
+
+    struct apic_state apics[0];   // zero-length trailing array; handlers index it with core->cpu_id
+} __attribute__((packed));
+
+
 
 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data);
 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data);
 
-static void init_apic_state(struct apic_state * apic, uint32_t id, struct vm_device * icc) {
+static void init_apic_state(struct apic_state * apic, uint32_t id) {
     apic->base_addr = DEFAULT_BASE_ADDR;
 
     if (id == 0) { 
@@ -226,7 +257,7 @@ static void init_apic_state(struct apic_state * apic, uint32_t id, struct vm_dev
 
     apic->lapic_id.val = id;
     
-    apic->icc_bus = icc;
+    apic->ipi_state = INIT_ST;
 
     // The P6 has 6 LVT entries, so we set the value to (6-1)...
     apic->apic_ver.val = 0x80050010;
@@ -258,9 +289,8 @@ static void init_apic_state(struct apic_state * apic, uint32_t id, struct vm_dev
 
 
 static int read_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t * dst, void * priv_data) {
-    struct vm_device * dev = (struct vm_device *)priv_data;
-    struct apic_state * apics = (struct apic_state *)(dev->private_data);
-    struct apic_state * apic = &(apics[core->cpu_id]);
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data;
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
 
     PrintDebug("apic %u: core %u: MSR read\n", apic->lapic_id.val, core->cpu_id);
     v3_lock(apic->lock);
@@ -271,13 +301,12 @@ static int read_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t * dst, v
 
 
 static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, void * priv_data) {
-    struct vm_device * dev = (struct vm_device *)priv_data;
-    struct apic_state * apics = (struct apic_state *)(dev->private_data);
-    struct apic_state * apic = &(apics[core->cpu_id]);
-    struct v3_mem_region * old_reg = v3_get_mem_region(dev->vm, core->cpu_id, apic->base_addr);
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)priv_data;
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
+    struct v3_mem_region * old_reg = v3_get_mem_region(core->vm_info, core->cpu_id, apic->base_addr);
 
 
-    PrintDebug("apic %u: core %u: MSR write\n",apic->lapic_id.val,core->cpu_id);
+    PrintDebug("apic %u: core %u: MSR write\n", apic->lapic_id.val, core->cpu_id);
 
     if (old_reg == NULL) {
        // uh oh...
@@ -288,11 +317,13 @@ static int write_apic_msr(struct guest_info * core, uint_t msr, v3_msr_t src, vo
     
     v3_lock(apic->lock);
 
-    v3_delete_mem_region(dev->vm, old_reg);
+    v3_delete_mem_region(core->vm_info, old_reg);
 
     apic->base_addr = src.value;
 
-    if (v3_hook_full_mem(dev->vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, dev) == -1) {
+    if (v3_hook_full_mem(core->vm_info, core->cpu_id, apic->base_addr, 
+                        apic->base_addr + PAGE_SIZE_4KB, 
+                        apic_read, apic_write, apic_dev) == -1) {
        PrintError("apic %u: core %u: Could not hook new APIC Base address\n",
                   apic->lapic_id.val, core->cpu_id);
        v3_unlock(apic->lock);
@@ -315,22 +346,23 @@ static int activate_apic_irq(struct apic_state * apic, uint32_t irq_num) {
 
 
     if (irq_num <= 15) {
-//     PrintError("apic %u: core ?: Attempting to raise an invalid interrupt: %d\n", apic->lapic_id.val,irq_num);
+       PrintError("apic %u: core %d: Attempting to raise an invalid interrupt: %d\n", 
+                  apic->lapic_id.val, apic->core->cpu_id, irq_num);
        return -1;
     }
 
 
-    PrintDebug("apic %u: core ?: Raising APIC IRQ %d\n", apic->lapic_id.val, irq_num);
+    PrintDebug("apic %u: core %d: Raising APIC IRQ %d\n", apic->lapic_id.val, apic->core->cpu_id, irq_num);
 
     if (*req_location & flag) {
-       //V3_Print("Interrupts coallescing\n");
+       PrintDebug("Interrupt %d  coallescing\n", irq_num);
     }
 
     if (*en_location & flag) {
        *req_location |= flag;
     } else {
-       PrintDebug("apic %u: core ?: Interrupt  not enabled... %.2x\n", 
-                  apic->lapic_id.val, *en_location);
+       PrintDebug("apic %u: core %d: Interrupt  not enabled... %.2x\n", 
+                  apic->lapic_id.val, apic->core->cpu_id,*en_location);
        return 0;
     }
 
@@ -474,8 +506,294 @@ static int activate_internal_irq(struct apic_state * apic, apic_irq_type_t int_t
 }
 
 
+// Cluster-model match: 1 if mda's high nibble equals this APIC's logical-ID high nibble and the low nibbles share a bit, else 0.
+static inline int should_deliver_cluster_ipi(struct guest_info * dst_core, 
+                                            struct apic_state * dst_apic, uint8_t mda) {
+
+    if         ( ((mda & 0xf0) == (dst_apic->log_dst.dst_log_id & 0xf0)) &&     // (I am in the cluster and
+         ((mda & 0x0f) & (dst_apic->log_dst.dst_log_id & 0x0f)) ) {  //  I am in the set)
+
+       PrintDebug("apic %u core %u: accepting clustered IRQ (mda 0x%x == log_dst 0x%x)\n",
+                  dst_apic->lapic_id.val, dst_core->cpu_id, mda, 
+                  dst_apic->log_dst.dst_log_id);
+       
+       return 1;
+    } else {
+       PrintDebug("apic %u core %u: rejecting clustered IRQ (mda 0x%x != log_dst 0x%x)\n",
+                  dst_apic->lapic_id.val, dst_core->cpu_id, mda, 
+                  dst_apic->log_dst.dst_log_id);
+       return 0;
+    }
+}
+// Flat-model match: 1 if this APIC's logical ID shares at least one set bit with mda, else 0. dst_core is used only for logging.
+static inline int should_deliver_flat_ipi(struct guest_info * dst_core,
+                                         struct apic_state * dst_apic, uint8_t mda) {
+
+    if (dst_apic->log_dst.dst_log_id & mda) {  // I am in the set 
+
+       PrintDebug("apic %u core %u: accepting flat IRQ (mda 0x%x == log_dst 0x%x)\n",
+                  dst_apic->lapic_id.val, dst_core->cpu_id, mda, 
+                  dst_apic->log_dst.dst_log_id);
+      return 1;
+  } else {
+       PrintDebug("apic %u core %u: rejecting flat IRQ (mda 0x%x != log_dst 0x%x)\n",
+                  dst_apic->lapic_id.val, dst_core->cpu_id, mda, 
+                  dst_apic->log_dst.dst_log_id);
+      return 0;
+  }
+}
+// Decides whether a logical-mode IPI addressed with mda should be accepted by dst_apic.
+// dst_fmt.model selects the matching rule: 0xf -> flat, 0x0 -> cluster; mda 0xff always matches.
+// Returns 1 to deliver, 0 to skip, -1 if dst_fmt.model holds any other value.
+static int should_deliver_ipi(struct guest_info * dst_core, 
+                             struct apic_state * dst_apic, uint8_t mda) {
+
+
+    if (dst_apic->dst_fmt.model == 0xf) {
+
+       if (mda == 0xff) {
+           // always deliver broadcast
+           return 1;
+       }
+
+       return should_deliver_flat_ipi(dst_core, dst_apic, mda);
+    } else if (dst_apic->dst_fmt.model == 0x0) {
+
+       if (mda == 0xff) {
+           // always deliver broadcast
+           return 1;
+       }
+
+       return should_deliver_cluster_ipi(dst_core, dst_apic, mda);
+    } else {
+       PrintError("apic %u core %u: invalid destination format register value 0x%x for logical mode delivery.\n", 
+                  dst_apic->lapic_id.val, dst_core->cpu_id, dst_apic->dst_fmt.model);
+       return -1;
+    }
+}
+// Delivers one IPI carrying 'vector' to dst_apic, dispatching on the delivery mode (del_mode).
+// Returns 0 when the IPI was delivered or deliberately ignored, -1 for unsupported delivery modes.
+static int deliver_ipi(struct apic_state * src_apic, 
+                      struct apic_state * dst_apic, 
+                      uint32_t vector, uint8_t del_mode) {
+
+    struct guest_info * dst_core = dst_apic->core;
+
+    switch (del_mode) {
+
+       case 0:  //fixed
+       case 1: // lowest priority (handled exactly like fixed here)
+           PrintDebug("delivering IRQ %d to core %u\n", vector, dst_core->cpu_id); 
+           // NOTE(review): the return value of activate_apic_irq() is not checked here
+           activate_apic_irq(dst_apic, vector);
+
+           if (dst_apic != src_apic) { 
+               // Assume core # is same as logical processor for now
+               // TODO FIX THIS FIX THIS
+               // THERE SHOULD BE:  guestapicid->virtualapicid map,
+               //                   cpu_id->logical processor map
+               //     host maintains logical proc->physical proc
+               PrintDebug(" non-local core, forcing it to exit\n"); 
+
+               v3_interrupt_cpu(dst_core->vm_info, dst_core->cpu_id, 0);
+           }
+
+           break;
+       case 5: { //INIT
+
+           PrintDebug(" INIT delivery to core %u\n", dst_core->cpu_id);
+
+           // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...)
+
+           // Sanity check
+           if (dst_apic->ipi_state != INIT_ST) { 
+               PrintError(" Warning: core %u is not in INIT state (mode = %d), ignored\n",
+                          dst_core->cpu_id, dst_apic->ipi_state);
+               // Only a warning, since INIT INIT SIPI is common
+               break;
+           }
+
+           // We transition the target core to SIPI state
+           dst_apic->ipi_state = SIPI;  // note: locking should not be needed here
+
+           // That should be it since the target core should be
+           // waiting in host on this transition
+           // either it's on another core or on a different preemptive thread
+           // in both cases, it will quickly notice this transition 
+           // in particular, we should not need to force an exit here
+
+           PrintDebug(" INIT delivery done\n");
+
+           break;                                                      
+       }
+       case 6: { //SIPI
+
+           // Sanity check
+           if (dst_apic->ipi_state != SIPI) { 
+               PrintError(" core %u is not in SIPI state (mode = %d), ignored!\n",
+                          dst_core->cpu_id, dst_apic->ipi_state);
+               break;
+           }
+
+           // Write the RIP, CS, and descriptor
+           // assume the rest is already good to go
+           //
+           // vector VV -> rip at 0
+           //              CS = VV00
+           //  This means we start executing at linear address VV000
+           //
+           // So the selector needs to be VV00
+           // and the base needs to be VV000
+           //
+           dst_core->rip = 0;
+           dst_core->segments.cs.selector = vector << 8;
+           dst_core->segments.cs.limit = 0xffff;
+           dst_core->segments.cs.base = vector << 12;
+
+           PrintDebug(" SIPI delivery (0x%x -> 0x%x:0x0) to core %u\n",
+                      vector, dst_core->segments.cs.selector, dst_core->cpu_id);
+           // Maybe need to adjust the APIC?
+           
+           // We mark the target core as running and move its APIC to the STARTED state
+           dst_core->core_run_state = CORE_RUNNING;  // note: locking should not be needed here
+           dst_apic->ipi_state = STARTED;
+
+           // As with INIT, we should not need to do anything else
+
+           PrintDebug(" SIPI delivery done\n");
+
+           break;                                                      
+       }
+       case 2: // SMI                  
+       case 3: // reserved                                             
+       case 4: // NMI                                  
+       case 7: // ExtInt
+       default:
+           PrintError("IPI %d delivery is unsupported\n", del_mode); 
+           return -1;
+    }
+
+    return 0;
+
+}
+// Routes the IPI described by icr, sent by src_apic (NULL for a generic sender), to the destination
+// APIC(s) in apic_dev. Returns 0 on success or ignored delivery, -1 on a routing or delivery error.
+static int route_ipi(struct apic_dev_state * apic_dev,
+                    struct apic_state * src_apic, 
+                    struct int_cmd_reg * icr) {
+    struct apic_state * dest_apic = NULL;
+
+    PrintDebug("route_ipi: src_apic=%p, icr_data=%p\n", 
+              src_apic, (void *)(addr_t)icr->val);
+
+    // The destination field is validated and resolved only on the no-shorthand
+    // physical path below: shorthands ignore it, and logical mode uses it as a mask.
+
+    PrintDebug("route_ipi: IPI %s %u from apic %p to %s %s %u (icr=0x%llx)\n",
+              deliverymode_str[icr->del_mode], 
+              icr->vec, 
+              src_apic,               
+              (icr->dst_mode == 0) ? "(physical)" : "(logical)", 
+              shorthand_str[icr->dst_shorthand], 
+              icr->dst,
+              icr->val);
+
+
+    switch (icr->dst_shorthand) {
+
+       case 0:  // no shorthand
+           if (icr->dst_mode == 0) { 
+               // physical delivery: dst indexes apics[], so it must be in range
+               if (icr->dst >= apic_dev->num_apics) { 
+                   PrintError("route_ipi: Attempted send to unregistered apic id=%u\n", 
+                              icr->dst);
+                   return -1;
+               }
+
+               dest_apic = &(apic_dev->apics[icr->dst]);
+
+               if (deliver_ipi(src_apic, dest_apic, 
+                               icr->vec, icr->del_mode) == -1) {
+                   PrintError("Error: Could not deliver IPI\n");
+                   return -1;
+               }
+
+           } else {
+               // logical delivery: dst is a message destination address tested against every APIC
+               int i;
+               uint8_t mda = icr->dst;
+
+               for (i = 0; i < apic_dev->num_apics; i++) { 
+                    dest_apic = &(apic_dev->apics[i]);
+                    int del_flag = should_deliver_ipi(dest_apic->core, dest_apic, mda);
+                    
+                    if (del_flag == -1) {
+                        PrintError("Error checking delivery mode\n");
+                        return -1;
+                    } else if (del_flag == 1) {
+                       if (deliver_ipi(src_apic, dest_apic, 
+                                       icr->vec, icr->del_mode) == -1) {
+                           PrintError("Error: Could not deliver IPI\n");
+                           return -1;
+                       }
+                   }
+               }
+           }
+           
+           break;
+           
+       case 1:  // self
+
+           if (src_apic == NULL) {
+               PrintError("Sending IPI to self from generic IPI sender\n");
+               break;
+           }
+
+           if (icr->dst_mode == 0) { 
+               if (deliver_ipi(src_apic, src_apic, icr->vec, icr->del_mode) == -1) {
+                   PrintError("Could not deliver IPI\n");
+                   return -1;
+               }
+           } else {
+               // logical delivery
+               PrintError("use of logical delivery in self is not yet supported.\n");
+               return -1;
+           }
+           break;
+           
+       case 2: 
+       case 3: { // all and all-but-me
+           // assuming that logical versus physical doesn't matter
+           // although it is odd that both are used
+           int i;
+
+           for (i = 0; i < apic_dev->num_apics; i++) { 
+               dest_apic = &(apic_dev->apics[i]);
+
+               if ((dest_apic != src_apic) || (icr->dst_shorthand == 2)) { 
+                   if (deliver_ipi(src_apic, dest_apic, icr->vec, icr->del_mode) == -1) {
+                       PrintError("Error: Could not deliver IPI\n");
+                       return -1;
+                   }
+               }
+           }   
+
+           break;
+       }
+       default:
+           PrintError("Error routing IPI, invalid Mode (%d)\n", icr->dst_shorthand);
+           return -1;
+    }
+    
+
+    return 0;
+}
+
+
+
 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data) {
-    struct apic_state * apic = (struct apic_state *)(priv_data);
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]);
     addr_t reg_addr  = guest_addr - apic->base_addr;
     struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
     uint32_t val = 0;
@@ -738,7 +1056,8 @@ static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, ui
  *
  */
 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data) {
-    struct apic_state * apic = (struct apic_state *)(priv_data);
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); 
     addr_t reg_addr  = guest_addr - apic->base_addr;
     struct apic_msr * msr = (struct apic_msr *)&(apic->base_addr_msr.value);
     uint32_t op_val = *(uint32_t *)src;
@@ -810,6 +1129,8 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u
            apic->task_prio.val = op_val;
            break;
        case LDR_OFFSET:
+           PrintDebug("apic %u: core %u: setting log_dst.val to 0x%x\n",
+                      apic->lapic_id.val, core->cpu_id, op_val);
            apic->log_dst.val = op_val;
            break;
        case DFR_OFFSET:
@@ -900,13 +1221,15 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u
        case INT_CMD_LO_OFFSET:
            apic->int_cmd.lo = op_val;
 
-           // ICC???
            PrintDebug("apic %u: core %u: sending cmd 0x%llx to apic %u\n", 
                       apic->lapic_id.val, core->cpu_id,
                       apic->int_cmd.val, apic->int_cmd.dst);
-           if (v3_icc_send_ipi(apic->icc_bus, apic->lapic_id.val, apic->int_cmd.val,apic->dst_fmt.val,0)==-1) { 
+
+           if (route_ipi(apic_dev, apic, &(apic->int_cmd)) == -1) { 
+               PrintError("IPI Routing failure\n");
                return -1;
            }
+
            break;
 
        case INT_CMD_HI_OFFSET:
@@ -934,8 +1257,9 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u
 /* Interrupt Controller Functions */
 
 // returns 1 if an interrupt is pending, 0 otherwise
-static int apic_intr_pending(struct guest_info * info, void * private_data) {
-    struct apic_state * apic = (struct apic_state *)private_data;
+static int apic_intr_pending(struct guest_info * core, void * private_data) {
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); 
     int req_irq = get_highest_irr(apic);
     int svc_irq = get_highest_isr(apic);
 
@@ -949,8 +1273,9 @@ static int apic_intr_pending(struct guest_info * info, void * private_data) {
     return 0;
 }
 
-static int apic_get_intr_number(struct guest_info * info, void * private_data) {
-    struct apic_state * apic = (struct apic_state *)private_data;
+static int apic_get_intr_number(struct guest_info * core, void * private_data) {
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); 
     int req_irq = get_highest_irr(apic);
     int svc_irq = get_highest_isr(apic);
 
@@ -964,16 +1289,48 @@ static int apic_get_intr_number(struct guest_info * info, void * private_data) {
 }
 
 
-static int apic_raise_intr(struct guest_info * info, int irq, void * private_data) {
-  struct apic_state * apic = (struct apic_state *)private_data;
+int v3_apic_send_ipi(struct v3_vm_info * vm, struct vm_device * dev, 
+                    struct v3_gen_ipi * ipi) {
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(dev->private_data);
+    struct int_cmd_reg tmp_icr;
+    // Builds a temporary ICR image from the generic IPI request and hands it to route_ipi(); vm is not used here
+    // zero out all the fields
+    tmp_icr.val = 0;
+
+    // copy the request field by field into the ICR layout
+    tmp_icr.vec = ipi->vector;
+    tmp_icr.del_mode = ipi->mode;
+    tmp_icr.dst_mode = ipi->logical;
+    tmp_icr.trig_mode = ipi->trigger_mode;
+    tmp_icr.dst_shorthand = ipi->dst_shorthand;
+    tmp_icr.dst = ipi->dst;
+    
+    // NULL source APIC: route_ipi() treats a NULL sender as the generic (non-APIC) IPI sender
+    return route_ipi(apic_dev, NULL, &tmp_icr);
+}
+
+
+int v3_apic_raise_intr(struct v3_vm_info * vm, struct vm_device * dev, 
+                      uint32_t irq, uint32_t dst) {
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(dev->private_data);
+    struct apic_state * apic = &(apic_dev->apics[dst]); 
+    // NOTE(review): dst indexes apics[] directly; no range check against num_apics is visible here -- confirm callers validate it
+    PrintDebug("apic %u core ?: raising interrupt IRQ %u (dst = %u).\n", apic->lapic_id.val, irq, dst); 
+    // NOTE(review): activate_apic_irq()'s return value is discarded; this function always returns 0
+    activate_apic_irq(apic, irq);
+    // Interrupt the destination CPU when it is not the one executing this call; dst serves as both APIC index and CPU id
+    if (V3_Get_CPU() != dst) {
+       v3_interrupt_cpu(vm, dst, 0);
+    }
 
-  return activate_apic_irq(apic, irq);
+    return 0;
 }
 
 
 
-static int apic_begin_irq(struct guest_info * info, void * private_data, int irq) {
-    struct apic_state * apic = (struct apic_state *)private_data;
+static int apic_begin_irq(struct guest_info * core, void * private_data, int irq) {
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(private_data);
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); 
     int major_offset = (irq & ~0x00000007) >> 3;
     int minor_offset = irq & 0x00000007;
     uint8_t * req_location = apic->int_req_reg + major_offset;
@@ -987,8 +1344,8 @@ static int apic_begin_irq(struct guest_info * info, void * private_data, int irq
        *req_location &= ~flag;
     } else {
        // do nothing... 
-       PrintDebug("apic %u: core %u: begin irq for %d ignored since I don't own it\n",
-                  apic->lapic_id.val, info->cpu_id, irq);
+       //PrintDebug("apic %u: core %u: begin irq for %d ignored since I don't own it\n",
+       //         apic->lapic_id.val, core->cpu_id, irq);
     }
 
     return 0;
@@ -998,10 +1355,12 @@ static int apic_begin_irq(struct guest_info * info, void * private_data, int irq
 
 
 /* Timer Functions */
-static void apic_update_time(struct guest_info * info, 
+static void apic_update_time(struct guest_info * core, 
                             uint64_t cpu_cycles, uint64_t cpu_freq, 
                             void * priv_data) {
-    struct apic_state * apic = (struct apic_state *)(priv_data);
+    struct apic_dev_state * apic_dev = (struct apic_dev_state *)(priv_data);
+    struct apic_state * apic = &(apic_dev->apics[core->cpu_id]); 
+
     // The 32 bit GCC runtime is a pile of shit
 #ifdef __V3_64BIT__
     uint64_t tmr_ticks = 0;
@@ -1051,7 +1410,7 @@ static void apic_update_time(struct guest_info * info,
            break;
        default:
            PrintError("apic %u: core %u: Invalid Timer Divider configuration\n",
-                      apic->lapic_id.val, info->cpu_id);
+                      apic->lapic_id.val, core->cpu_id);
            return;
     }
 
@@ -1066,18 +1425,18 @@ static void apic_update_time(struct guest_info * info,
 
        // raise irq
        PrintDebug("apic %u: core %u: Raising APIC Timer interrupt (periodic=%d) (icnt=%d) (div=%d)\n",
-                  apic->lapic_id.val, info->cpu_id,
+                  apic->lapic_id.val, core->cpu_id,
                   apic->tmr_vec_tbl.tmr_mode, apic->tmr_init_cnt, shift_num);
 
-       if (apic_intr_pending(info, priv_data)) {
+       if (apic_intr_pending(core, priv_data)) {
            PrintDebug("apic %u: core %u: Overriding pending IRQ %d\n", 
-                      apic->lapic_id.val, info->cpu_id, 
-                      apic_get_intr_number(info, priv_data));
+                      apic->lapic_id.val, core->cpu_id, 
+                      apic_get_intr_number(core, priv_data));
        }
 
        if (activate_internal_irq(apic, APIC_TMR_INT) == -1) {
            PrintError("apic %u: core %u: Could not raise Timer interrupt\n",
-                      apic->lapic_id.val, info->cpu_id);
+                      apic->lapic_id.val, core->cpu_id);
        }
     
        if (apic->tmr_vec_tbl.tmr_mode == APIC_TMR_PERIODIC) {
@@ -1098,7 +1457,7 @@ static struct intr_ctrl_ops intr_ops = {
 
 
 static struct vm_timer_ops timer_ops = {
-    .update_time = apic_update_time,
+    .update_timer = apic_update_time,
 };
 
 
@@ -1125,57 +1484,21 @@ static struct v3_device_ops dev_ops = {
 
 
 
-static int apic_should_deliver_flat(struct guest_info * core, uint8_t mda, void * private_data)
-{
-  struct apic_state * apic = (struct apic_state *)private_data;
-
-  if (mda==0xff ||                         // broadcast or
-      (apic->log_dst.dst_log_id & mda)) {  // I am in the set 
-      return 1;
-  } else {
-      return 0;
-  }
-}
-
-static int apic_should_deliver_cluster(struct guest_info * core, uint8_t mda, void * private_data)
-{
-  struct apic_state * apic = (struct apic_state *)private_data;
-
-  if (mda==0xff ||                                                 // broadcast or
-      ( ((mda & 0xf0) == (apic->log_dst.dst_log_id & 0xf0)) &&     // (I am in the cluster and
-        ((mda & 0x0f)  & (apic->log_dst.dst_log_id & 0x0f)) ) ) {  //  I am in the set)
-      return 1;
-  } else {
-      return 0;
-  }
-}
-
-static struct v3_icc_ops icc_ops = {
-    .raise_intr = apic_raise_intr,
-    .should_deliver_flat = apic_should_deliver_flat,
-    .should_deliver_cluster = apic_should_deliver_cluster,
-};
-
 
 
 static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
-    PrintDebug("apic: creating an APIC for each core\n");
     char * dev_id = v3_cfg_val(cfg, "ID");
-    char * icc_bus_id = v3_cfg_val(cfg, "bus");
-    struct vm_device * icc = v3_find_dev(vm, icc_bus_id);
-    int i;
+    struct apic_dev_state * apic_dev = NULL;
+    int i = 0;
 
-    if (!icc) {
-        PrintError("apic: Cannot find ICC Bus (%s)\n", icc_bus_id);
-        return -1;
-    }
+    PrintDebug("apic: creating an APIC for each core\n");
+
+    apic_dev = (struct apic_dev_state *)V3_Malloc(sizeof(struct apic_dev_state) + 
+                                                 sizeof(struct apic_state) * vm->num_cores);
 
-    // We allocate one apic per core
-    // APICs are accessed via index which correlates with the core's cpu_id 
-    // 0..num_cores-1   at num_cores is the ioapic (one only)
-    struct apic_state * apic = (struct apic_state *)V3_Malloc(sizeof(struct apic_state) * vm->num_cores);
+    apic_dev->num_apics = vm->num_cores;
 
-    struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, apic);
+    struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, apic_dev);
 
     if (v3_attach_device(vm, dev) == -1) {
        PrintError("apic: Could not attach device %s\n", dev_id);
@@ -1184,30 +1507,34 @@ static int apic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
 
     
     for (i = 0; i < vm->num_cores; i++) {
+       struct apic_state * apic = &(apic_dev->apics[i]);
        struct guest_info * core = &(vm->cores[i]);
 
-       init_apic_state(&(apic[i]),i,icc);
-
-       v3_register_intr_controller(core, &intr_ops, &(apic[i]));
+       apic->core = core;
 
-       v3_add_timer(core, &timer_ops, &(apic[i]));
+       init_apic_state(apic, i);
 
-       v3_hook_full_mem(vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, &(apic[i]));
+       v3_register_intr_controller(core, &intr_ops, apic_dev);
 
-       v3_icc_register_apic(core, icc, i, &icc_ops, &(apic[i]));
+       v3_add_timer(core, &timer_ops, apic_dev);
 
-       PrintDebug("apic %u: (setup device): done, my id is %u\n", i, apic[i].lapic_id.val);
+       v3_hook_full_mem(vm, core->cpu_id, apic->base_addr, apic->base_addr + PAGE_SIZE_4KB, apic_read, apic_write, apic_dev);
 
+       PrintDebug("apic %u: (setup device): done, my id is %u\n", i, apic->lapic_id.val);
     }
 
+#ifdef CONFIG_DEBUG_APIC
     for (i = 0; i < vm->num_cores; i++) {
+       struct apic_state * apic = &(apic_dev->apics[i]);
        PrintDebug("apic: sanity check: apic %u (at %p) has id %u and msr value %llx\n",
-                  i, &(apic[i]), apic[i].lapic_id.val, apic[i].base_addr_msr.value);
+                  i, apic, apic->lapic_id.val, apic->base_addr_msr.value);
     }
+#endif
+
 
-    PrintDebug("apic: priv_data is at %p\n", apic);
+    PrintDebug("apic: priv_data is at %p\n", apic_dev);
 
-    v3_hook_msr(vm, BASE_ADDR_MSR, read_apic_msr, write_apic_msr, dev);
+    v3_hook_msr(vm, BASE_ADDR_MSR, read_apic_msr, write_apic_msr, apic_dev);
 
     return 0;
 }
index b30ee02..528b6a0 100644 (file)
@@ -39,8 +39,8 @@
 
 struct cons_state 
 {
-       void *tty;
-       struct vm_device *frontend_dev;
+    void * tty;
+    struct vm_device * frontend_dev;
 };
 
 static int screen_update(uint_t x, uint_t y, uint_t length, void *private_data);
@@ -49,112 +49,110 @@ static uint_t last_offset;
 
 static int cursor_update(uint_t x, uint_t y, void *private_data) 
 {
-       struct vm_device *dev = (struct vm_device *) private_data;
-       struct cons_state *state = (struct cons_state *) dev->private_data;
-       uint_t offset = (x * BYTES_PER_COL) + (y * BYTES_PER_ROW);
-       uint_t last_x, last_y;
-
-       /* unfortunately Palacios sometimes misses some writes, 
-        * but if they are accompanied by a cursor move we may be able to 
-        * detect this
-        */
-       if (offset < last_offset) last_offset = 0;
-       if (offset > last_offset) {
-               last_x = (last_offset % BYTES_PER_ROW) / BYTES_PER_COL;
-               last_y = last_offset / BYTES_PER_ROW;
-               screen_update(last_x, last_y, offset - last_offset, private_data);
-       }
+    struct vm_device *dev = (struct vm_device *) private_data;
+    struct cons_state *state = (struct cons_state *) dev->private_data;
+    uint_t offset = (x * BYTES_PER_COL) + (y * BYTES_PER_ROW);
+    uint_t last_x, last_y;
+
+    /* unfortunately Palacios sometimes misses some writes, 
+     * but if they are accompanied by a cursor move we may be able to 
+     * detect this
+     */
+    if (offset < last_offset) last_offset = 0;
+
+    if (offset > last_offset) {
+       last_x = (last_offset % BYTES_PER_ROW) / BYTES_PER_COL;
+       last_y = last_offset / BYTES_PER_ROW;
+       screen_update(last_x, last_y, offset - last_offset, private_data);
+    }
+    
+    /* adjust cursor */        
+    if (V3_TtyCursorSet(state->tty, x, y) < 0) {
+       PrintError("V3_TtyCursorSet(0x%p, %d, %d) failed\n", state->tty, x, y);
+       return -1;
+    }
+    
+    /* done with console update */
+    if (V3_TtyUpdate(state->tty) < 0) {
+       PrintError("V3_TtyUpdate(0x%p) failed\n", state->tty);
+       return -1;
+    }
+    
+    return 0;
+}
 
-       /* adjust cursor */     
-       if (V3_TtyCursorSet(state->tty, x, y) < 0) {
-               PrintError("V3_TtyCursorSet(0x%p, %d, %d) failed\n", state->tty, x, y);
-               return -1;
+static int screen_update(uint_t x, uint_t y, uint_t length, void * private_data) {
+    struct vm_device *dev = (struct vm_device *)private_data;
+    struct cons_state *state = (struct cons_state *)dev->private_data;
+    uint_t offset = (x * BYTES_PER_COL) + (y * BYTES_PER_ROW);
+    uint8_t fb_buf[length];
+    int i;
+    uint_t cur_x = x;
+    uint_t cur_y = y;
+    /* Redraws length bytes of frame buffer starting at cell (x, y); returns 0 on success, -1 on a tty error */
+    /* grab frame buffer */
+    memset(fb_buf, 0, length);
+    v3_cons_get_fb(state->frontend_dev, fb_buf, offset, length);
+    
+    /* update the screen, two bytes per cell; stop before a trailing odd byte so col[1] never reads past fb_buf */
+    for (i = 0; i + 1 < length; i += 2) {
+       uint_t col_index = i;
+       uint8_t col[2];
+       
+       col[0] = fb_buf[col_index];     // Character
+       col[1] = fb_buf[col_index + 1]; // Attribute
+       
+       /* update current character */
+       if (V3_TtyCharacterSet(state->tty, cur_x, cur_y, col[0], col[1]) < 0) {
+           PrintError("V3_TtyCharacterSet(0x%p, %d, %d, %d, %d) failed\n", 
+                      state->tty, cur_x, cur_y, col[0], col[1]);
+           return -1;
        }
+       
+       // CAUTION: the order of these statements is critical
+       // cur_y depends on the previous value of cur_x
+       cur_y = cur_y + ((cur_x + 1) / NUM_COLS);
+       cur_x = (cur_x + 1) % NUM_COLS;
+    }
+    
+    /* done with console update */
+    if (V3_TtyUpdate(state->tty) < 0) {
+       PrintError("V3_TtyUpdate(0x%p) failed\n", state->tty);
+       return -1;
+    }
+    
+    /* store offset to catch missing notifications */
+    last_offset = offset + length;
+    
+    return 0;
+}
 
-       /* done with console update */
-       if (V3_TtyUpdate(state->tty) < 0) {
-               PrintError("V3_TtyUpdate(0x%p) failed\n", state->tty);
-               return -1;
-       }
+static int scroll(int rows, void * private_data) {
+    struct vm_device *dev = (struct vm_device *)private_data;
+    struct cons_state *state = (struct cons_state *)dev->private_data;
 
-       return 0;
-}
+    if (rows < 0) {
+       /* simply update the screen */
+       return screen_update(0, 0, SCREEN_SIZE, private_data);
+    }
 
-static int screen_update(uint_t x, uint_t y, uint_t length, void *private_data) 
-{
-       struct vm_device *dev = (struct vm_device *)private_data;
-       struct cons_state *state = (struct cons_state *)dev->private_data;
-       uint_t offset = (x * BYTES_PER_COL) + (y * BYTES_PER_ROW);
-       uint8_t fb_buf[length];
-       int i;
-       uint_t cur_x = x;
-       uint_t cur_y = y;
-
-       /* grab frame buffer */
-       memset(fb_buf, 0, length);
-       v3_cons_get_fb(state->frontend_dev, fb_buf, offset, length);
-       
-       /* update the screen */
-       for (i = 0; i < length; i += 2) 
-       {
-               uint_t col_index = i;
-               uint8_t col[2];
-
-               col[0] = fb_buf[col_index];     // Character
-               col[1] = fb_buf[col_index + 1]; // Attribute
-
-               /* update current character */
-               if (V3_TtyCharacterSet(state->tty, cur_x, cur_y, col[0], col[1]) < 0) {
-                       PrintError("V3_TtyCursorSet(0x%p, %d, %d, %d, %d) failed\n", 
-                               state->tty, cur_x, cur_y, col[1], col[0]);
-                       return -1;
-               }
-                   
-               // CAUTION: the order of these statements is critical
-               // cur_y depends on the previous value of cur_x
-               cur_y = cur_y + ((cur_x + 1) / NUM_COLS);
-               cur_x = (cur_x + 1) % NUM_COLS;
+    if (rows > 0) {
+       /* scroll requested number of lines*/           
+       if (V3_TtyScroll(state->tty, rows) < 0) {
+           PrintError("V3_TtyScroll(0x%p, %u) failed\n", state->tty, rows);
+           return -1;
        }
 
        /* done with console update */
        if (V3_TtyUpdate(state->tty) < 0) {
-               PrintError("V3_TtyUpdate(0x%p) failed\n", state->tty);
-               return -1;
+           PrintError("V3_TtyUpdate(0x%p) failed\n", state->tty);
+           return -1;
        }
-
-       /* store offset to catch missing notifications */
-       last_offset = offset + length;
-
-       return 0;
-}
-
-static int scroll(int rows, void *private_data) 
-{
-       struct vm_device *dev = (struct vm_device *)private_data;
-       struct cons_state *state = (struct cons_state *)dev->private_data;
-
-       if (rows < 0) {
-               /* simply update the screen */
-               return screen_update(0, 0, SCREEN_SIZE, private_data);
-       }
-
-       if (rows > 0) {
-               /* scroll requested number of lines*/           
-               if (V3_TtyScroll(state->tty, rows) < 0) {
-                       PrintError("V3_TtyScroll(0x%p, %u) failed\n", state->tty, rows);
-                       return -1;
-               }
-
-               /* done with console update */
-               if (V3_TtyUpdate(state->tty) < 0) {
-                       PrintError("V3_TtyUpdate(0x%p) failed\n", state->tty);
-                       return -1;
-               }
                
-               last_offset = BYTES_PER_ROW * (NUM_ROWS - 1);           
-       }
+       last_offset = BYTES_PER_ROW * (NUM_ROWS - 1);           
+    }
        
-       return 0;
+    return 0;
 }
 
 static struct v3_console_ops cons_ops = {
@@ -172,48 +170,49 @@ static struct v3_device_ops dev_ops = {
 
 static int cons_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) 
 {
-       struct cons_state * state = NULL;
-       v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend");
-       const char * frontend_tag = v3_cfg_val(frontend_cfg, "tag");
-       struct vm_device * frontend = v3_find_dev(vm, frontend_tag);
-       char * dev_id = v3_cfg_val(cfg, "ID");
-       char * ttypath = v3_cfg_val(cfg, "tty");
-
-       /* read configuration */
-       V3_ASSERT(frontend_cfg);
-       V3_ASSERT(frontend_tag);
-       V3_ASSERT(frontend);
-
-
-       /* allocate state */
-       state = (struct cons_state *)V3_Malloc(sizeof(struct cons_state));
-       V3_ASSERT(state);
-       state->frontend_dev = frontend;
-       V3_ASSERT(ttypath);
-
-       /* open tty for screen display */
-       state->tty = V3_TtyOpen(ttypath, TTY_OPEN_MODE_READ | TTY_OPEN_MODE_WRITE);
-       if (!state->tty) {
-               PrintError("Could not open tty %s\n", ttypath);
-               V3_Free(state);
-               return -1;
-       }
-
-       /* allocate device */
-       struct vm_device *dev = v3_allocate_device(dev_id, &dev_ops, state);
-       V3_ASSERT(dev);
-
-       /* attach device to virtual machine */
-       if (v3_attach_device(vm, dev) == -1) {
-               PrintError("Could not attach device %s\n", dev_id);
-               V3_Free(state);
-               return -1;
-       }
-
-       /* attach to front-end display adapter */
-       v3_console_register_cga(frontend, &cons_ops, dev);
-
-       return 0;
+    struct cons_state * state = NULL;
+    v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend");
+    const char * frontend_tag = v3_cfg_val(frontend_cfg, "tag");
+    struct vm_device * frontend = v3_find_dev(vm, frontend_tag);
+    char * dev_id = v3_cfg_val(cfg, "ID");
+    char * ttypath = v3_cfg_val(cfg, "tty");
+
+    /* read configuration */
+    V3_ASSERT(frontend_cfg);
+    V3_ASSERT(frontend_tag);
+    V3_ASSERT(frontend);
+
+
+    /* allocate state */
+    state = (struct cons_state *)V3_Malloc(sizeof(struct cons_state));
+    V3_ASSERT(state);
+    state->frontend_dev = frontend;
+    V3_ASSERT(ttypath);
+
+    /* open tty for screen display */
+    state->tty = V3_TtyOpen(vm, ttypath, TTY_OPEN_MODE_READ | TTY_OPEN_MODE_WRITE);
+
+    if (!state->tty) {
+       PrintError("Could not open tty %s\n", ttypath);
+       V3_Free(state);
+       return -1;
+    }
+
+    /* allocate device */
+    struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, state);
+    V3_ASSERT(dev);
+
+    /* attach device to virtual machine */
+    if (v3_attach_device(vm, dev) == -1) {
+       PrintError("Could not attach device %s\n", dev_id);
+       V3_Free(state);
+       return -1;
+    }
+
+    /* attach to front-end display adapter */
+    v3_console_register_cga(frontend, &cons_ops, dev);
+
+    return 0;
 }
 
 device_register("CURSES_CONSOLE", cons_init)
diff --git a/palacios/src/devices/icc_bus.c b/palacios/src/devices/icc_bus.c
deleted file mode 100644 (file)
index 0a74eee..0000000
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * This file is part of the Palacios Virtual Machine Monitor developed
- * by the V3VEE Project with funding from the United States National
- * Science Foundation and the Department of Energy.
- *
- * The V3VEE Project is a joint project between Northwestern University
- * and the University of New Mexico.  You can find out more at
- * http://www.v3vee.org
- *
- * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
- * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
- * All rights reserved.
- *
- * Author: Jack Lange <jarusl@cs.northwestern.edu>
- *
- * This is free software.  You are permitted to use,
- * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
- */
-
-#include <palacios/vmm_dev_mgr.h>
-#include <palacios/vmm_sprintf.h>
-#include <palacios/vm_guest.h>
-#include <devices/icc_bus.h>
-#include <devices/apic_regs.h>
-
-#define MAX_APICS 256
-
-#ifndef CONFIG_DEBUG_ICC_BUS
-#undef PrintDebug
-#define PrintDebug(fmt, args...)
-#endif
-
-
-void v3_force_exit(void *p) {
-#ifdef CONFIG_DEBUG_ICC_BUS
-    struct guest_info *core=(struct guest_info *)p;
-#endif
-    PrintDebug("core %u: Forced to exit!\n",core->cpu_id);
-}
-
-struct ipi_thunk_data {
-    struct vm_device * target;
-    uint64_t val;
-};
-
-
-
-struct apic_data {
-    struct guest_info * core;
-    struct v3_icc_ops * ops;
-    
-    void * priv_data;
-    int present;
-};
-
-
-struct icc_bus_state {
-    struct apic_data apics[MAX_APICS];
-    
-    uint32_t         ioapic_id;
-};
-
-static struct v3_device_ops dev_ops = {
-    .free = NULL,
-    .reset = NULL,
-    .start = NULL,
-    .stop = NULL,
-};
-
-#ifdef CONFIG_DEBUG_ICC_BUS
-static char *shorthand_str[] = { 
-    "(no shorthand)",
-    "(self)",
-    "(all)",
-    "(all-but-me)",
- };
-
-static char *deliverymode_str[] = { 
-    "(fixed)",
-    "(lowest priority)",
-    "(SMI)",
-    "(reserved)",
-    "(NMI)",
-    "(INIT)",
-    "(Start Up)",
-    "(ExtInt)",
-};
-#endif
-
-
-static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cmd_reg *icr, struct icc_bus_state * state, uint32_t extirq) {
-
-    switch (icr->del_mode) {                                           
-
-       case 0:  //fixed
-       case 1: // lowest priority
-       case 7: // ExtInt
-           PrintDebug("icc_bus: delivering IRQ to core %u\n",dest_apic->core->cpu_id); 
-           dest_apic->ops->raise_intr(dest_apic->core, 
-                                      icr->del_mode!=7 ? icr->vec : extirq,
-                                      dest_apic->priv_data); 
-           if (src_apic!=state->ioapic_id && dest_apic->core->cpu_id != src_apic) { 
-               // Assume core # is same as logical processor for now
-               // TODO FIX THIS FIX THIS
-               // THERE SHOULD BE:  guestapicid->virtualapicid map,
-               //                   cpu_id->logical processor map
-               //     host maitains logical proc->phsysical proc
-               PrintDebug("icc_bus: non-local core, forcing it to exit\n"); 
-               V3_Call_On_CPU(dest_apic->core->cpu_id,v3_force_exit,(void*)(dest_apic->core));
-               // TODO: do what the print says
-           }                                                   
-           break;                                                      
-           
-       case 2:   //SMI                 
-           PrintError("icc_bus: SMI delivery is unsupported\n");       
-           return -1;                                          
-           break;                                                      
-           
-       case 3:  //reserved                                             
-           PrintError("icc_bus: Reserved delivery mode 3 is unsupported\n"); 
-           return -1;                                          
-           break;                                                      
-
-       case 4:  //NMI                                  
-           PrintError("icc_bus: NMI delivery is unsupported\n"); 
-           return -1;                                          
-           break;                                                      
-
-       case 5: { //INIT
-           struct guest_info *core = dest_apic->core;
-
-           PrintDebug("icc_bus: INIT delivery to core %u\n",core->cpu_id);
-
-           // TODO: any APIC reset on dest core (shouldn't be needed, but not sure...)
-
-           // Sanity check
-           if (core->cpu_mode != INIT) { 
-               PrintError("icc_bus: Warning: core %u is not in INIT state, ignored\n",core->cpu_id);
-               // Only a warning, since INIT INIT SIPI is common
-               break;
-           }
-
-           // We transition the target core to SIPI state
-           core->cpu_mode = SIPI;  // note: locking should not be needed here
-
-           // That should be it since the target core should be
-           // waiting in host on this transition
-           // either it's on another core or on a different preemptive thread
-           // in both cases, it will quickly notice this transition 
-           // in particular, we should not need to force an exit here
-
-           PrintDebug("icc_bus: INIT delivery done\n");
-
-       }
-           break;                                                      
-
-       case 6: { //SIPI
-           struct guest_info *core = dest_apic->core;
-
-           // Sanity check
-           if (core->cpu_mode!=SIPI) { 
-               PrintError("icc_bus: core %u is not in SIPI state, ignored!\n",core->cpu_id);
-               break;
-           }
-
-           // Write the RIP, CS, and descriptor
-           // assume the rest is already good to go
-           //
-           // vector VV -> rip at 0
-           //              CS = VV00
-           //  This means we start executing at linear address VV000
-           //
-           // So the selector needs to be VV00
-           // and the base needs to be VV000
-           //
-           core->rip = 0;
-           core->segments.cs.selector = icr->vec << 8;
-           core->segments.cs.limit = 0xffff;
-           core->segments.cs.base = icr->vec << 12;
-
-           PrintDebug("icc_bus: SIPI delivery (0x%x -> 0x%x:0x0) to core %u\n",
-                      icr->vec, core->segments.cs.selector, core->cpu_id);
-           // Maybe need to adjust the APIC?
-           
-           // We transition the target core to SIPI state
-           core->cpu_mode = REAL;  // note: locking should not be needed here
-
-           // As with INIT, we should not need to do anything else
-
-           PrintDebug("icc_bus: SIPI delivery done\n");
-
-       }
-           break;                                                      
-    }
-
-    return 0;
-} 
-
-
-//
-// icr_data contains interrupt vector *except* for ext_int
-// in which case it is given via irq
-//
-
-int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data, 
-                   uint32_t dfr_data, uint32_t extirq) {
-
-    PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",icc_bus,src_apic,icr_data,extirq);
-
-    struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data;
-    struct dst_fmt_reg *dfr = (struct dst_fmt_reg*)&dfr_data;
-
-    struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
-    struct apic_data * dest_apic = NULL;
-    PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n", 
-              icc_bus, src_apic, icr_data, extirq);
-
-    // initial sanity checks
-    if ((src_apic >= MAX_APICS) || 
-       ((state->apics[src_apic].present == 0) && 
-        (src_apic != state->ioapic_id))) { 
-       PrintError("icc_bus: Apparently sending from unregistered apic id=%u\n",src_apic);
-       return -1;
-    }
-
-
-    if ((icr->dst_mode == 0) && (state->apics[icr->dst].present == 0)) { 
-       PrintError("icc_bus: Attempted send to unregistered apic id=%u\n", icr->dst);
-       return -1;
-    }
-
-    dest_apic =  &(state->apics[icr->dst]);
-
-
-    PrintDebug("icc_bus: IPI %s %u from %s %u to %s %s %u (icr=0x%llx, dfr=0x%x) (extirq=%u)\n",
-              deliverymode_str[icr->del_mode], icr->vec, 
-              src_apic==state->ioapic_id ? "ioapic" : "apic",
-              src_apic,               
-              icr->dst_mode==0 ? "(physical)" : "(logical)", 
-              shorthand_str[icr->dst_shorthand], icr->dst,icr->val, dfr->val,
-              extirq);
-
-    /*
-
-    if (icr->dst==state->ioapic_id) { 
-       PrintError("icc_bus: Attempted send to ioapic ignored\n");
-       return -1;
-    }
-    */
-
-
-
-    switch (icr->dst_shorthand) {
-
-       case 0:  // no shorthand
-
-           if (icr->dst_mode==0) { 
-               // physical delivery
-               struct apic_data * dest_apic =  &(state->apics[icr->dst]);
-               if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-                   return -1;
-               }
-           } else {
-               // logical delivery
-               uint8_t mda = icr->dst; // message destination address, not physical address
-               
-               if (dfr->model==0xf) { 
-                   // flat model
-                   // this means we deliver the IPI each destination APIC where
-                   // mda of sender & ldr of receiver is nonzero
-                   // mda=0xff means broadcast to all
-                   //
-                   int i;
-                   for (i=0;i<MAX_APICS;i++) { 
-                       struct apic_data *dest_apic=&(state->apics[i]);
-                       if (dest_apic->present &&
-                           dest_apic->ops->should_deliver_flat(dest_apic->core,
-                                                               mda,
-                                                               dest_apic->priv_data)) {
-                           if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-                               return -1;
-                           }
-                       }
-                   }
-               } else if (dfr->model==0x0) {
-                   // cluster model
-                   //
-                   // there are two variants of this
-                   //
-                   // 1. (ancient P5/P6) All apics are on one bus
-                   //    mda[31:28] is the target cluster, 
-                   //    mda[27:24] has one bit for each apic in the cluster
-                   //    mda[31:28] of sending apic == ldr[31:28] of dest apic means
-                   //      the dest apic is part of the cluster
-                   //      then mda[27:24] & ldr[27:24] nonzero means to deliver
-                   //    also, mda=0xff still means broadcast 
-                   //    So, basically, you have 15 clusters of 4 apics each + broadcast
-                   //
-                   // 2. (current) hierarchical cluster model
-                   //    This is some hwat unclearly documented in volume 3, 9-32
-                   //    basically, you have a hierarchy of clusters that where
-                   //    each cluster has 4 agents (APICs?) and a cluster manager.
-                   //    The cluster manager is not an apic, though, and outside of
-                   //    scope of documents.  Again, you have 15 clusters of 4 apics
-                   //    each + broadcast.   My impression is that this is identical 
-                   //    to variant 1 for our purposes. 
-                   //
-                   //
-                   // if we are in lowest priorty mode, we should just pick one
-                   // according to the arbitrarion prioty register
-                   int i;
-                   for (i=0;i<MAX_APICS;i++) { 
-                       struct apic_data *dest_apic=&(state->apics[i]);
-                       if (dest_apic->present &&
-                           dest_apic->ops->should_deliver_cluster(dest_apic->core,
-                                                                  mda,
-                                                                  dest_apic->priv_data)) {
-                           if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-                               return -1;
-                           }
-                       }
-                   }
-               } else {
-                   PrintError("icc_bus: unknown logical delivery model 0x%x\n", dfr->model);
-                   return -1;
-               }
-
-           }
-           
-           break;
-           
-       case 1:  // self
-
-           if (icr->dst_mode==0) { 
-               // physical delivery
-               if (icr->dst==state->ioapic_id) { 
-                   PrintError("icc_bus: ioapic attempting to send to itself\n");
-                   return -1;
-               }
-               struct apic_data *dest_apic=&(state->apics[src_apic]);
-               if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-                   return -1;
-               }
-           } else {
-               // logical delivery
-               PrintError("icc_bus: use of logical delivery in self is not yet supported.\n");
-
-               return -1;
-           }
-           break;
-           
-       case 2: 
-
-       case 3: { // all and all-but-me
-           // assuming that logical verus physical doesn't matter
-           // although it is odd that both are used
-           int i;
-           for (i=0;i<MAX_APICS;i++) { 
-               struct apic_data *dest_apic=&(state->apics[i]);
-               if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) { 
-                   if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-                       return -1;
-                   }
-               }
-           }
-       }
-           break;
-
-       default:
-           return -1;
-    }
-    
-
-    return 0;
-}
-
-
-
-/* THIS IS A BIG ASSUMPTION: APIC PHYSID == LOGID == CORENUM */
-
-int v3_icc_register_apic(struct guest_info  * core, struct vm_device * icc_bus, 
-                        uint8_t apic_num, struct v3_icc_ops * ops, void * priv_data) {
-    struct icc_bus_state * icc = (struct icc_bus_state *)icc_bus->private_data;
-    struct apic_data * apic = &(icc->apics[apic_num]);
-
-    if (apic->present == 1) {
-       PrintError("icc_bus: Attempt to re-register apic %u\n", apic_num);
-       return -1;
-    }
-    
-    apic->present = 1;
-    apic->priv_data = priv_data;
-    apic->core = core;
-    apic->ops = ops;
-   
-    PrintDebug("icc_bus: Registered apic %u\n", apic_num);
-
-    return 0;
-}
-
-
-int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, uint8_t apic_num)
-{
-    struct icc_bus_state * icc = (struct icc_bus_state *)icc_bus->private_data;
-
-    if (icc->ioapic_id) { 
-       PrintError("icc_bus: Attempt to register a second ioapic!\n");
-       return -1;
-    }
-
-    icc->ioapic_id=apic_num;
-
-    PrintDebug("icc_bus: Registered ioapic %u\n", apic_num);
-    
-
-    return 0;
-}
-
-
-static int icc_bus_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
-    PrintDebug("icc_bus: Creating ICC_BUS\n");
-
-    char * dev_id = v3_cfg_val(cfg, "ID");
-
-    struct icc_bus_state * icc_bus = (struct icc_bus_state *)V3_Malloc(sizeof(struct icc_bus_state));
-    memset(icc_bus, 0, sizeof(struct icc_bus_state));
-
-    struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, icc_bus);
-
-    if (v3_attach_device(vm, dev) == -1) {
-        PrintError("icc_bus: Could not attach device %s\n", dev_id);
-        return -1;
-    }
-
-    return 0;
-}
-
-
-
-device_register("ICC_BUS", icc_bus_init)
index 7a0c6e8..35187af 100644 (file)
@@ -20,8 +20,7 @@
 
 #include <palacios/vmm.h>
 #include <palacios/vmm_dev_mgr.h>
-#include <devices/icc_bus.h>
-#include <devices/apic_regs.h>
+#include <devices/apic.h>
 #include <palacios/vm_guest.h>
 
 #ifndef CONFIG_DEBUG_IO_APIC
@@ -136,7 +135,7 @@ struct io_apic_state {
   
     struct redir_tbl_entry redir_tbl[24];
 
-    struct vm_device * icc_bus;
+    struct vm_device * apic_dev;
   
 };
 
@@ -186,7 +185,7 @@ static int ioapic_read(struct guest_info * core, addr_t guest_addr, void * dst,
                break;
            default: {
                uint_t redir_index = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) >> 1;
-               uint_t hi_val = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) % 1;
+               uint_t hi_val = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) & 1;
                
                if (redir_index > 0x3f) {
                    PrintError("ioapic %u: Invalid redirection table entry %x\n", ioapic->ioapic_id.id, (uint32_t)redir_index);
@@ -217,6 +216,7 @@ static int ioapic_write(struct guest_info * core, addr_t guest_addr, void * src,
     PrintDebug("ioapic %u: IOAPIC Write at %p (val = %d)\n",  ioapic->ioapic_id.id, (void *)guest_addr, *(uint32_t *)src);
 
     if (reg_tgt == 0x00) {
+       PrintDebug("ioapic %u: Setting ioapic index register to 0x%x.\n", ioapic->ioapic_id.id, op_val);
        ioapic->index_reg = op_val;
     } else if (reg_tgt == 0x10) {
        // IOWIN register
@@ -235,17 +235,17 @@ static int ioapic_write(struct guest_info * core, addr_t guest_addr, void * src,
            default:
                {
                    uint_t redir_index = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) >> 1;
-                   uint_t hi_val = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) % 1;
-
-
+                   uint_t hi_val = (ioapic->index_reg - IOAPIC_REDIR_BASE_REG) & 1;
 
+                   PrintDebug("ioapic %u: Writing value 0x%x to redirection entry %u (%s)\n",
+                              ioapic->ioapic_id.id, op_val, redir_index, hi_val ? "hi" : "low");
 
                    if (redir_index > 0x3f) {
                        PrintError("ioapic %u: Invalid redirection table entry %x\n", ioapic->ioapic_id.id, (uint32_t)redir_index);
                        return -1;
                    }
                    if (hi_val) {
-                       PrintDebug("ioapic %u: Writing to hi of pin %d\n", ioapic->ioapic_id.val, redir_index);
+                       PrintDebug("ioapic %u: Writing to hi of pin %d\n", ioapic->ioapic_id.id, redir_index);
                        ioapic->redir_tbl[redir_index].hi = op_val;
                    } else {
                        PrintDebug("ioapic %u: Writing to lo of pin %d\n", ioapic->ioapic_id.id, redir_index);
@@ -274,29 +274,24 @@ static int ioapic_raise_irq(struct v3_vm_info * vm, void * private_data, int irq
     irq_entry = &(ioapic->redir_tbl[irq]);
 
     if (irq_entry->mask == 0) {
+       struct v3_gen_ipi ipi;
+
+       PrintDebug("ioapic %u: IOAPIC Signalling APIC to raise INTR %d\n", 
+                  ioapic->ioapic_id.id, irq_entry->vec);
+
+
+       ipi.vector = irq_entry->vec;
+       ipi.mode = irq_entry->del_mode;
+       ipi.logical = irq_entry->dst_mode;
+       ipi.trigger_mode = irq_entry->trig_mode;
+       ipi.dst = irq_entry->dst_field;
+       ipi.dst_shorthand = 0;
 
-       PrintDebug("ioapic %u: IOAPIC Signalling APIC to raise INTR %d\n", ioapic->ioapic_id.id, irq_entry->vec);
-
-
-       // the format of the redirection table entry is just slightly 
-       // different than that of the lapic's cmd register, which is the other
-       // way an IPI is initiated.   So we will translate
-       //
-       struct int_cmd_reg icr;
-       
-       icr.val = irq_entry->val;
-       icr.rsvd1=0;
-       icr.lvl=1;
-       icr.trig_mode=irq_entry->trig_mode;
-       icr.rem_rd_status=0;
-       icr.dst_shorthand=0; // no shorthand
-       icr.rsvd2=0;
-
-       // Note: 0 yhere is "cluster model", but it should be irrelevant
-       // since we are sending this as a physical destination
-       PrintDebug("io apic %u: raising irq %u on ICC bus.\n",
-                  ioapic->ioapic_id.id, irq);
-       v3_icc_send_ipi(ioapic->icc_bus, ioapic->ioapic_id.id,icr.val, 0, irq);
+       // Need to add destination argument here...
+       if (v3_apic_send_ipi(vm, ioapic->apic_dev, &ipi) == -1) {
+           PrintError("Error sending IPI to apic %d\n", ipi.dst);
+           return -1;
+       }
     }
 
     return 0;
@@ -332,19 +327,15 @@ static struct v3_device_ops dev_ops = {
 
 
 static int ioapic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
-    struct vm_device * icc_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
+    struct vm_device * apic_dev = v3_find_dev(vm, v3_cfg_val(cfg, "apic"));
     char * dev_id = v3_cfg_val(cfg, "ID");
 
-    if (!icc_bus) {
-       PrintError("ioapic: Could not locate ICC BUS device (%s)\n", v3_cfg_val(cfg, "bus"));
-       return -1;
-    }
 
     PrintDebug("ioapic: Creating IO APIC\n");
 
     struct io_apic_state * ioapic = (struct io_apic_state *)V3_Malloc(sizeof(struct io_apic_state));
 
-    ioapic->icc_bus = icc_bus;
+    ioapic->apic_dev = apic_dev;
 
     struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, ioapic);
 
@@ -359,8 +350,6 @@ static int ioapic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
 
     init_ioapic_state(ioapic,vm->num_cores);
 
-    v3_icc_register_ioapic(vm,icc_bus,ioapic->ioapic_id.id);
-
     v3_hook_full_mem(vm, V3_MEM_CORE_ANY, ioapic->base_addr, ioapic->base_addr + PAGE_SIZE_4KB, 
                     ioapic_read, ioapic_write, dev);
   
index 5d92236..0089d17 100644 (file)
@@ -1,2 +1,3 @@
 obj-$(CONFIG_SHADOW_PAGING_VTLB) += vmm_shdw_pg_tlb.o
 obj-$(CONFIG_SWAPBYPASS) += vmm_shdw_pg_swapbypass.o
+obj-$(CONFIG_SHADOW_PAGING_CACHE1) += vmm_shdw_pg_cache.o
diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c b/palacios/src/palacios/mmu/vmm_shdw_pg_cache.c
new file mode 100644 (file)
index 0000000..5ca6a0b
--- /dev/null
@@ -0,0 +1,563 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm_shadow_paging.h>
+#include <palacios/vmm_swapbypass.h>
+#include <palacios/vmm_ctrl_regs.h>
+
+#include <palacios/vm_guest.h>
+#include <palacios/vm_guest_mem.h>
+#include <palacios/vmm_paging.h>
+#include <palacios/vmm_hashtable.h>
+#include <palacios/vmm_list.h>
+
+#define DEFAULT_CACHE_SIZE ((32 * 1024 * 1024) / 4096)
+
+#define V3_CACHED_PG 0x1
+
+#ifndef CONFIG_DEBUG_SHDW_PG_CACHE
+#undef PrintDebug
+#define PrintDebug(fmt, ...)
+#endif
+
+
+/* Back pointer stored on a shadow page, naming one parent shadow page that
+ * references it.  link_shdw_pg() creates these; unlink_shdw_pg() walks them to
+ * clear the referencing entry in each parent before the page is recycled. */
+struct shdw_back_ptr {
+    addr_t gva;                       // address used to index the entry inside the parent's table
+    struct shdw_pg_data * pg_data;    // the parent shadow page
+    struct list_head back_ptr_node;   // linkage in the child's back_ptrs list
+};
+
+/* Lookup key of the shadow page hash table: a guest page table page plus the
+ * table type it is shadowed as.  cache_hash_fn() hashes the whole struct as a
+ * byte buffer and cache_eq_fn() compares both fields.
+ * NOTE(review): presumably packed so that no padding bytes enter the hash -- confirm. */
+struct guest_pg_tuple {
+    addr_t gpa;              // guest physical address of the guest page table page
+    page_type_t pt_type;     // table type (PAGE_PD32, PAGE_PT32, ...)
+} __attribute__((packed));
+
+
+
+/* One reverse-map record, created by add_rmap().  The records for a guest
+ * physical page are kept on a list stored in cache_vm_state.reverse_map under
+ * that page's address.  Each record identifies a shadow page (gpa + pt_type,
+ * i.e. its guest_pg_tuple) and the address selecting the entry inside it. */
+struct rmap_entry {
+    addr_t gva;                   // address used to index the entry in the shadow page
+    addr_t gpa;                   // tuple.gpa of the shadow page holding the entry
+    page_type_t pt_type;          // tuple.pt_type of the shadow page holding the entry
+    struct list_head rmap_node;   // linkage in the per-page reverse-map list
+};
+
+/* Bookkeeping for one cached shadow page table page. */
+struct shdw_pg_data {
+    struct guest_pg_tuple tuple;       // hash key: guest page + table type this page shadows
+
+    addr_t hpa;                        // host physical address of the backing page (from V3_AllocPages)
+    void * hva;                        // host virtual address of the same page (from V3_VAddr)
+
+    struct list_head back_ptrs;        // list of shdw_back_ptr, one per parent referencing this page
+    struct list_head pg_queue_node;    // linkage in either cache_vm_state.pg_queue or .free_list
+
+};
+
+
+
+/* Placeholder with no members; nothing in this file reads or writes it.
+ * NOTE(review): presumably reserved for per-core cache state -- confirm. */
+struct cache_core_state {
+
+
+};
+
+
+/* Per-VM state of the shadow page cache, allocated in cache_init() and stored
+ * in vm->shdw_impl.impl_data. */
+struct cache_vm_state {
+    
+    v3_lock_t cache_lock;              // initialized in cache_init(); not acquired by the code in this file section
+
+    struct hashtable * page_htable; // GPA to shdw_pg_data
+                                    // (the key is a struct guest_pg_tuple: gpa + table type)
+    struct hashtable * reverse_map;    // guest physical address -> list_head of rmap_entry
+
+
+    int max_cache_pgs;                 // page budget, taken from the "cache_size" config value or the default
+    int pgs_in_cache;                  // number of pages currently linked on pg_queue
+
+    struct list_head pg_queue;         // in-use pages; looked-up pages move to the head, victims come off the tail
+
+    int pgs_in_free_list;              // number of pages currently linked on free_list
+    struct list_head free_list;        // evicted pages waiting to be reused by create_shdw_pt()
+};
+
+
+
+/*
+ * Marks one entry of a page table page as not present.
+ *
+ *   pt      - host virtual address of the table being edited
+ *   va      - address whose index selects the entry within the table
+ *   pt_type - kind of table 'pt' is; picks the entry layout and index macro
+ *
+ * Returns 0 on success, -1 (after logging) for an unrecognized pt_type.
+ */
+static  inline int evict_pt(void * pt, addr_t va, page_type_t pt_type) {
+
+    switch (pt_type) {
+       case PAGE_PD32:
+           ((pde32_t *)pt)[PDE32_INDEX(va)].present = 0;
+           return 0;
+
+       case PAGE_4MB:
+           ((pde32_4MB_t *)pt)[PDE32_INDEX(va)].present = 0;
+           return 0;
+
+       case PAGE_PT32:
+           ((pte32_t *)pt)[PTE32_INDEX(va)].present = 0;
+           return 0;
+
+       case PAGE_PML464:
+           ((pml4e64_t *)pt)[PML4E64_INDEX(va)].present = 0;
+           return 0;
+
+       case PAGE_PDP64:
+           ((pdpe64_t *)pt)[PDPE64_INDEX(va)].present = 0;
+           return 0;
+
+       case PAGE_PD64:
+           ((pde64_t *)pt)[PDE64_INDEX(va)].present = 0;
+           return 0;
+
+       case PAGE_PT64:
+           ((pte64_t *)pt)[PTE64_INDEX(va)].present = 0;
+           return 0;
+
+       default:
+           break;
+    }
+
+    PrintError("Invalid page type: %d\n", pt_type);
+    return -1;
+}
+
+
+
+/*
+ * Write-protects one entry of a page table page by clearing its writable bit.
+ *
+ *   pt      - host virtual address of the table being edited
+ *   va      - address whose index selects the entry within the table
+ *   pt_type - kind of table 'pt' is; picks the entry layout and index macro
+ *
+ * Returns 0 on success, -1 (after logging) for an unrecognized pt_type.
+ */
+static  inline int grab_pt(void * pt, addr_t va, page_type_t pt_type) {
+
+    switch (pt_type) {
+       case PAGE_PD32:
+           ((pde32_t *)pt)[PDE32_INDEX(va)].writable = 0;
+           return 0;
+
+       case PAGE_4MB:
+           ((pde32_4MB_t *)pt)[PDE32_INDEX(va)].writable = 0;
+           return 0;
+
+       case PAGE_PT32:
+           ((pte32_t *)pt)[PTE32_INDEX(va)].writable = 0;
+           return 0;
+
+       case PAGE_PML464:
+           ((pml4e64_t *)pt)[PML4E64_INDEX(va)].writable = 0;
+           return 0;
+
+       case PAGE_PDP64:
+           ((pdpe64_t *)pt)[PDPE64_INDEX(va)].writable = 0;
+           return 0;
+
+       case PAGE_PD64:
+           ((pde64_t *)pt)[PDE64_INDEX(va)].writable = 0;
+           return 0;
+
+       case PAGE_PT64:
+           ((pte64_t *)pt)[PTE64_INDEX(va)].writable = 0;
+           return 0;
+
+       default:
+           break;
+    }
+
+    PrintError("Invalid page type: %d\n", pt_type);
+    return -1;
+}
+
+
+/*
+ * Detaches a shadow page from every parent that references it: for each back
+ * pointer, the referencing entry in the parent table is marked not present and
+ * the back pointer record is released.  Always returns 0.
+ */
+static int unlink_shdw_pg(struct shdw_pg_data * pg_data) {
+    struct shdw_back_ptr * cur = NULL;
+    struct shdw_back_ptr * next = NULL;
+
+    PrintError("Unlinking gpa=%p, type=%d\n", (void *)pg_data->tuple.gpa, pg_data->tuple.pt_type);
+
+    // the _safe iterator is needed because each node is freed inside the loop
+    list_for_each_entry_safe(cur, next, &(pg_data->back_ptrs), back_ptr_node) {
+       evict_pt(cur->pg_data->hva, cur->gva, cur->pg_data->tuple.pt_type);
+
+       list_del(&(cur->back_ptr_node));
+       V3_Free(cur);
+    }
+
+    return 0;
+}
+
+
+/*
+ * Records in the reverse map that the shadow page 'pg_data' holds an entry,
+ * selected by 'gva', that maps the guest physical page 'gpa'.
+ *
+ * The per-page list is created on first use.  Returns 0 on success and -1 if
+ * memory for the record or the list head cannot be allocated; on failure the
+ * reverse map is left unchanged.
+ */
+static int add_rmap(struct v3_vm_info * vm, struct shdw_pg_data * pg_data, addr_t gpa, addr_t gva) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct list_head * rmap_list = NULL;
+    struct rmap_entry * entry = NULL;
+
+    // allocate the record first so a failure cannot leave a half-built list behind
+    entry = V3_Malloc(sizeof(struct rmap_entry));
+
+    if (entry == NULL) {
+       PrintError("Could not allocate reverse map entry\n");
+       return -1;
+    }
+
+    rmap_list = (struct list_head *)v3_htable_search(cache_state->reverse_map, gpa);
+
+    if (rmap_list == NULL) {
+       rmap_list = V3_Malloc(sizeof(struct list_head));
+
+       if (rmap_list == NULL) {
+           PrintError("Could not allocate reverse map list\n");
+           V3_Free(entry);
+           return -1;
+       }
+
+       INIT_LIST_HEAD(rmap_list);
+
+       v3_htable_insert(cache_state->reverse_map, gpa, (addr_t)rmap_list);
+    }
+
+    entry->gva = gva;
+    entry->gpa = pg_data->tuple.gpa;
+    entry->pt_type = pg_data->tuple.pt_type;
+
+    list_add(&(entry->rmap_node), rmap_list);
+
+    return 0;
+}
+
+
+
+/*
+ * Write-protects every recorded shadow mapping of the guest physical page 'gpa'.
+ *
+ * Each reverse-map record names a shadow page by (gpa, pt_type); the record is
+ * skipped if that shadow page is no longer in the cache.  Returns 0 when done
+ * (including when no records exist) and -1 if an entry cannot be write-protected.
+ */
+static int update_rmap_entries(struct v3_vm_info * vm, addr_t gpa) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct list_head * mappings = (struct list_head *)v3_htable_search(cache_state->reverse_map, gpa);
+    struct rmap_entry * rmap = NULL;
+    int count = 0;
+
+    if (mappings == NULL) {
+       return 0;
+    }
+
+    PrintError("Updating rmap entries\n\t");
+
+    list_for_each_entry(rmap, mappings, rmap_node) {
+       struct guest_pg_tuple key = {rmap->gpa, rmap->pt_type};
+       struct shdw_pg_data * shdw_pg = NULL;
+
+       V3_Print("%d \n", count);
+
+       // direct table lookup: does not disturb the LRU order of the page queue
+       shdw_pg = (struct shdw_pg_data *)v3_htable_search(cache_state->page_htable, (addr_t)&key);
+
+       if (shdw_pg == NULL) {
+           PrintError("Invalid PTE reference... Should Delete rmap entry\n");
+           continue;
+       }
+
+       if (grab_pt(shdw_pg->hva, rmap->gva, rmap->pt_type) == -1) {
+           PrintError("Could not invalidate reverse map entry\n");
+           return -1;
+       }
+
+       count++;
+    }
+
+    return 0;
+}
+
+
+
+
+/*
+ * Records that 'parent_pg' references 'child_pg' through the entry selected by
+ * 'gva', by adding a back pointer to the child's back_ptrs list.
+ *
+ * Returns 0 on success, -1 if the back pointer cannot be allocated.
+ */
+static int link_shdw_pg(struct shdw_pg_data * child_pg, struct shdw_pg_data * parent_pg, addr_t gva) {
+    struct shdw_back_ptr * back_ptr = V3_Malloc(sizeof(struct shdw_back_ptr));
+
+    if (back_ptr == NULL) {
+       PrintError("Could not allocate shadow page back pointer\n");
+       return -1;
+    }
+
+    memset(back_ptr, 0, sizeof(struct shdw_back_ptr));
+
+    back_ptr->pg_data = parent_pg;
+    back_ptr->gva = gva;
+
+    list_add(&(back_ptr->back_ptr_node), &(child_pg->back_ptrs));
+   
+    return 0;
+}
+
+
+
+/*
+ * Looks up the cached shadow page for the guest page table page 'gpa' used as
+ * table type 'pt_type'.  A hit is moved to the head of the page queue so that
+ * the tail stays the least recently used page.  Returns NULL on a miss.
+ */
+static struct shdw_pg_data * find_shdw_pt(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct guest_pg_tuple key = {gpa, pt_type};
+    struct shdw_pg_data * hit = NULL;
+
+    hit = (struct shdw_pg_data *)v3_htable_search(cache_state->page_htable, (addr_t)&key);
+
+    if (hit == NULL) {
+       return NULL;
+    }
+
+    // LRU policy: a page that was just looked up becomes the most recent one
+    list_move(&(hit->pg_queue_node), &(cache_state->pg_queue));
+
+    return hit;
+}
+
+
+/*
+ * Removes the shadow page for (gpa, pt_type) from the cache, if one exists:
+ * it is unlinked from its parents, dropped from the hash table and parked on
+ * the free list.  Returns 0 on success or when nothing is cached, -1 if the
+ * page cannot be unlinked.
+ */
+static int evict_shdw_pg(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct shdw_pg_data * victim = find_shdw_pt(vm, gpa, pt_type);
+
+    PrintError("Evicting GPA: %p, type=%d\n", (void *)gpa, pt_type);
+
+    if (victim == NULL) {
+       // nothing cached for this page
+       return 0;
+    }
+
+    if (unlink_shdw_pg(victim) == -1) {
+       PrintError("Error unlinking page...\n");
+       return -1;
+    }
+
+    v3_htable_remove(cache_state->page_htable, (addr_t)&(victim->tuple), 0);
+
+    // Move Page to free list
+    list_move(&(victim->pg_queue_node), &(cache_state->free_list));
+    cache_state->pgs_in_free_list++;
+    cache_state->pgs_in_cache--;
+
+    return 0;
+}
+
+
+/*
+ * Reclaims the least recently used shadow page: takes the page at the tail of
+ * the page queue, unlinks it from its parents and removes it from the hash
+ * table and the queue.
+ *
+ * Returns the reclaimed page, or NULL if the queue holds no pages or the page
+ * cannot be unlinked.
+ */
+static struct shdw_pg_data * pop_queue_pg(struct v3_vm_info * vm, 
+                                         struct cache_vm_state * cache_state) {
+    struct shdw_pg_data * pg_data = NULL;
+
+    PrintError("popping page from queue\n");
+
+    // pgs_in_cache counts the pages on pg_queue; taking the tail of an empty
+    // list would yield a bogus pointer derived from the list head itself
+    if (cache_state->pgs_in_cache <= 0) {
+       PrintError("No cached pages available to reclaim\n");
+       return NULL;
+    }
+
+    pg_data = list_tail_entry(&(cache_state->pg_queue), struct shdw_pg_data, pg_queue_node);
+
+
+    if (unlink_shdw_pg(pg_data) == -1) {
+       PrintError("Error unlinking cached page\n");
+       return NULL;
+    }
+
+    v3_htable_remove(cache_state->page_htable, (addr_t)&(pg_data->tuple), 0);
+    list_del(&(pg_data->pg_queue_node));
+    
+    cache_state->pgs_in_cache--;
+
+    return pg_data;
+}
+
+/*
+ * Produces a zeroed shadow page for the guest page table page 'gpa' used as
+ * table type 'pt_type', registers it in the hash table and puts it at the head
+ * of the page queue.
+ *
+ * The backing page comes from, in order of preference:
+ *   1. the free list (pages released by evict_shdw_pg),
+ *   2. a fresh allocation, while the cache is below max_cache_pgs,
+ *   3. the least recently used page in the queue.
+ * Reusing free pages first keeps the total number of allocated pages within
+ * max_cache_pgs; evict_shdw_pg lowers pgs_in_cache, so checking the budget
+ * first would allocate a new page for every evicted one.
+ *
+ * Returns the page descriptor, or NULL if no page could be obtained.
+ */
+static struct shdw_pg_data * create_shdw_pt(struct v3_vm_info * vm, addr_t gpa, page_type_t pt_type) {
+    struct cache_vm_state * cache_state = vm->shdw_impl.impl_data;
+    struct shdw_pg_data * pg_data = NULL;
+
+
+    PrintError("Creating shdw page: gpa=%p, type=%d\n", (void *)gpa, pt_type);
+
+    if (cache_state->pgs_in_free_list > 0) {
+
+       PrintError("pulling page from free list\n");
+       // pull from free list
+       pg_data = list_tail_entry(&(cache_state->free_list), struct shdw_pg_data, pg_queue_node);
+       
+       list_del(&(pg_data->pg_queue_node));
+       cache_state->pgs_in_free_list--;
+
+    } else if (cache_state->pgs_in_cache < cache_state->max_cache_pgs) {
+       pg_data = V3_Malloc(sizeof(struct shdw_pg_data));
+
+       if (pg_data != NULL) {
+           pg_data->hpa = (addr_t)V3_AllocPages(1);
+
+           // assumes V3_AllocPages reports failure with a NULL address -- TODO confirm
+           if (pg_data->hpa == 0) {
+               PrintError("Could not allocate page for shadow page table\n");
+               V3_Free(pg_data);
+               pg_data = NULL;
+           } else {
+               pg_data->hva = (void *)V3_VAddr((void *)pg_data->hpa);
+           }
+       }
+
+    } else {
+       // pull from queue
+       pg_data = pop_queue_pg(vm, cache_state);
+    }
+
+
+    if (pg_data == NULL) {
+       PrintError("Error creating Shadow Page table page\n");
+       return NULL;
+    }
+
+    memset(pg_data->hva, 0, PAGE_SIZE_4KB);
+
+    pg_data->tuple.gpa = gpa;
+    pg_data->tuple.pt_type = pt_type;
+
+    INIT_LIST_HEAD(&(pg_data->back_ptrs));
+
+    v3_htable_insert(cache_state->page_htable, (addr_t)&(pg_data->tuple), (addr_t)pg_data);
+
+    list_add(&(pg_data->pg_queue_node), &(cache_state->pg_queue));
+    cache_state->pgs_in_cache++;
+
+    return pg_data;
+
+}
+
+
+#include "vmm_shdw_pg_cache_32.h"
+//#include "vmm_shdw_pg_cache_32pae.h"
+//#include "vmm_shdw_pg_cache_64.h"
+
+
+/* Hash callback for page_htable: 'key' is the address of a struct guest_pg_tuple,
+ * whose bytes are hashed as one buffer. */
+static uint_t cache_hash_fn(addr_t key) {
+    return v3_hash_buffer((uint8_t *)key, sizeof(struct guest_pg_tuple));
+}
+
+/* Equality callback for page_htable: both keys are addresses of struct
+ * guest_pg_tuple; returns nonzero only when gpa and pt_type both match. */
+static int cache_eq_fn(addr_t key1, addr_t key2) {
+    struct guest_pg_tuple * lhs = (struct guest_pg_tuple *)key1;
+    struct guest_pg_tuple * rhs = (struct guest_pg_tuple *)key2;
+
+    if (lhs->gpa != rhs->gpa) {
+       return 0;
+    }
+
+    return (lhs->pt_type == rhs->pt_type);
+}
+
+/* Hash callback for reverse_map: the key is the address value itself, hashed
+ * over the full bit width of addr_t. */
+static uint_t rmap_hash_fn(addr_t key) {
+    const uint_t key_bits = sizeof(addr_t) * 8;
+
+    return v3_hash_long(key, key_bits);
+}
+
+/* Equality callback for reverse_map: keys are plain address values. */
+static int rmap_eq_fn(addr_t key1, addr_t key2) {
+    if (key1 == key2) {
+       return 1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * VM-level initialization of the shadow page cache.
+ *
+ * Reads the optional "cache_size" config value (in megabytes) and converts it
+ * to a budget of 4KB pages; a missing value selects DEFAULT_CACHE_SIZE, and a
+ * value that does not parse to a positive number is reported and replaced by
+ * the default as well.  Allocates the per-VM cache state, creates both hash
+ * tables and the two page lists, and stores the state in vm->shdw_impl.impl_data.
+ *
+ * Returns 0 on success, -1 if the cache state cannot be allocated.
+ */
+static int cache_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
+    struct v3_shdw_impl_state * vm_state = &(vm->shdw_impl);
+    struct cache_vm_state * cache_state = NULL;
+    int cache_size = DEFAULT_CACHE_SIZE;
+    char * cache_sz_str = v3_cfg_val(cfg, "cache_size");
+
+    if (cache_sz_str != NULL) {
+       int cache_mb = atoi(cache_sz_str);
+
+       if (cache_mb > 0) {
+           // pages per megabyte is computed first so the byte count never has to
+           // fit in an int (mb * 1024 * 1024 overflows from 2048MB upwards)
+           cache_size = cache_mb * ((1024 * 1024) / 4096);
+       } else {
+           // a zero or negative budget would leave the cache with no page to hand out
+           PrintError("Invalid shadow page cache size (%s), using default\n", cache_sz_str);
+       }
+    }
+
+    V3_Print("Shadow Page Cache initialization\n");
+
+    cache_state = V3_Malloc(sizeof(struct cache_vm_state));
+
+    if (cache_state == NULL) {
+       PrintError("Could not allocate shadow page cache state\n");
+       return -1;
+    }
+
+    memset(cache_state, 0, sizeof(struct cache_vm_state));
+
+    cache_state->page_htable = v3_create_htable(0, cache_hash_fn, cache_eq_fn);
+    cache_state->reverse_map = v3_create_htable(0, rmap_hash_fn, rmap_eq_fn);
+    v3_lock_init(&(cache_state->cache_lock));
+    INIT_LIST_HEAD(&(cache_state->pg_queue));
+    INIT_LIST_HEAD(&(cache_state->free_list));
+    cache_state->max_cache_pgs = cache_size;
+
+    vm_state->impl_data = cache_state;
+
+    return 0;
+}
+
+
+/* VM-level teardown hook.  Not implemented: it releases nothing and always
+ * returns -1. */
+static int cache_deinit(struct v3_vm_info * vm) {
+    return -1;
+}
+
+
+/* Core-level initialization hook.  This implementation sets up nothing for the
+ * core and always returns 0. */
+static int cache_local_init(struct guest_info * core) {
+    //    struct v3_shdw_pg_state * core_state = &(vm->shdw_pg_state);
+
+
+    return 0;
+}
+
+/*
+ * Installs the shadow page tables for the core's current CPU mode.
+ *
+ * Only 32 bit protected mode is supported by this implementation; every other
+ * mode is logged and rejected.  Returns the result of the 32 bit activation,
+ * or -1 for an unsupported mode.
+ */
+static int cache_activate_shdw_pt(struct guest_info * core) {
+
+    if (v3_get_vm_cpu_mode(core) == PROTECTED) {
+       PrintError("Calling 32 bit cache activation\n");
+       return activate_shadow_pt_32(core);
+    }
+
+    // PAE and long mode (including compat modes) have no activation routine yet
+    PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+    return -1;
+}
+
+/*
+ * Invalidation hook.  Cached shadow pages are not discarded here: the call is
+ * logged and the shadow page tables are simply activated again.  Returns
+ * whatever cache_activate_shdw_pt() returns.
+ */
+static int cache_invalidate_shdw_pt(struct guest_info * core) {
+    int status = 0;
+
+    V3_Print("Cache invalidation called\n");
+
+    status = cache_activate_shdw_pt(core);
+
+    return status;
+}
+
+
+
+/*
+ * Page fault hook: forwards the fault to the handler for the core's current
+ * CPU mode.
+ *
+ * Only 32 bit protected mode is supported by this implementation; every other
+ * mode is logged and rejected.  Returns the handler's result, or -1 for an
+ * unsupported mode.
+ */
+static int cache_handle_pf(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
+
+    if (v3_get_vm_cpu_mode(core) == PROTECTED) {
+       return handle_shadow_pagefault_32(core, fault_addr, error_code);
+    }
+
+    // PAE and long mode (including compat modes) have no fault handler yet
+    PrintError("Unhandled CPU Mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+    return -1;
+}
+
+
+/*
+ * INVLPG hook: logs the request and forwards it to the handler for the core's
+ * current CPU mode.
+ *
+ * Only 32 bit protected mode is supported by this implementation; every other
+ * mode is logged and rejected.  Returns the handler's result, or -1 for an
+ * unsupported mode.
+ */
+static int cache_handle_invlpg(struct guest_info * core, addr_t vaddr) {
+    PrintError("INVLPG called for %p\n", (void *)vaddr);
+
+    if (v3_get_vm_cpu_mode(core) == PROTECTED) {
+       return handle_shadow_invlpg_32(core, vaddr);
+    }
+
+    // PAE and long mode (including compat modes) have no INVLPG handler yet
+    PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+    return -1;
+}
+
+
+
+
+
+
+/* Operation table of the caching shadow paging implementation: its name and
+ * the hooks defined in this file.  Handed to register_shdw_pg_impl(). */
+static struct v3_shdw_pg_impl cache_impl = {
+    .name = "SHADOW_CACHE",
+    .init = cache_init,                             // VM-level setup
+    .deinit = cache_deinit,                         // VM-level teardown (stub, returns -1)
+    .local_init = cache_local_init,                 // core-level setup (no-op)
+    .handle_pagefault = cache_handle_pf, 
+    .handle_invlpg = cache_handle_invlpg,
+    .activate_shdw_pt = cache_activate_shdw_pt, 
+    .invalidate_shdw_pt = cache_invalidate_shdw_pt  // re-activates; does not flush the cache
+};
+
+
+
+register_shdw_pg_impl(&cache_impl);
diff --git a/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h b/palacios/src/palacios/mmu/vmm_shdw_pg_cache_32.h
new file mode 100644 (file)
index 0000000..2aae928
--- /dev/null
@@ -0,0 +1,633 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+
+/*
+ * Point the shadow CR3 at the cached shadow page directory that corresponds
+ * to the guest's current 32 bit page directory.
+ *
+ * The guest page directory address is taken from shdw_pg_state.guest_cr3 and
+ * looked up in the shadow page cache. On a miss a new PAGE_PD32 entry is
+ * created and the reverse map entries for that guest page are updated.
+ * The PWT and PCD bits of the shadow CR3 are copied from the guest CR3.
+ *
+ * Returns 0 on success, -1 if no shadow page directory could be obtained.
+ */
+static inline int activate_shadow_pt_32(struct guest_info * core) {
+    struct cr3_32 * shadow_cr3 = (struct cr3_32 *)&(core->ctrl_regs.cr3);
+    struct cr3_32 * guest_cr3 = (struct cr3_32 *)&(core->shdw_pg_state.guest_cr3);
+    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_cr3->pdt_base_addr);
+    struct shdw_pg_data * shdw_pg = NULL;
+
+    PrintDebug("Activating 32 Bit cacheable page tables\n");
+    shdw_pg = find_shdw_pt(core->vm_info, gpa, PAGE_PD32);
+    
+    PrintError("shdw_pg returned as %p for CR3:%p\n", shdw_pg, (void *)gpa);
+
+    if (shdw_pg == NULL) {
+       shdw_pg = create_shdw_pt(core->vm_info, gpa, PAGE_PD32);
+
+       // shdw_pg->hpa is read below, so fail here instead of following a
+       // NULL pointer if the cache could not provide a page.
+       if (shdw_pg == NULL) {
+           PrintError("Could not create shadow page directory for CR3:%p\n", (void *)gpa);
+           return -1;
+       }
+
+       // update current reverse map entries...
+       // We are now using this page in a PT, so:
+       //     any existing writable mappings must be updated
+       update_rmap_entries(core->vm_info, gpa);
+    }
+
+    PrintDebug("shdw_pg now exists...\n");
+
+    shadow_cr3->pdt_base_addr = PAGE_BASE_ADDR_4KB(shdw_pg->hpa);
+    shadow_cr3->pwt = guest_cr3->pwt;
+    shadow_cr3->pcd = guest_cr3->pcd;
+
+    return 0;
+}
+
+
+
+/* 
+ * *
+ * * 
+ * * 32 bit Page table fault handlers
+ * *
+ * *
+ */
+/*
+static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * core,  addr_t fault_addr, pf_error_t error_code, 
+                                             pt_access_status_t shadow_pde_access, pde32_4MB_t * large_shadow_pde, 
+                                             pde32_4MB_t * large_guest_pde);
+*/
+static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core,  addr_t fault_addr, pf_error_t error_code, 
+                                             pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde,  struct shdw_pg_data * pt_pg_data);
+
+static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
+                                         pte32_t * shadow_pt,  pte32_t * guest_pt,  struct shdw_pg_data * pt_pg_data);
+
+
+
+
+/*
+ * Top level shadow page fault handler for 32 bit (non-PAE) guests when the
+ * shadow page table cache is in use.
+ *
+ * Works at the page directory level:
+ *   - faults caused by the guest's own PDE permissions are injected back
+ *     into the guest;
+ *   - otherwise the shadow page table for the faulting PDE is looked up in
+ *     (or added to) the cache, linked to its parent page directory if that
+ *     is cached, and installed in the shadow PDE when the entry was not
+ *     present;
+ *   - the fault is then passed to the 4KB or the 4MB PTE level handler,
+ *     depending on the large_page bit of the guest PDE.
+ *
+ * Returns 0 when the fault was handled or forwarded to the guest,
+ * -1 on error.
+ */
+static inline int handle_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code) {
+    pde32_t * guest_pd = NULL;
+    pde32_t * shadow_pd = CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
+    addr_t guest_cr3 = CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
+    pt_access_status_t guest_pde_access;
+    pt_access_status_t shadow_pde_access;
+    pde32_t * guest_pde = NULL;
+    pde32_t * shadow_pde = (pde32_t *)&(shadow_pd[PDE32_INDEX(fault_addr)]);
+
+    PrintDebug("Shadow cache page fault handler: %p\n", (void *)fault_addr );
+    PrintDebug("Handling PDE32 Fault\n");
+
+    if (v3_gpa_to_hva(core, guest_cr3, (addr_t*)&guest_pd) == -1) {
+       PrintError("Invalid Guest PDE Address: 0x%p\n",  (void *)guest_cr3);
+       return -1;
+    } 
+
+    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(fault_addr)]);
+
+    // Check the guest page permissions
+    guest_pde_access = v3_can_access_pde32(guest_pd, fault_addr, error_code);
+
+    // Check the shadow page permissions
+    shadow_pde_access = v3_can_access_pde32(shadow_pd, fault_addr, error_code);
+  
+    /* Was the page fault caused by the Guest's page tables? */
+    if (v3_is_guest_pf(guest_pde_access, shadow_pde_access) == 1) {
+       PrintDebug("Injecting PDE pf to guest: (guest access error=%d) (shdw access error=%d)  (pf error code=%d)\n", 
+                  *(uint_t *)&guest_pde_access, *(uint_t *)&shadow_pde_access, *(uint_t *)&error_code);
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+       return 0;
+    }
+
+
+
+    if (shadow_pde_access == PT_ACCESS_USER_ERROR) {
+       // 
+       // PDE Entry marked non user
+       //
+       PrintDebug("Shadow Paging User access error (shadow_pde_access=0x%x, guest_pde_access=0x%x)\n", 
+                  shadow_pde_access, guest_pde_access);
+       
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+       return 0;
+    } else if ((shadow_pde_access == PT_ACCESS_WRITE_ERROR) && 
+              (guest_pde->large_page == 1)) {
+       // First write to a guest large page whose shadow PDE was installed
+       // read-only: record the dirty bit and restore the guest's permission
+       ((pde32_4MB_t *)guest_pde)->dirty = 1;
+       shadow_pde->writable = guest_pde->writable;
+       return 0;
+    } else if ((shadow_pde_access != PT_ACCESS_NOT_PRESENT) &&
+              (shadow_pde_access != PT_ACCESS_OK)) {
+       // inject page fault in guest
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+       PrintDebug("Unknown Error occurred (shadow_pde_access=%d)\n", shadow_pde_access);
+       PrintDebug("Manual Says to inject page fault into guest\n");
+       return 0;
+    }
+
+
+    pte32_t * shadow_pt = NULL;
+    pte32_t * guest_pt = NULL;
+
+
+    /*  Set up cache state */
+    addr_t gpa = BASE_TO_PAGE_ADDR_4KB(guest_pde->pt_base_addr);
+
+
+    struct shdw_pg_data * shdw_page = NULL;
+    page_type_t pt_type = PAGE_PT32;
+
+    if (guest_pde->large_page == 1) {
+       // Handle Large pages, for this we use the PAGE_4MB type...
+       pt_type = PAGE_4MB;
+    }
+
+    shdw_page = find_shdw_pt(core->vm_info, gpa, pt_type);
+       
+    if (shdw_page == NULL) {
+       shdw_page = create_shdw_pt(core->vm_info, gpa, pt_type);
+
+       // The page is linked and dereferenced below, so fail the fault here
+       // instead of following a NULL pointer if the cache gave us nothing.
+       if (shdw_page == NULL) {
+           PrintError("Could not create shadow page table for gpa %p\n", (void *)gpa);
+           return -1;
+       }
+
+       if (pt_type == PAGE_PT32) {
+           // update current reverse map entries...
+           // We are now using this page in a PT, so:
+           //     any existing writable mappings must be updated
+           update_rmap_entries(core->vm_info, gpa);
+       }
+    }    
+
+    
+    struct shdw_pg_data * parent_page = find_shdw_pt(core->vm_info, guest_cr3, PAGE_PD32);
+    
+    if (parent_page != NULL) {
+       // add back pointer to PDE, if it exists
+       link_shdw_pg(shdw_page, parent_page, PAGE_ADDR_4KB(fault_addr));
+    }
+
+
+    // Get the next shadow page  level, allocate if not present
+
+    if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {
+
+       /* Large shadow pages are currently not supported: that would require
+        * scanning the large page for page table pages and splitting the
+        * entries if any exist. A guest large page is therefore always
+        * backed by a shadow page table of 4KB entries.
+        */
+       shadow_pt = (pte32_t *)(shdw_page->hva);
+
+       shadow_pde->present = 1;
+       shadow_pde->user_page = guest_pde->user_page;
+
+
+       if (guest_pde->large_page == 0) {
+           shadow_pde->writable = guest_pde->writable;
+       } else {
+           // This large page flag is temporary until we can get a working cache....
+           ((pde32_4MB_t *)guest_pde)->vmm_info = V3_LARGE_PG;
+
+           if (error_code.write) {
+               shadow_pde->writable = guest_pde->writable;
+               ((pde32_4MB_t *)guest_pde)->dirty = 1;
+           } else {
+               // Keep the shadow PDE read-only so the first write faults
+               // and the guest dirty bit can be set at that point
+               shadow_pde->writable = 0;
+               ((pde32_4MB_t *)guest_pde)->dirty = 0;
+           }
+       }
+      
+       // VMM Specific options
+       shadow_pde->write_through = guest_pde->write_through;
+       shadow_pde->cache_disable = guest_pde->cache_disable;
+       shadow_pde->global_page = guest_pde->global_page;
+       //
+      
+       guest_pde->accessed = 1;
+      
+       shadow_pde->pt_base_addr = PAGE_BASE_ADDR(shdw_page->hpa);
+    } else {
+       shadow_pt = (pte32_t *)V3_VAddr((void *)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr));
+    }
+
+    
+    if (guest_pde->large_page == 0) {
+       if (v3_gpa_to_hva(core, BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr), (addr_t*)&guest_pt) == -1) {
+           // Machine check the guest
+           PrintDebug("Invalid Guest PTE Address: 0x%p\n", (void *)BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr));
+           v3_raise_exception(core, MC_EXCEPTION);
+           return 0;
+       }
+
+       if (handle_pte_shadow_pagefault_32(core, fault_addr, error_code, shadow_pt, guest_pt, shdw_page)  == -1) {
+           PrintError("Error handling Page fault caused by PTE\n");
+           return -1;
+       }
+    } else {
+       if (handle_4MB_shadow_pagefault_pte_32(core, fault_addr, error_code, shadow_pt, (pde32_4MB_t *)guest_pde, shdw_page) == -1) {
+           PrintError("Error handling large pagefault\n");
+           return -1;
+       }       
+    }
+
+    return 0;
+}
+
+
+
+
+static int handle_pte_shadow_pagefault_32(struct guest_info * core, addr_t fault_addr, pf_error_t error_code,
+                                         pte32_t * shadow_pt, pte32_t * guest_pt, struct shdw_pg_data * pt_pg_data) {
+    /*
+     * PTE level shadow page fault handler for a guest mapping made of
+     * 4KB pages.
+     *
+     * shadow_pt and guest_pt are the shadow and guest page tables; both are
+     * indexed with PTE32_INDEX(fault_addr). pt_pg_data is the cache entry
+     * of the shadow page table and is handed to add_rmap().
+     *
+     * Returns 0 when the fault was handled or forwarded to the guest
+     * (exception raised or page fault injected). Returns -1 when address
+     * translation, page fault injection, cache eviction or the region's
+     * unhandled() hook fails, and also in the final fall-through case.
+     */
+    pt_access_status_t guest_pte_access;
+    pt_access_status_t shadow_pte_access;
+    pte32_t * guest_pte = (pte32_t *)&(guest_pt[PTE32_INDEX(fault_addr)]);;
+    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
+    addr_t guest_pa = BASE_TO_PAGE_ADDR((addr_t)(guest_pte->page_base_addr)) +  PAGE_OFFSET(fault_addr);
+    // guest_pa: frame from the guest PTE plus the page offset of the faulting address
+    struct v3_mem_region * shdw_reg =  v3_get_mem_region(core->vm_info, core->cpu_id, guest_pa);
+
+    if (shdw_reg == NULL) {
+       // Inject a machine check in the guest
+       PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_pa);
+       v3_raise_exception(core, MC_EXCEPTION);
+       return 0;
+    }
+
+    // Check the guest page permissions
+    guest_pte_access = v3_can_access_pte32(guest_pt, fault_addr, error_code);
+
+    // Check the shadow page permissions
+    shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);
+  
+  
+    /* Was the page fault caused by the Guest's page tables? */
+    if (v3_is_guest_pf(guest_pte_access, shadow_pte_access) == 1) {
+
+       PrintDebug("Access error injecting pf to guest (guest access error=%d) (pf error code=%d)\n", 
+                  guest_pte_access, *(uint_t*)&error_code);
+       
+
+       //   inject:
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }       
+
+       return 0; 
+    }
+
+  
+  
+    if (shadow_pte_access == PT_ACCESS_OK) {
+       // Inconsistent state...
+       // Guest Re-Entry will flush page tables and everything should now work
+       PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
+       return 0;
+    }
+
+    // From here on the shadow PTE itself is missing or too restrictive
+    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
+       // Page Table Entry Not Present
+       PrintDebug("guest_pa =%p\n", (void *)guest_pa);
+
+       if ((shdw_reg->flags.alloced == 1) && (shdw_reg->flags.read == 1)) {
+           addr_t shadow_pa = 0;
+
+           if (v3_gpa_to_hpa(core, guest_pa, &shadow_pa) == -1) {
+               PrintError("could not translate page fault address (%p)\n", (void *)guest_pa);
+               return -1;
+           }
+
+           shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);
+
+           PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));
+      
+           shadow_pte->present = guest_pte->present;
+           shadow_pte->user_page = guest_pte->user_page;
+      
+           //set according to VMM policy
+           shadow_pte->write_through = guest_pte->write_through;
+           shadow_pte->cache_disable = guest_pte->cache_disable;
+           shadow_pte->global_page = guest_pte->global_page;
+           //
+      
+           guest_pte->accessed = 1;
+      
+           if (guest_pte->dirty == 1) {
+               shadow_pte->writable = guest_pte->writable;
+           } else if ((guest_pte->dirty == 0) && (error_code.write == 1)) {
+               shadow_pte->writable = guest_pte->writable;
+               guest_pte->dirty = 1;
+           } else if ((guest_pte->dirty == 0) && (error_code.write == 0)) {
+               shadow_pte->writable = 0;    // clean page, read access: stay read-only so the first write faults
+           }
+
+           // The memory region's write flag overrides whatever the guest PTE allows
+           if (shdw_reg->flags.write == 0) {
+               shadow_pte->writable = 0;
+           }
+
+
+           // Add this PTE to the reverse map...
+           // This allows us to update this PTE entry if it gets turned into a PT page
+           add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_pa), PAGE_ADDR_4KB(fault_addr));
+
+           // Check for cache entries and mark page read-only, plus tag
+           {
+               struct shdw_pg_data * pt_page = NULL;
+               addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);
+
+               pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);
+               
+               if (pt_page == NULL) {
+                   pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
+               }
+
+               if (pt_page != NULL) {
+                   PrintError("Found PT page (small), marking RD-ONLY (va=%p), (gpa=%p)\n", 
+                              (void *)fault_addr, (void *)pg_gpa);
+                   // This is a page table page... 
+                   shadow_pte->writable = 0;
+                   shadow_pte->vmm_info = V3_CACHED_PG;
+               }
+           }
+
+       } else {
+           // Page fault on unhandled memory region
+           
+           if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page fault handler returned error for address: %p\n",  (void *)fault_addr);
+               return -1;
+           }
+       }
+    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {
+       guest_pte->dirty = 1;
+
+       // check for cache tag and handle invalidations if it exists.
+       if (shadow_pte->vmm_info == V3_CACHED_PG) {
+           addr_t pg_gpa = PAGE_ADDR_4KB(guest_pa);
+
+           PrintError("Evicting on a small page\n");
+
+           // The tag is set for pages found cached as PAGE_PT32 or PAGE_PD32, so both kinds are evicted
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
+               PrintError("Error Evicting PAGE_PD32 cache entry\n");
+               return -1;
+           }
+
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
+               PrintError("Error Evicting PAGE_PT32 cache entry\n");
+               return -1;
+           }
+
+           shadow_pte->vmm_info &= ~V3_CACHED_PG;
+       }
+
+       // Writable regions get the guest's permission back; others go to the region's unhandled() hook
+       if (shdw_reg->flags.write == 1) {
+           PrintDebug("Shadow PTE Write Error\n");
+           shadow_pte->writable = guest_pte->writable;
+       } else {
+           if (shdw_reg->unhandled(core, fault_addr, guest_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page fault handler returned error for address: %p\n",  (void *)fault_addr);
+               return -1;
+           }
+       }
+
+
+       return 0;
+
+    } else {
+       // Inject page fault into the guest     
+       if (v3_inject_guest_pf(core, fault_addr, error_code) == -1) {
+           PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr);
+           return -1;
+       }
+
+       PrintError("PTE Page fault fell through... Not sure if this should ever happen\n");
+       PrintError("Manual Says to inject page fault into guest\n");
+       return -1;    // NOTE(review): reports failure even though the injection above succeeded - confirm this is intended
+    }
+
+    return 0;
+}
+
+// Handle a 4MB page fault with small pages in the PTE
+/*
+ * PTE level shadow page fault handler for a guest 4MB page that is backed
+ * by a shadow page table of 4KB entries.
+ *
+ * shadow_pt is the shadow page table, indexed with PTE32_INDEX(fault_addr).
+ * large_guest_pde is the guest's 4MB PDE; the guest physical address of the
+ * fault is its page base plus the 4MB page offset of fault_addr.
+ * pt_pg_data is the cache entry of the shadow page table and is handed to
+ * add_rmap().
+ *
+ * Returns 0 when the fault was handled or a machine check was raised in the
+ * guest. Returns -1 when address translation, cache eviction or the
+ * region's unhandled() hook fails, or when the access status is one the top
+ * level handler should already have dealt with.
+ */
+static int handle_4MB_shadow_pagefault_pte_32(struct guest_info * core, 
+                                             addr_t fault_addr, pf_error_t error_code, 
+                                             pte32_t * shadow_pt, pde32_4MB_t * large_guest_pde, 
+                                             struct shdw_pg_data * pt_pg_data) 
+{
+    pt_access_status_t shadow_pte_access = v3_can_access_pte32(shadow_pt, fault_addr, error_code);
+    pte32_t * shadow_pte = (pte32_t *)&(shadow_pt[PTE32_INDEX(fault_addr)]);
+    addr_t guest_fault_pa = BASE_TO_PAGE_ADDR_4MB(large_guest_pde->page_base_addr) + PAGE_OFFSET_4MB(fault_addr);  
+
+
+    PrintDebug("Handling 4MB fault (guest_fault_pa=%p) (error_code=%x)\n", (void *)guest_fault_pa, *(uint_t*)&error_code);
+    PrintDebug("ShadowPT=%p, LargeGuestPDE=%p\n", shadow_pt, large_guest_pde);
+
+    struct v3_mem_region * shdw_reg = v3_get_mem_region(core->vm_info, core->cpu_id, guest_fault_pa);
+
+    if (shdw_reg == NULL) {
+       // Inject a machine check in the guest
+       PrintDebug("Invalid Guest Address in page table (0x%p)\n", (void *)guest_fault_pa);
+       v3_raise_exception(core, MC_EXCEPTION);
+       // The fault has been turned into a guest exception, so report it as
+       // handled, as the 4KB handler and the top level handler do. Returning
+       // an error here would fail the exit before the guest sees the
+       // machine check.
+       return 0;
+    }
+
+    if (shadow_pte_access == PT_ACCESS_OK) {
+       // Inconsistent state...
+       // Guest Re-Entry will flush tables and everything should now work
+       PrintDebug("Inconsistent state... Guest re-entry should flush tlb\n");
+       return 0;
+    }
+
+  
+    if (shadow_pte_access == PT_ACCESS_NOT_PRESENT) {
+       // Get the guest physical address of the fault
+
+       if ((shdw_reg->flags.alloced == 1) && 
+           (shdw_reg->flags.read  == 1)) {
+           addr_t shadow_pa = 0;
+
+
+           if (v3_gpa_to_hpa(core, guest_fault_pa, &shadow_pa) == -1) {
+               PrintError("could not translate page fault address (%p)\n", (void *)guest_fault_pa);
+               return -1;
+           }
+
+           shadow_pte->page_base_addr = PAGE_BASE_ADDR(shadow_pa);
+
+           PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));
+
+           shadow_pte->present = 1;
+
+           /* We are assuming that the PDE entry has precedence
+            * so the Shadow PDE will mirror the guest PDE settings, 
+            * and we don't have to worry about them here
+            * Allow everything
+            */
+           shadow_pte->user_page = 1;
+
+           //set according to VMM policy
+           shadow_pte->write_through = large_guest_pde->write_through;
+           shadow_pte->cache_disable = large_guest_pde->cache_disable;
+           shadow_pte->global_page = large_guest_pde->global_page;
+           //
+      
+
+           // Only the memory region decides writability at this level
+           if (shdw_reg->flags.write == 0) {
+               shadow_pte->writable = 0;
+           } else {
+               shadow_pte->writable = 1;
+           }
+
+
+           // Add this PTE to the reverse map...
+           // This allows us to update this PTE entry if it gets turned into a PT page sometime in the future
+           add_rmap(core->vm_info, pt_pg_data, PAGE_ADDR_4KB(guest_fault_pa), PAGE_ADDR_4KB(fault_addr));
+
+           // Check for cache entries and mark page read-only, plus tag
+           {
+               struct shdw_pg_data * pt_page = NULL;
+               addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);
+
+               pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PT32);
+               
+               if (pt_page == NULL) {
+                   pt_page = find_shdw_pt(core->vm_info, pg_gpa, PAGE_PD32);
+               }
+
+               if (pt_page != NULL) {
+                   // This is a page table page... 
+                   PrintError("Found PT page (large), marking RD-ONLY (va=%p), (gpa=%p)\n", 
+                              (void *)fault_addr, (void *)pg_gpa);
+
+                   shadow_pte->writable = 0;
+                   shadow_pte->vmm_info = V3_CACHED_PG;
+               }
+
+           }
+
+       } else {
+           if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
+               return -1;
+           }
+       }
+    } else if (shadow_pte_access == PT_ACCESS_WRITE_ERROR) {
+
+       // check for cache tag and handle invalidations if it exists.
+       if (shadow_pte->vmm_info == V3_CACHED_PG) {
+           addr_t pg_gpa = PAGE_ADDR_4KB(guest_fault_pa);
+           PrintError("Evicting on a large page\n");
+
+           // The tag is set for pages found cached as PAGE_PT32 or
+           // PAGE_PD32, so both kinds are evicted
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PD32) == -1) {
+               PrintError("Error Evicting PAGE_PD32 cache entry\n");
+               return -1;
+           }
+
+           if (evict_shdw_pg(core->vm_info, pg_gpa, PAGE_PT32) == -1) {
+               PrintError("Error Evicting PAGE_PT32 cache entry\n");
+               return -1;
+           }
+
+           shadow_pte->vmm_info &= ~V3_CACHED_PG;
+       }
+
+
+       if (shdw_reg->flags.write == 0) {
+           if (shdw_reg->unhandled(core, fault_addr, guest_fault_pa, shdw_reg, error_code) == -1) {
+               PrintError("Special Page Fault handler returned error for address: %p\n", (void *)fault_addr);
+               return -1;
+           }
+       } else {
+           // set writable after cache eviction, unless overruled by region setting
+           shadow_pte->writable = 1;
+       }
+
+    } else {
+       PrintError("Error in large page fault handler...\n");
+       PrintError("This case should have been handled at the top level handler\n");
+       return -1;
+    }
+
+    PrintDebug("Returning from large page->small page fault handler\n");
+    return 0;
+}
+
+
+/*
+ * Handle a guest INVLPG of vaddr for a 32 bit (non-PAE) guest.
+ *
+ * If the guest PDE covering vaddr is a large page, the whole shadow PDE is
+ * marked not present. Otherwise, if the shadow PDE is present, only the
+ * shadow PTE for vaddr is marked not present. If neither applies there is
+ * no shadow entry to invalidate.
+ *
+ * If we start to optimize we should look up the guest pages in the cache...
+ *
+ * Returns 0 on success, -1 if the guest page directory cannot be translated.
+ */
+static inline int handle_shadow_invlpg_32(struct guest_info * core, addr_t vaddr) {
+    pde32_t * shadow_pd = (pde32_t *)CR3_TO_PDE32_VA(core->ctrl_regs.cr3);
+    pde32_t * shadow_pde = (pde32_t *)&shadow_pd[PDE32_INDEX(vaddr)];
+
+    addr_t guest_cr3 =  CR3_TO_PDE32_PA(core->shdw_pg_state.guest_cr3);
+    pde32_t * guest_pd = NULL;
+    pde32_t * guest_pde = NULL;
+
+    if (v3_gpa_to_hva(core, guest_cr3, (addr_t*)&guest_pd) == -1) {
+       PrintError("Invalid Guest PDE Address: 0x%p\n",  (void *)guest_cr3);
+       return -1;
+    }
+  
+    guest_pde = (pde32_t *)&(guest_pd[PDE32_INDEX(vaddr)]);
+  
+    // Should we back propagate the invalidations, because they might be cached...?? 
+
+    if (guest_pde->large_page == 1) {
+       shadow_pde->present = 0;
+       // A large PDE keeps its frame in the 4MB layout's page_base_addr
+       // field; the 4KB layout's pt_base_addr would give a wrong address.
+       PrintError("\tInvalidating Large Page (gpa=%p)\n", 
+                  (void *)BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr));
+    } else if (shadow_pde->present == 1) {
+       // pt_base_addr is a host physical frame: index it first, then
+       // convert the entry's address to a host virtual address
+       pte32_t * shadow_pt = (pte32_t *)(addr_t)BASE_TO_PAGE_ADDR_4KB(shadow_pde->pt_base_addr);
+       pte32_t * shadow_pte = (pte32_t *) V3_VAddr( (void*) &shadow_pt[PTE32_INDEX(vaddr)] );
+
+       shadow_pte->present = 0;
+    } else {
+       PrintError("INVLPG for vaddr %p: shadow PDE not present, nothing to invalidate\n", (void *)vaddr);
+    }
+    return 0;
+}
+
+
index af23fed..e8c7209 100644 (file)
@@ -205,7 +205,7 @@ static addr_t map_swp_page(struct v3_vm_info * vm, pte32_t * shadow_pte, pte32_t
     shdw_ptr_list = (struct list_head *)v3_htable_search(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte);
 
     if (shdw_ptr_list == NULL) {
-       shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head *));
+       shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
 #ifdef CONFIG_SWAPBYPASS_TELEMETRY
        swap_state->list_size++;
 #endif
index 83bfb10..b3f4e43 100644 (file)
@@ -133,25 +133,21 @@ static inline int handle_shadow_pagefault_32(struct guest_info * info, addr_t fa
 
     if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {
 
-        if (info->use_large_pages && guest_pde->large_page) {
+        if ((info->use_large_pages == 1) && (guest_pde->large_page == 1)) {
             // Check underlying physical memory map to see if a large page is viable
-            addr_t guest_pa = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
-            addr_t host_pa;
-            if (v3_get_max_page_size(info, guest_pa, PAGE_SIZE_4MB) < PAGE_SIZE_4MB) {
-                PrintDebug("Underlying physical memory map doesn't allow use of a large page.\n");
-                // Fallthrough to small pages
-            } else if ((v3_gpa_to_hpa(info, guest_pa, &host_pa) != 0)
-                       || (v3_compute_page_alignment(host_pa) < PAGE_SIZE_4MB)) {
-                PrintDebug("Host memory alignment doesn't allow use of a large page.\n");
-                // Fallthrough to small pages
-            } else if (handle_4MB_shadow_pagefault_pde_32(info, fault_addr, error_code, shadow_pde_access,
-                                                          (pde32_4MB_t *)shadow_pde, (pde32_4MB_t *)guest_pde) == 0) {
+           addr_t guest_pa = BASE_TO_PAGE_ADDR_4MB(((pde32_4MB_t *)guest_pde)->page_base_addr);
+           uint32_t page_size = v3_get_max_page_size(info, guest_pa, PROTECTED);
+           
+           if (page_size == PAGE_SIZE_4MB) {
+               PrintDebug("using large page for fault_addr %p (gpa=%p)\n", (void *)fault_addr, (void *)guest_pa); 
+               if (handle_4MB_shadow_pagefault_pde_32(info, fault_addr, error_code, shadow_pde_access,
+                                                      (pde32_4MB_t *)shadow_pde, (pde32_4MB_t *)guest_pde) == -1) {
+                   PrintError("Error handling large pagefault with large page\n");
+                   return -1;
+               }
+               
                 return 0;
-            } else {
-                PrintError("Error handling large pagefault with large page\n");
-                return -1;
-            }
-            // Fallthrough to handle the region with small pages
+           }
         }
 
        struct shadow_page_data * shdw_page =  create_new_shadow_pt(info);
@@ -176,7 +172,6 @@ static inline int handle_shadow_pagefault_32(struct guest_info * info, addr_t fa
            }
        }
       
-
        // VMM Specific options
        shadow_pde->write_through = guest_pde->write_through;
        shadow_pde->cache_disable = guest_pde->cache_disable;
@@ -185,14 +180,12 @@ static inline int handle_shadow_pagefault_32(struct guest_info * info, addr_t fa
       
        guest_pde->accessed = 1;
       
-
        shadow_pde->pt_base_addr = PAGE_BASE_ADDR(shdw_page->page_pa);
     } else {
        shadow_pt = (pte32_t *)V3_VAddr((void *)BASE_TO_PAGE_ADDR(shadow_pde->pt_base_addr));
     }
 
-
-      
+    
     if (guest_pde->large_page == 0) {
        if (v3_gpa_to_hva(info, BASE_TO_PAGE_ADDR(guest_pde->pt_base_addr), (addr_t*)&guest_pt) == -1) {
            // Machine check the guest
@@ -486,7 +479,8 @@ static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * info,
                return -1;
            }
 
-           PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR(shadow_pte->page_base_addr));
+           PrintDebug("shadow PA = %p\n", (void *)shadow_pa);
+
 
             large_guest_pde->vmm_info = V3_LARGE_PG; /* For invalidations */
             large_shadow_pde->page_base_addr = PAGE_BASE_ADDR_4MB(shadow_pa);
@@ -494,6 +488,8 @@ static int handle_4MB_shadow_pagefault_pde_32(struct guest_info * info,
             large_shadow_pde->present = 1;
             large_shadow_pde->user_page = 1;
 
+           PrintDebug("\tMapping shadow page (%p)\n", (void *)BASE_TO_PAGE_ADDR_4MB(large_shadow_pde->page_base_addr));
+
             if (shdw_reg->flags.write == 0) {
                 large_shadow_pde->writable = 0;
             } else {
index 011a882..38aebe6 100644 (file)
@@ -338,23 +338,19 @@ static int handle_pde_shadow_pagefault_64(struct guest_info * info, addr_t fault
     if (shadow_pde_access == PT_ACCESS_NOT_PRESENT) {
         // Check if  we can use large pages and the guest memory is properly aligned
         // to potentially use a large page
-        if (info->use_large_pages && guest_pde->large_page) {
-            // Check underlying physical memory map to see if a large page is viable
+
+        if ((info->use_large_pages == 1) && (guest_pde->large_page == 1)) {
            addr_t guest_pa = BASE_TO_PAGE_ADDR_2MB(((pde64_2MB_t *)guest_pde)->page_base_addr);
-           addr_t host_pa;
-           if (v3_get_max_page_size(info, guest_pa, PAGE_SIZE_2MB) < PAGE_SIZE_2MB) {
-               PrintDebug("Underlying physical memory map doesn't allow use of a large page.\n");
-               // Fallthrough to small pages
-           } else if ((v3_gpa_to_hpa(info, guest_pa, &host_pa) != 0)
-                      || (v3_compute_page_alignment(host_pa) < PAGE_SIZE_2MB)) {
-               PrintDebug("Host memory alignment doesn't allow use of a large page.\n");
-               // Fallthrough to small pages
-           } else if (handle_2MB_shadow_pagefault_pde_64(info, fault_addr, error_code, shadow_pde_access,
-                                                      (pde64_2MB_t *)shadow_pde, (pde64_2MB_t *)guest_pde) == 0) {
-               return 0;
-           } else {
-               PrintError("Error handling large pagefault with large page\n");
-               return -1;
+           uint32_t page_size = v3_get_max_page_size(info, guest_pa, LONG);
+           
+           if (page_size == PAGE_SIZE_2MB) {
+               if (handle_2MB_shadow_pagefault_pde_64(info, fault_addr, error_code, shadow_pde_access,
+                                                      (pde64_2MB_t *)shadow_pde, (pde64_2MB_t *)guest_pde) == -1) {
+                   PrintError("Error handling large pagefault with large page\n");
+                   return -1;
+               }
+
+               return 0;
            }
            // Fallthrough to handle the region with small pages
        }
@@ -367,7 +363,6 @@ static int handle_pde_shadow_pagefault_64(struct guest_info * info, addr_t fault
        shadow_pde->present = 1;
        shadow_pde->user_page = guest_pde->user_page;
 
-
        if (guest_pde->large_page == 0) {
            shadow_pde->writable = guest_pde->writable;
        } else {
index 7b33c8c..4ba619b 100644 (file)
@@ -80,8 +80,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
 
 
     //
-
-
     ctrl_area->svm_instrs.VMRUN = 1;
     ctrl_area->svm_instrs.VMMCALL = 1;
     ctrl_area->svm_instrs.VMLOAD = 1;
@@ -89,7 +87,6 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->svm_instrs.STGI = 1;
     ctrl_area->svm_instrs.CLGI = 1;
     ctrl_area->svm_instrs.SKINIT = 1;
-    ctrl_area->svm_instrs.RDTSCP = 1;
     ctrl_area->svm_instrs.ICEBP = 1;
     ctrl_area->svm_instrs.WBINVD = 1;
     ctrl_area->svm_instrs.MONITOR = 1;
@@ -99,6 +96,12 @@ static void Init_VMCB_BIOS(vmcb_t * vmcb, struct guest_info * core) {
     ctrl_area->instrs.CPUID = 1;
 
     ctrl_area->instrs.HLT = 1;
+
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+    ctrl_area->instrs.RDTSC = 1;
+    ctrl_area->svm_instrs.RDTSCP = 1;
+#endif
+
     // guest_state->cr0 = 0x00000001;    // PE 
   
     /*
@@ -422,7 +425,6 @@ static int update_irq_entry_state(struct guest_info * info) {
 int v3_svm_enter(struct guest_info * info) {
     vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
     vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data)); 
-    ullong_t tmp_tsc;
     addr_t exit_code = 0, exit_info1 = 0, exit_info2 = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
@@ -475,32 +477,26 @@ int v3_svm_enter(struct guest_info * info) {
     }
 #endif
 
+    v3_update_timers(info);
 
-    rdtscll(tmp_tsc);
-    v3_update_time(info, (tmp_tsc - info->time_state.cached_host_tsc));
-    rdtscll(info->time_state.cached_host_tsc);
-    //    guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc;
+    /* If this guest is frequency-lagged behind host time, wait 
+     * for the appropriate host time before resuming the guest. */
+    v3_adjust_time(info);
 
-    //V3_Print("Calling v3_svm_launch\n");
+    guest_ctrl->TSC_OFFSET = v3_tsc_host_offset(&info->time_state);
 
+    //V3_Print("Calling v3_svm_launch\n");
 
     v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]);
-    
-    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
 
+    //V3_Print("SVM Returned: Exit Code: %x, guest_rip=%lx\n", (uint32_t)(guest_ctrl->exit_code), (unsigned long)guest_state->rip);
 
     v3_last_exit = (uint32_t)(guest_ctrl->exit_code);
 
-    //rdtscll(tmp_tsc);
-    //    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
     //PrintDebug("SVM Returned\n");
     
     info->num_exits++;
 
-
-
-
     // Save Guest state from VMCB
     info->rip = guest_state->rip;
     info->vm_regs.rsp = guest_state->rsp;
@@ -558,46 +554,36 @@ int v3_svm_enter(struct guest_info * info) {
 }
 
 
-int v3_start_svm_guest(struct guest_info *info) {
+int v3_start_svm_guest(struct guest_info * info) {
     //    vmcb_saved_state_t * guest_state = GET_VMCB_SAVE_STATE_AREA((vmcb_t*)(info->vmm_data));
     //  vmcb_ctrl_t * guest_ctrl = GET_VMCB_CTRL_AREA((vmcb_t*)(info->vmm_data));
 
+    PrintDebug("Starting SVM core %u\n", info->cpu_id);
+
+    if (info->cpu_id == 0) {
+       info->core_run_state = CORE_RUNNING;
+       info->vm_info->run_state = VM_RUNNING;
+    } else  { 
+       PrintDebug("SVM core %u: Waiting for core initialization\n", info->cpu_id);
 
-    PrintDebug("Starting SVM core %u\n",info->cpu_id);
-    if (info->cpu_mode==INIT) { 
-       PrintDebug("SVM core %u: I am an AP in INIT mode, waiting for that to change\n",info->cpu_id);
-       while (info->cpu_mode==INIT) {
+       while (info->core_run_state == CORE_STOPPED) {
            v3_yield(info);
            //PrintDebug("SVM core %u: still waiting for INIT\n",info->cpu_id);
        }
-       PrintDebug("SVM core %u: I am out of INIT\n",info->cpu_id);
-       if (info->cpu_mode==SIPI) { 
-           PrintDebug("SVM core %u: I am waiting on a SIPI to set my starting address\n",info->cpu_id);
-           while (info->cpu_mode==SIPI) {
-               v3_yield(info);
-               //PrintDebug("SVM core %u: still waiting for SIPI\n",info->cpu_id);
-           }
-       }
-       PrintDebug("SVM core %u: I have my SIPI\n", info->cpu_id);
-    }
 
-    if (info->cpu_mode!=REAL) { 
-       PrintError("SVM core %u: I am not in REAL mode at launch!  Huh?!\n", info->cpu_id);
-       return -1;
-    }
+       PrintDebug("SVM core %u initialized\n", info->cpu_id);
+    } 
 
     PrintDebug("SVM core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n", 
-              info->cpu_id, info->segments.cs.selector, (void*)(info->segments.cs.base), 
-              info->segments.cs.limit,(void*)(info->rip));
+              info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base), 
+              info->segments.cs.limit, (void *)(info->rip));
 
 
 
     PrintDebug("SVM core %u: Launching SVM VM (vmcb=%p)\n", info->cpu_id, (void *)info->vmm_data);
     //PrintDebugVMCB((vmcb_t*)(info->vmm_data));
     
-    info->vm_info->run_state = VM_RUNNING;
-    rdtscll(info->yield_start_cycle);
-
+    v3_start_time(info);
 
     while (1) {
        if (v3_svm_enter(info) == -1) {
index d8b47c5..171d2b5 100644 (file)
@@ -246,9 +246,24 @@ int v3_handle_svm_exit(struct guest_info * info, addr_t exit_code, addr_t exit_i
                return -1;
            }
            break;
-       
-
-
+        case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+           PrintDebug("RDTSC/RDTSCP\n");
+#endif 
+           if (v3_handle_rdtsc(info) == -1) {
+               PrintError("Error Handling RDTSC instruction\n");
+               return -1;
+           }
+           break;
+        case VMEXIT_RDTSCP:
+#ifdef CONFIG_DEBUG_TIME
+           PrintDebug("RDTSCP\n");
+#endif 
+           if (v3_handle_rdtscp(info) == -1) {
+               PrintError("Error Handling RDTSCP instruction\n");
+               return -1;
+           }
+           break;
 
 
            /* Exits Following this line are NOT HANDLED */
index 4dddf6e..dcd471a 100644 (file)
@@ -365,23 +365,25 @@ void v3_print_GPRs(struct guest_info * info) {
 #include <palacios/vmcb.h>
 static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_data) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
-    
+    int cpu_valid = 0;
+
     v3_print_guest_state(core);
     
-
     // init SVM/VMX
 #ifdef CONFIG_SVM
     if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
+       cpu_valid = 1;
        PrintDebugVMCB((vmcb_t *)(core->vmm_data));
     }
 #endif
 #ifdef CONFIG_VMX
-    else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+    if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+       cpu_valid = 1;
        v3_print_vmcs();
     }
 #endif
-    else {
-       PrintError("Invalid CPU Type\n");
+    if (!cpu_valid) {
+       PrintError("Invalid CPU Type 0x%x\n", cpu_type);
        return -1;
     }
     
@@ -405,7 +407,7 @@ static int info_hcall(struct guest_info * core, uint_t hcall_id, void * priv_dat
 
 int v3_init_vm(struct v3_vm_info * vm) {
     v3_cpu_arch_t cpu_type = v3_get_cpu_type(v3_get_cpu_id());
-
+    int cpu_valid = 0;
 
     if (v3_get_foreground_vm() == NULL) {
        v3_set_foreground_vm(vm);
@@ -449,24 +451,23 @@ int v3_init_vm(struct v3_vm_info * vm) {
     if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
        v3_init_svm_io_map(vm);
        v3_init_svm_msr_map(vm);
-    }
+       cpu_valid = 1;
+    } 
 #endif
 #ifdef CONFIG_VMX
-    else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
+    if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
        v3_init_vmx_io_map(vm);
        v3_init_vmx_msr_map(vm);
+       cpu_valid = 1;
     }
 #endif
-    else {
-       PrintError("Invalid CPU Type\n");
+    if (!cpu_valid) {
+       PrintError("Invalid CPU Type 0x%x\n", cpu_type);
        return -1;
     }
     
-
-
     v3_register_hypercall(vm, GUEST_INFO_HCALL, info_hcall, NULL);
 
-
     V3_Print("GUEST_INFO_HCALL=%x\n", GUEST_INFO_HCALL);
 
     return 0;
@@ -499,25 +500,30 @@ int v3_init_core(struct guest_info * core) {
 #endif
 
     // init SVM/VMX
+
+
+    switch (cpu_type) {
 #ifdef CONFIG_SVM
-    if ((cpu_type == V3_SVM_CPU) || (cpu_type == V3_SVM_REV3_CPU)) {
-       if (v3_init_svm_vmcb(core, vm->vm_class) == -1) {
-           PrintError("Error in SVM initialization\n");
-           return -1;
-       }
-    }
+       case V3_SVM_CPU:
+       case V3_SVM_REV3_CPU:
+           if (v3_init_svm_vmcb(core, vm->vm_class) == -1) {
+               PrintError("Error in SVM initialization\n");
+               return -1;
+           }
+           break;
 #endif
 #ifdef CONFIG_VMX
-    else if ((cpu_type == V3_VMX_CPU) || (cpu_type == V3_VMX_EPT_CPU)) {
-       if (v3_init_vmx_vmcs(core, vm->vm_class) == -1) {
-           PrintError("Error in VMX initialization\n");
-           return -1;
-       }
-    }
+       case V3_VMX_CPU:
+       case V3_VMX_EPT_CPU:
+           if (v3_init_vmx_vmcs(core, vm->vm_class) == -1) {
+               PrintError("Error in VMX initialization\n");
+               return -1;
+           }
+           break;
 #endif
-    else {
-       PrintError("Invalid CPU Type\n");
-       return -1;
+       default:
+           PrintError("Invalid CPU Type 0x%x\n", cpu_type);
+           return -1;
     }
 
     return 0;
index 811e45c..92fbcbf 100644 (file)
@@ -98,7 +98,7 @@ void Init_V3(struct v3_os_hooks * hooks, int num_cpus) {
 
 
 #ifdef CONFIG_VNET
-    v3_init_vnet();
+    V3_init_vnet();
 #endif
 
     if ((hooks) && (hooks->call_on_cpu)) {
@@ -117,42 +117,43 @@ v3_cpu_arch_t v3_get_cpu_type(int cpu_id) {
 }
 
 
-struct v3_vm_info * v3_create_vm(void * cfg) {
+struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data) {
     struct v3_vm_info * vm = v3_config_guest(cfg);
 
     if (vm == NULL) {
        PrintError("Could not configure guest\n");
        return NULL;
     }
 
+    // vm is known to be non-NULL from here on, so it is safe to dereference
+    V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
+    vm->host_priv_data = priv_data;
+
     return vm;
 }
 
 
-static int start_core(void *p)
+static int start_core(void * p)
 {
-    struct guest_info * info = (struct guest_info*)p;
+    struct guest_info * core = (struct guest_info *)p;
 
 
-    PrintDebug("core %u: in start_core\n",info->cpu_id);
-    
-    // we assume here that the APs are in INIT mode
-    // and only the BSP is in REAL
-    // the per-architecture code will rely on this
-    // assumption
+    PrintDebug("core %u: in start_core (RIP=%p)\n", 
+              core->cpu_id, (void *)(addr_t)core->rip);
 
 
-    switch (v3_cpu_types[info->cpu_id]) {
+    // FIXME (JRL): v3_cpu_types is indexed by the vcore id here - is that intended?
+    switch (v3_cpu_types[core->cpu_id]) {
 #ifdef CONFIG_SVM
        case V3_SVM_CPU:
        case V3_SVM_REV3_CPU:
-           return v3_start_svm_guest(info);
+           return v3_start_svm_guest(core);
            break;
 #endif
 #if CONFIG_VMX
        case V3_VMX_CPU:
        case V3_VMX_EPT_CPU:
-           return v3_start_vmx_guest(info);
+           return v3_start_vmx_guest(core);
            break;
 #endif
        default:
@@ -164,71 +165,68 @@ static int start_core(void *p)
 }
 
 
-static uint32_t get_next_core(unsigned int cpu_mask, uint32_t last_proc)
-{
-    uint32_t proc_to_use;
-
-    PrintDebug("In get_next_core cpu_mask=0x%x last_proc=%u\n",cpu_mask,last_proc);
-
-    proc_to_use=(last_proc+1) % 32; // only 32 procs
-    // This will wrap around, and eventually we can use proc 0, 
-    // since that's clearly available
-    while (!((cpu_mask >> proc_to_use)&0x1)) {
-       proc_to_use=(proc_to_use+1)%32;
-    }
-    return proc_to_use;
-}
+// For the moment very ugly. Eventually we will shift the cpu_mask to an arbitrary sized type...
+#define MAX_CORES 32
 
 int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
     uint32_t i;
-    uint32_t last_proc;
-    uint32_t proc_to_use;
     char tname[16];
+    int vcore_id = 0;
+    uint8_t * core_mask = (uint8_t *)&cpu_mask; // This is to make future expansion easier
+    uint32_t avail_cores = 0;
 
-    V3_Print("V3 --  Starting VM (%u cores)\n",vm->num_cores);
 
-    // We assume that we are running on CPU 0 of the underlying system
-    last_proc=0;
 
-    // We will fork off cores 1..n first, then boot core zero
-    
-    // for the AP, we need to create threads
-    for (i = 1; i < vm->num_cores; i++) {
-       if (!os_hooks->start_thread_on_cpu) { 
-           PrintError("Host OS does not support start_thread_on_cpu - FAILING\n");
-           return -1;
+    // TODO: check whether we are multicore enabled
+
+    V3_Print("V3 --  Starting VM (%u cores)\n", vm->num_cores);
+    V3_Print("CORE 0 RIP=%p\n", (void *)(addr_t)(vm->cores[0].rip));
+
+    // Check that enough cores are present in the mask to handle vcores
+    for (i = 0; i < MAX_CORES; i++) {
+       int major = i / 8;
+       int minor = i % 8;
+
+       if (core_mask[major] & (0x1 << minor)) {
+           avail_cores++;
        }
+       
+    }
+    
+    if (vm->num_cores > avail_cores) {
+       PrintError("Attempted to start a VM with too many cores (MAX=%d)\n", MAX_CORES);
+       return -1;
+    }
 
-       proc_to_use=get_next_core(cpu_mask,last_proc);
-       last_proc=proc_to_use;
 
-       // vm->cores[i].cpu_id=i;
-       // vm->cores[i].physical_cpu_id=proc_to_use;
+    for (i = 0; (i < MAX_CORES) && (vcore_id < vm->num_cores); i++) {
+       int major = i / 8;
+       int minor = i % 8;
+       void * core_thread = NULL;
 
-       PrintDebug("Starting virtual core %u on logical core %u\n",i,proc_to_use);
+       if ((core_mask[major] & (0x1 << minor)) == 0) {
+           // cpuid not set in cpu_mask
+           continue;
+       } 
+
+       PrintDebug("Starting virtual core %u on logical core %u\n", 
+                  vcore_id, i);
        
-       sprintf(tname,"core%u",i);
+       sprintf(tname, "core%u", vcore_id);
 
        PrintDebug("run: core=%u, func=0x%p, arg=0x%p, name=%s\n",
-                  proc_to_use, start_core, &(vm->cores[i]), tname);
+                  i, start_core, &(vm->cores[vcore_id]), tname);
 
        // TODO: actually manage these threads instead of just launching them
-       if (!(os_hooks->start_thread_on_cpu(proc_to_use,start_core,&(vm->cores[i]),tname))) { 
+       core_thread = V3_CREATE_THREAD_ON_CPU(i, start_core, 
+                                             &(vm->cores[vcore_id]), tname);
+
+       if (core_thread == NULL) {
            PrintError("Thread launch failed\n");
            return -1;
        }
-    }
 
-    // vm->cores[0].cpu_id=0;
-    // vm->cores[0].physical_cpu_id=0;
-
-    // Finally launch the BSP on core 0
-    sprintf(tname,"core%u",0);
-
-    if (!os_hooks->start_thread_on_cpu(0,start_core,&(vm->cores[0]),tname)) { 
-       PrintError("Thread launch failed\n");
-       return -1;
+       vcore_id++;
     }
 
     return 0;
@@ -278,7 +276,7 @@ v3_cpu_mode_t v3_get_host_cpu_mode() {
 
 void v3_yield_cond(struct guest_info * info) {
     uint64_t cur_cycle;
-    rdtscll(cur_cycle);
+    cur_cycle = v3_get_host_time(&info->time_state);
 
     if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) {
 
@@ -287,7 +285,7 @@ void v3_yield_cond(struct guest_info * info) {
          (void *)cur_cycle, (void *)info->yield_start_cycle, (void *)info->yield_cycle_period);
        */
        V3_Yield();
-       rdtscll(info->yield_start_cycle);
+       info->yield_start_cycle = v3_get_host_time(&info->time_state);
     }
 }
 
@@ -301,7 +299,7 @@ void v3_yield(struct guest_info * info) {
     V3_Yield();
 
     if (info) {
-       rdtscll(info->yield_start_cycle);
+       info->yield_start_cycle = v3_get_host_time(&info->time_state);
     }
 }
 
index 6ccf509..ecbb996 100644 (file)
@@ -282,10 +282,10 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
 {
     extern v3_cpu_arch_t v3_cpu_types[];
 
-    v3_cfg_tree_t *vm_tree = info->vm_info->cfg_data->cfg;
-    v3_cfg_tree_t *pg_tree = v3_cfg_subtree(vm_tree, "paging");
-    char *pg_mode          = v3_cfg_val(pg_tree, "mode");
-    char *page_size        = v3_cfg_val(pg_tree, "page_size");
+    v3_cfg_tree_t * vm_tree = info->vm_info->cfg_data->cfg;
+    v3_cfg_tree_t * pg_tree = v3_cfg_subtree(vm_tree, "paging");
+    char * pg_mode          = v3_cfg_val(pg_tree, "mode");
+    char * page_size        = v3_cfg_val(pg_tree, "page_size");
     
     PrintDebug("Paging mode specified as %s\n", pg_mode);
 
@@ -327,11 +327,12 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
        return -1;
     }
 
-    if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) {
-       info->use_large_pages = 1;
-       PrintDebug("Use of large pages in memory virtualization enabled.\n");
+    if (v3_cfg_val(pg_tree, "large_pages") != NULL) {
+       if (strcasecmp(v3_cfg_val(pg_tree, "large_pages"), "true") == 0) {
+           info->use_large_pages = 1;
+           PrintDebug("Use of large pages in memory virtualization enabled.\n");
+       }
     }
-
     return 0;
 }
 
@@ -389,12 +390,6 @@ static int post_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        return -1;
     }
 
-    if (v3_inject_mptable(vm) == -1) { 
-       PrintError("Failed to inject mptable during configuration\n");
-       return -1;
-    }
-
-
     return 0;
 }
 
@@ -402,7 +397,7 @@ static int post_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
 
 static int post_config_core(struct guest_info * info, v3_cfg_tree_t * cfg) {
 
-
+    info->core_run_state = CORE_STOPPED;
  
     if (info->vm_info->vm_class == V3_PC_VM) {
        if (post_config_pc_core(info, cfg) == -1) {
@@ -496,6 +491,7 @@ struct v3_vm_info * v3_config_guest(void * cfg_blob) {
 
        info->cpu_id = i;
        info->vm_info = vm;
+       info->core_cfg_data = per_core_cfg;
 
        if (pre_config_core(info, per_core_cfg) == -1) {
            PrintError("Error in core %d preconfiguration\n", i);
index 1770a6f..9fcf197 100644 (file)
 
 static int pre_config_pc_core(struct guest_info * info, v3_cfg_tree_t * cfg) { 
 
-    if (info->cpu_id!=0) { 
-       // I am an AP, so I will start in INIT mode,
-       // not in real mode.   This means I will wait for
-       // an INIT and then for a SIPI.   The SIPI will
-       // tell me where to start executing in real mode
-       info->cpu_mode = INIT;
-    } else {
-       // I am the MP, so I will start as normal
-       info->cpu_mode = REAL;
-    }
-
     info->mem_mode = PHYSICAL_MEM;
 
 
@@ -87,6 +76,14 @@ static int post_config_pc(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
        memcpy(rombios_dst, v3_rombios_start, v3_rombios_end - v3_rombios_start);
     }
 
+
+    if (vm->num_cores > 1) {
+       if (v3_inject_mptable(vm) == -1) { 
+           PrintError("Failed to inject mptable during configuration\n");
+           return -1;
+       }
+    }
+
     return 0;
 }
 
index 97324d4..baae5d5 100644 (file)
@@ -54,8 +54,8 @@ static inline int handle_passthrough_pagefault_64(struct guest_info * core, addr
      *  1. the guest is configured to use large pages and 
      *         2. the memory regions can be referenced by a large page
      */
-    if ((core->use_large_pages == 1) ) {
-       page_size = v3_get_max_page_size(core, fault_addr, PAGE_SIZE_2MB);
+    if ((core->use_large_pages == 1) || (core->use_giant_pages == 1)) {
+       page_size = v3_get_max_page_size(core, fault_addr, LONG);
     }
 
     PrintDebug("Using page size of %dKB\n", page_size / 1024);
index 5015046..7970a40 100644 (file)
@@ -38,17 +38,12 @@ int v3_handle_halt(struct guest_info * info) {
     if (info->cpl != 0) { 
        v3_raise_exception(info, GPF_EXCEPTION);
     } else {
-       uint64_t yield_start = 0;
-       
        PrintDebug("CPU Yield\n");
 
        while (!v3_intr_pending(info)) {
-           rdtscll(yield_start);
+           /* Since we're in an exit, time is already paused here, so no need to pause again. */
            v3_yield(info);
-           
-           v3_update_time(info, yield_start - info->time_state.cached_host_tsc);
-           
-           rdtscll(info->time_state.cached_host_tsc);
+           v3_update_timers(info);
            
            /* At this point, we either have some combination of 
               interrupts, including perhaps a timer interrupt, or 
index 7994cc4..a1505c3 100644 (file)
@@ -139,7 +139,6 @@ int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
 
     entry->host_addr = host_addr;
 
-
     entry->flags.read = 1;
     entry->flags.write = 1;
     entry->flags.exec = 1;
@@ -157,7 +156,7 @@ int v3_add_shadow_mem( struct v3_vm_info * vm, uint16_t core_id,
 
 static inline 
 struct v3_mem_region * __insert_mem_region(struct v3_vm_info * vm, 
-                                                struct v3_mem_region * region) {
+                                          struct v3_mem_region * region) {
     struct rb_node ** p = &(vm->mem_map.mem_regions.rb_node);
     struct rb_node * parent = NULL;
     struct v3_mem_region * tmp_region;
@@ -291,52 +290,119 @@ struct v3_mem_region * v3_get_mem_region(struct v3_vm_info * vm, uint16_t core_i
 
 
 
-/* Given an address, find the successor region. If the address is within a region, return that
- * region. Input is an address, because the address may not have a region associated with it.
- *
- * Returns a region following or touching the given address. If address is invalid, NULL is
- * returned, else the base region is returned if no region exists at or after the given address.
+/* Returns the region that contains guest_addr if one is visible to core_id
+ * (matching core_id or V3_MEM_CORE_ANY); otherwise returns the next visible
+ * region that starts above guest_addr, or NULL if no such region is found.
+ * Only nodes of the vm->mem_map.mem_regions tree are considered.
  */
-struct v3_mem_region * v3_get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
-    struct rb_node * current_n         = vm->mem_map.mem_regions.rb_node;
-    struct rb_node * successor_n       = NULL; /* left-most node greater than guest_addr */
-    struct v3_mem_region * current_r   = NULL;
-
-    /* current_n tries to find the region containing guest_addr, going right when smaller and left when
-     * greater. Each time current_n becomes greater than guest_addr, update successor <- current_n.
-     * current_n becomes successively closer to guest_addr than the previous time it was greater
-     * than guest_addr.
-     */
-
-    /* | is address, ---- is region, + is intersection */
-    while (current_n) {
-        current_r = rb_entry(current_n, struct v3_mem_region, tree_node);
-       if (current_r->guest_start > guest_addr) { /* | ---- */
-           successor_n = current_n;
-           current_n = current_n->rb_left;
+static struct v3_mem_region * get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
+    struct rb_node * n = vm->mem_map.mem_regions.rb_node;
+    struct v3_mem_region * reg = NULL;
+    struct v3_mem_region * parent = NULL;
+
+    while (n) {
+       reg = rb_entry(n, struct v3_mem_region, tree_node);
+       if (guest_addr < reg->guest_start) {
+           n = n->rb_left;
+       } else if (guest_addr >= reg->guest_end) {
+           n = n->rb_right;
        } else {
-           if (current_r->guest_end > guest_addr) {
-               return current_r; /* +--- or --+- */
+           if (reg->core_id == V3_MEM_CORE_ANY) {
+               // found relevant region, it's available on all cores
+               return reg;
+           } else if (core_id == reg->core_id) { 
+               // found relevant region, it's available on the indicated core
+               return reg;
+           } else if (core_id < reg->core_id) { 
+               // go left, core too big
+               n = n->rb_left;
+           } else if (core_id > reg->core_id) { 
+               // go right, core too small
+               n = n->rb_right;
+           } else {
+               PrintError("get_next_mem_region: Impossible!\n");
+               return NULL;
            }
-           current_n = current_n->rb_right; /* ---- | */
+       }
+
+       if ((reg->core_id == core_id) || (reg->core_id == V3_MEM_CORE_ANY)) {
+           parent = reg;
        }
     }
 
-    /* Address does not have its own region. Check if it's a valid address in the base region */
 
-    if (guest_addr >= vm->mem_map.base_region.guest_end) {
-       PrintError("%s: Guest Address Exceeds Base Memory Size (ga=%p), (limit=%p)\n",
-               __FUNCTION__, (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end);
-        v3_print_mem_map(vm);
-        return NULL;
+    if (parent == NULL) {
+       // No region visible to this core was visited (e.g. the tree is empty)
+       return NULL;
+    } else if (parent->guest_start > guest_addr) {
+       return parent;
+    } else if (parent->guest_end <= guest_addr) {
+       // guest_end is exclusive (see the descent above), so scan forward from parent
+       struct rb_node * node = &(parent->tree_node);
+
+       while ((node = v3_rb_next(node)) != NULL) {
+           struct v3_mem_region * next_reg = rb_entry(node, struct v3_mem_region, tree_node);
+           if ((next_reg->core_id == V3_MEM_CORE_ANY) ||
+               (next_reg->core_id == core_id)) {
+               // This check is not strictly necessary, but it makes it clearer
+               if (next_reg->guest_start > guest_addr) {
+                   return next_reg;
+               }
+           }
+       }
     }
 
-    return &(vm->mem_map.base_region);
+    return NULL;
+}
+
+
+
+
+/* Check whether the guest physical range [start_gpa, end_gpa) is backed by a single
+ * region as seen from core_id. On success the region holding start_gpa is returned,
+ * which is either the base region or a sub region. NULL is returned when start_gpa
+ * has no region or when the range would touch more than one region.
+ */
+static struct v3_mem_region * get_overlapping_region(struct v3_vm_info * vm, uint16_t core_id, 
+                                                    addr_t start_gpa, addr_t end_gpa) {
+    struct v3_mem_region * first = v3_get_mem_region(vm, core_id, start_gpa);
+    struct v3_mem_region * following = NULL;
+
+    if (first == NULL) {
+       PrintError("Invalid memory region\n");
+       return NULL;
+    }
+
+    if (first->guest_end < end_gpa) {
+       // The region holding start_gpa stops before the end of the range
+       return NULL;
+    }
+
+    if (first->flags.base == 0) {
+       // A sub region covers the range up to end_gpa
+       return first;
+    }
+
+    // Base region: look for the next sub region at or after start_gpa
+    following = get_next_mem_region(vm, core_id, start_gpa);
+
+    if (following == NULL) {
+       // Nothing follows start_gpa, so the base region backs the whole range
+       return first;
+    }
+
+    if (following->guest_start < end_gpa) {
+       // A sub region begins inside the range
+       return NULL;
+    }
+
+    return first;
+}
 
 
 
 
+
 void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
     int i = 0;
 
@@ -387,110 +453,72 @@ void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {
 }
 
 // Determine if a given address can be handled by a large page of the requested size
-uint32_t v3_get_max_page_size(struct guest_info * core, addr_t fault_addr, uint32_t req_size) {
-    addr_t pg_start = 0UL, pg_end = 0UL; // large page containing the faulting addres
-    struct v3_mem_region * pg_next_reg = NULL; // next immediate mem reg after page start addr
+uint32_t v3_get_max_page_size(struct guest_info * core, addr_t page_addr, v3_cpu_mode_t mode) {
+    addr_t pg_start = 0;
+    addr_t pg_end = 0; 
     uint32_t page_size = PAGE_SIZE_4KB;
+    struct v3_mem_region * reg = NULL;
+    
+    switch (mode) {
+        case PROTECTED:
+           if (core->use_large_pages == 1) {
+               pg_start = PAGE_ADDR_4MB(page_addr);
+               pg_end = (pg_start + PAGE_SIZE_4MB);
 
-   /* If the guest has been configured for large pages, then we must check for hooked regions of
-     * memory which may overlap with the large page containing the faulting address (due to
-     * potentially differing access policies in place for e.g. i/o devices and APIC). A large page
-     * can be used if a) no region overlaps the page [or b) a region does overlap but fully contains
-     * the page]. The [bracketed] text pertains to the #if 0'd code below, state D. TODO modify this
-     * note if someone decides to enable this optimization. It can be tested with the SeaStar
-     * mapping.
-     *
-     * Examples: (CAPS regions are returned by v3_get_next_mem_region; state A returns the base reg)
-     *
-     *    |region| |region|                               2MiB mapped (state A)
-     *                   |reg|          |REG|             2MiB mapped (state B)
-     *   |region|     |reg|   |REG| |region|   |reg|      4KiB mapped (state C)
-     *        |reg|  |reg|   |--REGION---|                [2MiB mapped (state D)]
-     * |--------------------------------------------|     RAM
-     *                             ^                      fault addr
-     * |----|----|----|----|----|page|----|----|----|     2MB pages
-     *                           >>>>>>>>>>>>>>>>>>>>     search space
-     */
-
-
-    // guest page maps to a host page + offset (so when we shift, it aligns with a host page)
-    switch (req_size) {
-       case PAGE_SIZE_4KB:
-               return PAGE_SIZE_4KB;
-       case PAGE_SIZE_2MB:
-               pg_start = PAGE_ADDR_2MB(fault_addr);
-               pg_end = (pg_start + PAGE_SIZE_2MB);
-               break;
-       case PAGE_SIZE_4MB:
-               pg_start = PAGE_ADDR_4MB(fault_addr);
-               pg_end = (pg_start + PAGE_SIZE_4MB);
-               break;
-       case PAGE_SIZE_1GB:
-               pg_start = PAGE_ADDR_1GB(fault_addr);
-               pg_end = (pg_start + PAGE_SIZE_1GB);
-               break;
-       default:
-               PrintError("Invalid large page size requested.\n");
-               return -1;
-    }
-
-    //PrintDebug("%s: page   [%p,%p) contains address\n", __FUNCTION__, (void *)pg_start, (void *)pg_end);
-
-    pg_next_reg = v3_get_next_mem_region(core->vm_info, core->cpu_id, pg_start);
-
-    if (pg_next_reg == NULL) {
-       PrintError("%s: Error: address not in base region, %p\n", __FUNCTION__, (void *)fault_addr);
-       return PAGE_SIZE_4KB;
-    }
+               reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end); 
 
-    if (pg_next_reg->flags.base == 1) {
-       page_size = req_size; // State A
-       //PrintDebug("%s: base region [%p,%p) contains page.\n", __FUNCTION__,
-       //         (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end);
-    } else {
-#if 0       // State B/C and D optimization
-       if ((pg_next_reg->guest_end >= pg_end) &&
-           ((pg_next_reg->guest_start >= pg_end) || (pg_next_reg->guest_start <= pg_start))) {     
-           page_size = req_size;
-       }
+               if ((reg) && ((reg->host_addr % PAGE_SIZE_4MB) == 0)) {
+                   page_size = PAGE_SIZE_4MB;
+               }
+           }
+           break;
+        case PROTECTED_PAE:
+           if (core->use_large_pages == 1) {
+               pg_start = PAGE_ADDR_2MB(page_addr);
+               pg_end = (pg_start + PAGE_SIZE_2MB);
 
-       PrintDebug("%s: region [%p,%p) %s partially overlap with page\n", __FUNCTION__,
-                  (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end, 
-                  (page_size == req_size) ? "does not" : "does");
+               reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
 
-#else       // State B/C
-       if (pg_next_reg->guest_start >= pg_end) {
-           
-           page_size = req_size;
-       }
+               if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
+                   page_size = PAGE_SIZE_2MB;
+               }
+           }
+           break;
+        case LONG:
+        case LONG_32_COMPAT:
+        case LONG_16_COMPAT:
+           if (core->use_giant_pages == 1) {
+               pg_start = PAGE_ADDR_1GB(page_addr);
+               pg_end = (pg_start + PAGE_SIZE_1GB);
+               
+               reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
+               
+               if ((reg) && ((reg->host_addr % PAGE_SIZE_1GB) == 0)) {
+                   page_size = PAGE_SIZE_1GB;
+                   break;
+               }
+           }
 
-       PrintDebug("%s: region [%p,%p) %s overlap with page\n", __FUNCTION__,
-                  (void *)pg_next_reg->guest_start, (void *)pg_next_reg->guest_end,
-                  (page_size == req_size) ? "does not" : "does");
+           if (core->use_large_pages == 1) {
+               pg_start = PAGE_ADDR_2MB(page_addr);
+               pg_end = (pg_start + PAGE_SIZE_2MB);
 
-#endif
+               reg = get_overlapping_region(core->vm_info, core->cpu_id, pg_start, pg_end);
+               
+               if ((reg) && ((reg->host_addr % PAGE_SIZE_2MB) == 0)) {
+                   page_size = PAGE_SIZE_2MB;
+               }
+           }
+           break;
+        default:
+            PrintError("Invalid CPU mode: %s\n", v3_cpu_mode_to_str(v3_get_vm_cpu_mode(core)));
+            return -1;
     }
 
     return page_size;
 }
 
-// For an address on a page of size page_size, compute the actual alignment
-// of the physical page it maps to
-uint32_t v3_compute_page_alignment(addr_t page_addr)
-{
-    if (PAGE_OFFSET_1GB(page_addr) == 0) {
-        return PAGE_SIZE_1GB;
-    } else if (PAGE_OFFSET_4MB(page_addr) == 0) {
-        return PAGE_SIZE_4MB;
-    } else if (PAGE_OFFSET_2MB(page_addr) == 0) {
-       return PAGE_SIZE_2MB;
-    } else if (PAGE_OFFSET_4KB(page_addr) == 0) {
-       return PAGE_SIZE_4KB;
-    } else {
-        PrintError("Non-page aligned address passed to %s.\n", __FUNCTION__);
-       return 0;
-    }
-}
+
 
 void v3_print_mem_map(struct v3_vm_info * vm) {
     struct rb_node * node = v3_rb_first(&(vm->mem_map.mem_regions));
index 96d3ddc..66a14d4 100644 (file)
@@ -26,6 +26,8 @@
 void v3_init_msr_map(struct v3_vm_info * vm) {
     struct v3_msr_map * msr_map  = &(vm->msr_map);
 
+    PrintDebug("Initializing MSR map.\n");
+
     INIT_LIST_HEAD(&(msr_map->hook_list));
     msr_map->num_hooks = 0;
 
index 434b34e..65d19ee 100644 (file)
@@ -47,6 +47,8 @@
 #endif
 
 
+static const char default_strategy[] = "VTLB";
+
 
 static struct hashtable * master_shdw_pg_table = NULL;
 
@@ -146,10 +148,14 @@ int v3_init_shdw_impl(struct v3_vm_info * vm) {
     struct v3_shdw_pg_impl * impl = NULL;
    
     PrintDebug("Checking if shadow paging requested.\n");
-    if (pg_mode && (strcasecmp(pg_mode, "nested") == 0)) {
+    if ((pg_mode != NULL) && (strcasecmp(pg_mode, "nested") == 0)) {
        PrintDebug("Nested paging specified - not initializing shadow paging.\n");
        return 0;
     }
+
+    if (pg_strat == NULL) {
+       pg_strat = (char *)default_strategy;
+    }
        
     V3_Print("Initialization of Shadow Paging implementation\n");
 
index b169669..44affe4 100644 (file)
@@ -12,6 +12,7 @@
  * All rights reserved.
  *
  * Author: Jack Lange <jarusl@cs.northwestern.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
  *
  * This is free software.  You are permitted to use,
  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 #include <palacios/vmm.h>
 #include <palacios/vm_guest.h>
 
+#ifndef CONFIG_DEBUG_TIME
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+/* Overview 
+ *
+ * Time handling in VMMs is challenging, and Palacios uses the highest 
+ * resolution, lowest overhead timer on modern CPUs that it can - the 
+ * processor timestamp counter (TSC). Note that on somewhat old processors
+ * this can be problematic; in particular, older AMD processors did not 
+ * have a constant rate timestamp counter in the face of power management
+ * events. However, the latest Intel and AMD CPUs all do (should...) have a 
+ * constant rate TSC, and Palacios relies on this fact.
+ * 
+ * Basically, Palacios keeps track of three quantities as it runs to manage
+ * the passage of time:
+ * (1) The host timestamp counter - read directly from HW and never written
+ * (2) A monotonic guest timestamp counter used to measure the progression of
+ *     time in the guest. This is computed using an offset from (1) above.
+ * (3) The actual guest timestamp counter (which can be written by
+ *     writing to the guest TSC MSR - MSR 0x10). This is computed as an
+ *     offset from the monotonic guest TSC in (2) above, and this offset
+ *     is updated when the TSC MSR is written.
+ *
+ * The value used to offset the guest TSC from the host TSC is the *sum* of
+ * both of these offsets (2 and 3 above).
+ * 
+ * Because all other devices are slaved off of the passage of time in the guest,
+ * it is (2) above that drives the firing of other timers in the guest, 
+ * including timer devices such as the Programmable Interrupt Timer (PIT).
+ *
+ * Future additions:
+ * (1) Add support for temporarily skewing guest time off of where it should
+ *     be to support slack simulation of guests. The idea is that simulators
+ *     set this skew to be the difference between how much time passed for a 
+ *     simulated feature and a real implementation of that feature, making
+ *     time pass at a different rate from real time on this core. The VMM
+ *     will then attempt to move this skew back towards 0 subject to
+ *     resolution/accuracy constraints from various system timers.
+ *   
+ *     The main effort in doing this will be to get accuracy/resolution 
+ *     information from each local timer and to use this to bound how much skew
+ *     is removed on each exit.
+ */
+
+
 static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void * priv_data) {
     struct vm_time * time_state = &(info->time_state);
 
-    info->vm_regs.rbx = time_state->cpu_freq;
+    info->vm_regs.rbx = time_state->guest_cpu_freq;
 
     PrintDebug("Guest request cpu frequency: return %ld\n", (long)info->vm_regs.rbx);
     
@@ -33,23 +81,49 @@ static int handle_cpufreq_hcall(struct guest_info * info, uint_t hcall_id, void
 
 
 
-void v3_init_time(struct guest_info * info) {
+/*
+ * Record the moment a core begins running its guest.
+ *
+ * Samples the host time once and uses it as the guest's initial time,
+ * the timestamp of the last timer update, and the start of the current
+ * yield interval. Always returns 0.
+ */
+int v3_start_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+    /* We start running with guest_time == host_time */
+    uint64_t start = v3_get_host_time(time_state);
+
+    PrintDebug("Starting initial guest time as %llu\n", start);
+
+    time_state->initial_time = start;
+    time_state->last_update = start;
+    info->yield_start_cycle = start;
+
+    return 0;
+}
+
+/*
+ * Keep a frequency-limited guest from running ahead of the host.
+ *
+ * When the guest and host CPU frequencies match (or the guest frequency
+ * is zero), the guest/host offset is cleared. Otherwise this computes the
+ * host time that the current guest time corresponds to, yields until the
+ * host reaches it, and records the resulting guest/host offset.
+ *
+ * Always returns 0.
+ */
+int v3_adjust_time(struct guest_info * info) {
     struct vm_time * time_state = &(info->time_state);
 
-    time_state->cpu_freq = V3_CPU_KHZ();
-    time_state->guest_tsc = 0;
-    time_state->cached_host_tsc = 0;
-    // time_state->pending_cycles = 0;
-  
-    INIT_LIST_HEAD(&(time_state->timers));
-    time_state->num_timers = 0;
+    /* A zero guest frequency would divide by zero below; treat it like
+     * "same as host" instead. */
+    if ((time_state->guest_cpu_freq == 0) ||
+       (time_state->host_cpu_freq == time_state->guest_cpu_freq)) {
+       time_state->guest_host_offset = 0;
+    } else {
+       uint64_t guest_time, guest_elapsed, desired_elapsed;
+       uint64_t host_time, target_host_time;
 
-    v3_register_hypercall(info->vm_info, TIME_CPUFREQ_HCALL, handle_cpufreq_hcall, NULL);
-}
+       guest_time = v3_get_guest_time(time_state);
 
+       /* Compute what host time this guest time should correspond to.
+        * The quotient and remainder are scaled separately: multiplying the
+        * whole elapsed cycle count by the host frequency first can overflow
+        * 64 bits once the guest has been up for a while. The result is the
+        * same as (guest_elapsed * host_cpu_freq) / guest_cpu_freq whenever
+        * that product fits (assumes the kHz values fit in 32 bits - TODO
+        * confirm against struct vm_time). */
+       guest_elapsed = (guest_time - time_state->initial_time);
+       desired_elapsed = ((guest_elapsed / time_state->guest_cpu_freq) * time_state->host_cpu_freq) +
+           (((guest_elapsed % time_state->guest_cpu_freq) * time_state->host_cpu_freq) / time_state->guest_cpu_freq);
+       target_host_time = time_state->initial_time + desired_elapsed;
 
-int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * private_data) {
+       /* Yield until that host time is reached */
+       host_time = v3_get_host_time(time_state);
+       while (host_time < target_host_time) {
+           v3_yield(info);
+           host_time = v3_get_host_time(time_state);
+       }
+
+       time_state->guest_host_offset = (sint64_t)guest_time - (sint64_t)host_time;
+    }
+    return 0;
+}
+
+int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, 
+            void * private_data) {
     struct vm_timer * timer = NULL;
     timer = (struct vm_timer *)V3_Malloc(sizeof(struct vm_timer));
     V3_ASSERT(timer != NULL);
@@ -63,7 +137,6 @@ int v3_add_timer(struct guest_info * info, struct vm_timer_ops * ops, void * pri
     return 0;
 }
 
-
 int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) {
     list_del(&(timer->timer_link));
     info->time_state.num_timers--;
@@ -72,34 +145,182 @@ int v3_remove_timer(struct guest_info * info, struct vm_timer * timer) {
     return 0;
 }
 
+/*
+ * Advance every timer registered on this core.
+ *
+ * Samples the current guest time, stores it as the new last_update, and
+ * passes each registered timer the number of guest cycles elapsed since
+ * the previous update together with the guest CPU frequency.
+ */
+void v3_update_timers(struct guest_info * info) {
+    struct vm_timer * tmp_timer;
+    uint64_t old_time = info->time_state.last_update;
+    uint64_t cycles;
 
+    info->time_state.last_update = v3_get_guest_time(&info->time_state);
+    cycles = info->time_state.last_update - old_time;
 
-void v3_update_time(struct guest_info * info, uint64_t cycles) {
-    struct vm_timer * tmp_timer;
+    /* Each timer sees the same elapsed-cycle delta. */
+    list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) {
+       tmp_timer->ops->update_timer(info, cycles, info->time_state.guest_cpu_freq, tmp_timer->private_data);
+    }
+}
+
+/* 
+ * Handle full virtualization of the time stamp counter.  As noted
+ * above, we don't store the actual value of the TSC, only the guest's
+ * offset from monotonic guest's time. If the guest writes to the TSC, we
+ * handle this by changing that offset.
+ *
+ * Possible TODO: Proper hooking of TSC read/writes?
+ */ 
+
+/*
+ * Load the guest's current TSC value into the guest register file:
+ * the upper 32 bits go to RDX and the lower 32 bits to RAX.
+ * Always returns 0.
+ */
+int v3_rdtsc(struct guest_info * info) {
+    const uint64_t guest_tsc = v3_get_guest_tsc(&(info->time_state));
+
+    info->vm_regs.rax = guest_tsc & 0xffffffffLL;
+    info->vm_regs.rdx = guest_tsc >> 32;
+
+    return 0;
+}
+
+/*
+ * Exit handler for an intercepted RDTSC: fills RAX/RDX with the guest
+ * TSC via v3_rdtsc(), clears the upper halves of both registers, and
+ * advances the guest RIP past the instruction. Always returns 0.
+ */
+int v3_handle_rdtsc(struct guest_info * info) {
+    v3_rdtsc(info);
     
-    //   cycles *= 8;
+    /* Only the low 32 bits of each result register are kept. */
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
 
-//    cycles /= 150;
+    /* RDTSC is encoded in two bytes (0F 31). */
+    info->rip += 2;
+    
+    return 0;
+}
 
-    info->time_state.guest_tsc += cycles;
+/*
+ * Emulate the register effects of RDTSCP: RCX receives the value read
+ * for TSC_AUX_MSR and RDX:RAX receive the guest TSC.
+ *
+ * Returns 0 on success, or the non-zero result of whichever step failed.
+ */
+int v3_rdtscp(struct guest_info * info) {
+    int ret;
+    /* First get the MSR value that we need. It's safe to futz with
+     * ra/c/dx here since they're modified by this instruction anyway. */
+    info->vm_regs.rcx = TSC_AUX_MSR; 
+    /* NOTE(review): v3_handle_msr_read() is the generic MSR-read exit
+     * handler; confirm it has no side effects beyond RAX/RDX (e.g. that it
+     * does not also advance RIP) when reused here. */
+    ret = v3_handle_msr_read(info);
+    if (ret) return ret;
+    info->vm_regs.rcx = info->vm_regs.rax;
 
-    list_for_each_entry(tmp_timer, &(info->time_state.timers), timer_link) {
-       tmp_timer->ops->update_time(info, cycles, info->time_state.cpu_freq, tmp_timer->private_data);
-    }
-  
+    /* Now do the TSC half of the instruction */
+    ret = v3_rdtsc(info);
+    if (ret) return ret;
+    
+    return 0;
+}
+
+
+/*
+ * Exit handler for an intercepted RDTSCP.
+ *
+ * Emulates the instruction via v3_rdtscp(), clears the upper halves of
+ * RAX, RCX and RDX, and advances the guest RIP past the instruction.
+ *
+ * Returns 0 on success. Returns -1 without touching RIP if the emulation
+ * fails, so a failed TSC_AUX read is not silently reported as success.
+ */
+int v3_handle_rdtscp(struct guest_info * info) {
+
+    if (v3_rdtscp(info) != 0) {
+       PrintError("Error emulating RDTSCP instruction\n");
+       return -1;
+    }
+    
+    /* Only the low 32 bits of each result register are kept. */
+    info->vm_regs.rax &= 0x00000000ffffffffLL;
+    info->vm_regs.rcx &= 0x00000000ffffffffLL;
+    info->vm_regs.rdx &= 0x00000000ffffffffLL;
+
+    /* RDTSCP is encoded in three bytes (0F 01 F9). */
+    info->rip += 3;
+    
+    return 0;
+}
 
+/*
+ * MSR read hook for TSC_AUX: returns the per-core value saved in
+ * time_state->tsc_aux. Asserts that it was invoked for TSC_AUX_MSR.
+ * Always returns 0.
+ */
+static int tsc_aux_msr_read_hook(struct guest_info *info, uint_t msr_num, 
+                                struct v3_msr *msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+    msr_val->lo = time_state->tsc_aux.lo;
+    msr_val->hi = time_state->tsc_aux.hi;
 
-    //info->time_state.pending_cycles = 0;
+    return 0;
+}
+
+/*
+ * MSR write hook for TSC_AUX: saves both halves of the written value in
+ * the core's time state so later reads return it. Asserts that it was
+ * invoked for TSC_AUX_MSR. Always returns 0.
+ */
+static int tsc_aux_msr_write_hook(struct guest_info *info, uint_t msr_num, 
+                             struct v3_msr msr_val, void *priv) {
+    V3_ASSERT(msr_num == TSC_AUX_MSR);
+
+    info->time_state.tsc_aux.hi = msr_val.hi;
+    info->time_state.tsc_aux.lo = msr_val.lo;
+
+    return 0;
+}
+
+/*
+ * MSR read hook for the TSC MSR: reports the current guest TSC, split
+ * into its high and low 32-bit halves. Asserts that it was invoked for
+ * TSC_MSR. Always returns 0.
+ */
+static int tsc_msr_read_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr *msr_val, void *priv) {
+    uint64_t guest_tsc = v3_get_guest_tsc(&(info->time_state));
+
+    V3_ASSERT(msr_num == TSC_MSR);
+
+    msr_val->lo = guest_tsc & 0xffffffffLL;
+    msr_val->hi = guest_tsc >> 32;
+
+    return 0;
+}
+
+/*
+ * MSR write hook for the TSC MSR.
+ *
+ * The written value is not stored directly. Instead the difference
+ * between it and the current guest time is saved as tsc_guest_offset.
+ * Asserts that it was invoked for TSC_MSR. Always returns 0.
+ */
+static int tsc_msr_write_hook(struct guest_info *info, uint_t msr_num,
+                            struct v3_msr msr_val, void *priv) {
+    struct vm_time * time_state = &(info->time_state);
+    uint64_t guest_time, new_tsc;
+    V3_ASSERT(msr_num == TSC_MSR);
+    /* Reassemble the 64-bit value from its two 32-bit halves. */
+    new_tsc = (((uint64_t)msr_val.hi) << 32) | (uint64_t)msr_val.lo;
+    guest_time = v3_get_guest_time(time_state);
+    time_state->tsc_guest_offset = (sint64_t)new_tsc - (sint64_t)guest_time; 
+
+    return 0;
 }
 
-void v3_advance_time(struct guest_info * core) {
-    struct vm_timer * tmp_timer;
 
+/*
+ * Install the VM-wide time facilities: the TSC and TSC_AUX MSR hooks and
+ * the TIME_CPUFREQ hypercall.
+ *
+ * Stops at the first step that fails and returns its non-zero result;
+ * returns 0 when all three succeed. Each step's result is checked before
+ * the next step is logged, so the debug trace reflects what was actually
+ * attempted.
+ */
+static int init_vm_time(struct v3_vm_info *vm_info) {
+    int ret;
 
-    list_for_each_entry(tmp_timer, &(core->time_state.timers), timer_link) {
-       tmp_timer->ops->advance_timer(core, tmp_timer->private_data);
+    PrintDebug("Installing TSC MSR hook.\n");
+    ret = v3_hook_msr(vm_info, TSC_MSR, 
+                     tsc_msr_read_hook, tsc_msr_write_hook, NULL);
+    if (ret) return ret;
+
+    PrintDebug("Installing TSC_AUX MSR hook.\n");
+    ret = v3_hook_msr(vm_info, TSC_AUX_MSR, tsc_aux_msr_read_hook, 
+                     tsc_aux_msr_write_hook, NULL);
+    if (ret) return ret;
+
+    PrintDebug("Registering TIME_CPUFREQ hypercall.\n");
+    ret = v3_register_hypercall(vm_info, TIME_CPUFREQ_HCALL, 
+                               handle_cpufreq_hcall, NULL);
+    return ret;
+}
+
+/*
+ * Initialize the per-core time state.
+ *
+ * The host frequency comes from V3_CPU_KHZ(). The guest frequency comes
+ * from the optional "khz" core config value; it falls back to the host
+ * frequency when that value is absent, parses to zero, or exceeds the
+ * host frequency. All offsets, timestamps, the timer list and TSC_AUX
+ * are reset, and the VM-wide MSR hooks/hypercall are installed the first
+ * time this function runs.
+ */
+void v3_init_time(struct guest_info * info) {
+    struct vm_time * time_state = &(info->time_state);
+    v3_cfg_tree_t * cfg_tree = info->core_cfg_data;
+    static int one_time = 0;
+    char *khz;
+
+    time_state->host_cpu_freq = V3_CPU_KHZ();
+    khz = v3_cfg_val(cfg_tree, "khz");
+    if (khz) {
+       time_state->guest_cpu_freq = atoi(khz);
+       PrintDebug("Core %d CPU frequency requested at %d khz.\n", 
+                  info->cpu_id, time_state->guest_cpu_freq);
+    }
+    
+    /* atoi() yields 0 for "0" or non-numeric text; a zero guest frequency
+     * would later be used as a divisor in v3_adjust_time(), so reject it
+     * here along with frequencies faster than the host. */
+    if (!khz || (time_state->guest_cpu_freq == 0) ||
+       (time_state->guest_cpu_freq > time_state->host_cpu_freq)) {
+       time_state->guest_cpu_freq = time_state->host_cpu_freq;
     }
-  
+    PrintDebug("Core %d CPU frequency set to %d KHz (host CPU frequency = %d KHz).\n", info->cpu_id, time_state->guest_cpu_freq, time_state->host_cpu_freq);
 
+    time_state->initial_time = 0;
+    time_state->last_update = 0;
+    time_state->guest_host_offset = 0;
+    time_state->tsc_guest_offset = 0;
 
+    INIT_LIST_HEAD(&(time_state->timers));
+    time_state->num_timers = 0;
+    
+    time_state->tsc_aux.lo = 0;
+    time_state->tsc_aux.hi = 0;
+
+    /* NOTE(review): one_time is a function-level static, so the hooks are
+     * installed only on the very first call, for that call's vm_info.
+     * Confirm whether VMs created later need their own installation. */
+    if (!one_time) {
+       if (init_vm_time(info->vm_info) != 0) {
+           PrintError("Could not install TSC MSR hooks or CPU frequency hypercall\n");
+       }
+       one_time = 1;
+    }
 }
+
+
+
+
+
+
+
+
+
index 18d183b..d565d1f 100644 (file)
@@ -225,6 +225,10 @@ static int init_vmcs_bios(struct guest_info * info, struct vmx_data * vmx_state)
     vmx_state->pri_proc_ctrls.invlpg_exit = 1;
     vmx_state->pri_proc_ctrls.use_msr_bitmap = 1;
     vmx_state->pri_proc_ctrls.pause_exit = 1;
+    vmx_state->pri_proc_ctrls.tsc_offset = 1;
+#ifdef CONFIG_TIME_VIRTUALIZE_TSC
+    vmx_state->pri_proc_ctrls.rdtsc_exit = 1;
+#endif
 
     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_A_ADDR, (addr_t)V3_PAddr(info->vm_info->io_map.arch_data));
     vmx_ret |= check_vmcs_write(VMCS_IO_BITMAP_B_ADDR, 
@@ -636,13 +640,12 @@ static void print_exit_log(struct guest_info * info) {
  */
 int v3_vmx_enter(struct guest_info * info) {
     int ret = 0;
-    uint64_t tmp_tsc = 0;
+    uint32_t tsc_offset_low, tsc_offset_high;
     struct vmx_exit_info exit_info;
 
     // Conditionally yield the CPU if the timeslice has expired
     v3_yield_cond(info);
 
-
     // v3_print_guest_state(info);
 
     // disable global interrupts for vm state transition
@@ -665,10 +668,16 @@ int v3_vmx_enter(struct guest_info * info) {
        vmcs_write(VMCS_GUEST_CR3, guest_cr3);
     }
 
-    // We do timer injection here to track real host time.
-    rdtscll(tmp_tsc);
-    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-    rdtscll(info->time_state.cached_host_tsc);
+    v3_update_timers(info);
+
+    /* If this guest is frequency-lagged behind host time, wait 
+     * for the appropriate host time before resuming the guest. */
+    v3_adjust_time(info);
+
+    tsc_offset_high = (uint32_t)((v3_tsc_host_offset(&info->time_state) >> 32) & 0xffffffff);
+    tsc_offset_low = (uint32_t)(v3_tsc_host_offset(&info->time_state) & 0xffffffff);
+    check_vmcs_write(VMCS_TSC_OFFSET_HIGH, tsc_offset_high);
+    check_vmcs_write(VMCS_TSC_OFFSET, tsc_offset_low);
 
     if (info->vm_info->run_state == VM_STOPPED) {
        info->vm_info->run_state = VM_RUNNING;
@@ -688,12 +697,8 @@ int v3_vmx_enter(struct guest_info * info) {
        return -1;
     }
 
-    //   rdtscll(tmp_tsc);
-    //    v3_update_time(info, tmp_tsc - info->time_state.cached_host_tsc);
-
     info->num_exits++;
 
-
     /* Update guest state */
     v3_vmx_save_vmcs(info);
 
@@ -739,13 +744,34 @@ int v3_vmx_enter(struct guest_info * info) {
 }
 
 
-int v3_start_vmx_guest(struct guest_info* info) {
+int v3_start_vmx_guest(struct guest_info * info) {
+
+    PrintDebug("Starting VMX core %u\n", info->cpu_id);
+
+    if (info->cpu_id == 0) {
+       info->core_run_state = CORE_RUNNING;
+       info->vm_info->run_state = VM_RUNNING;
+    } else {
+
+        PrintDebug("VMX core %u: Waiting for core initialization\n", info->cpu_id);
+
+        while (info->core_run_state == CORE_STOPPED) {
+            v3_yield(info);
+            //PrintDebug("VMX core %u: still waiting for INIT\n",info->cpu_id);
+        }
+       
+       PrintDebug("VMX core %u initialized\n", info->cpu_id);
+    }
+
 
+    PrintDebug("VMX core %u: I am starting at CS=0x%x (base=0x%p, limit=0x%x),  RIP=0x%p\n",
+               info->cpu_id, info->segments.cs.selector, (void *)(info->segments.cs.base),
+               info->segments.cs.limit, (void *)(info->rip));
 
-    PrintDebug("Launching VMX guest\n");
 
-    rdtscll(info->time_state.cached_host_tsc);
+    PrintDebug("VMX core %u: Launching VMX VM\n", info->cpu_id);
 
+    v3_start_time(info);
 
     while (1) {
        if (v3_vmx_enter(info) == -1) {
index d88210d..357f0d2 100644 (file)
@@ -99,6 +99,18 @@ int v3_handle_vmx_exit(struct guest_info * info, struct vmx_exit_info * exit_inf
             }
 
             break;
+
+        case VMEXIT_RDTSC:
+#ifdef CONFIG_DEBUG_TIME
+           PrintDebug("RDTSC\n");
+#endif 
+           if (v3_handle_rdtsc(info) == -1) {
+               PrintError("Error Handling RDTSC instruction\n");
+               return -1;
+           }
+           
+           break;
+
         case VMEXIT_CPUID:
            if (v3_handle_cpuid(info) == -1) {
                PrintError("Error Handling CPUID instruction\n");
index c38b9f4..ff3dbb6 100644 (file)
@@ -81,12 +81,15 @@ KERNEL_C_SRCS := idt.c int.c trap.c irq.c io.c \
        gdt.c tss.c segment.c \
        bget.c malloc.c \
        synch.c kthread.c \
-       vm_cons.c debug.c \
+        debug.c \
+       vm_cons.c \
        pci.c \
        serial.c  reboot.c \
         paging.c \
        main.c
 
+ # 
+
 # Kernel object files built from C source files
 KERNEL_C_OBJS := $(KERNEL_C_SRCS:%.c=geekos/%.o)
 
@@ -124,17 +127,17 @@ COMMON_C_OBJS := $(COMMON_C_SRCS:%.c=common/%.o)
 
 # Uncomment if cross compiling
 #TARGET_CC_PREFIX := i386-elf-
-TARGET_CC_PREFIX :=  $(PROJECT_ROOT)/../../devtools/i386/bin/i386-elf-
+#TARGET_CC_PREFIX :=  $(PROJECT_ROOT)/../../devtools/i386/bin/i386-elf-
 
 # Target C compiler.  gcc 2.95.2 or later should work.
-TARGET_CC := $(TARGET_CC_PREFIX)gcc
+TARGET_CC := $(TARGET_CC_PREFIX)gcc -m32
 
 
 # Host C compiler.  This is used to compile programs to execute on
 # the host platform, not the target (x86) platform.  On x86/ELF
 # systems, such as Linux and FreeBSD, it can generally be the same
 # as the target C compiler.
-HOST_CC := gcc
+HOST_CC := gcc 
 
 # Target linker.  GNU ld is probably the only one that will work.
 TARGET_LD := $(TARGET_CC_PREFIX)ld -melf_i386
@@ -152,8 +155,8 @@ TARGET_NM := $(TARGET_CC_PREFIX)nm
 TARGET_OBJCOPY := $(TARGET_CC_PREFIX)objcopy
 
 # Nasm (http://nasm.sourceforge.net)
-NASM := $(PROJECT_ROOT)/../../devtools/bin/nasm
-#NASM := /opt/vmm-tools/bin/nasm
+#NASM := $(PROJECT_ROOT)/../../devtools/bin/nasm
+NASM := nasm -f elf
 
 AS = as --32
 
index 052def9..e69de29 100644 (file)
@@ -1,268 +0,0 @@
-geekos/idt.o: ../src/geekos/idt.c ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/idt.h \
-  ../include/geekos/int.h ../include/geekos/debug.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
-geekos/int.o: ../src/geekos/int.c ../include/geekos/idt.h \
-  ../include/geekos/int.h ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/irq.h \
-  ../include/geekos/debug.h ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/cpu.h
-geekos/trap.o: ../src/geekos/trap.c ../include/geekos/idt.h \
-  ../include/geekos/int.h ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/kthread.h \
-  ../include/geekos/list.h ../include/geekos/trap.h \
-  ../include/geekos/debug.h ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
-geekos/irq.o: ../src/geekos/irq.c ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/idt.h ../include/geekos/int.h \
-  ../include/geekos/defs.h ../include/geekos/io.h ../include/geekos/irq.h
-geekos/io.o: ../src/geekos/io.c ../include/geekos/io.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h
-geekos/blockdev.o: ../src/geekos/blockdev.c ../include/geekos/errno.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/malloc.h ../include/geekos/int.h \
-  ../include/geekos/kassert.h ../include/geekos/defs.h \
-  ../include/geekos/kthread.h ../include/geekos/list.h \
-  ../include/geekos/synch.h ../include/geekos/blockdev.h \
-  ../include/geekos/fileio.h
-geekos/ide.o: ../src/geekos/ide.c ../include/geekos/serial.h \
-  ../include/geekos/irq.h ../include/geekos/int.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/io.h ../include/geekos/errno.h \
-  ../include/geekos/malloc.h ../include/geekos/timer.h \
-  ../include/geekos/kthread.h ../include/geekos/list.h \
-  ../include/geekos/blockdev.h ../include/geekos/fileio.h \
-  ../include/geekos/ide.h
-geekos/keyboard.o: ../src/geekos/keyboard.c ../include/geekos/kthread.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/list.h ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/fmtout.h \
-  ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/irq.h ../include/geekos/int.h \
-  ../include/geekos/defs.h ../include/geekos/io.h \
-  ../include/geekos/keyboard.h
-geekos/screen.o: ../src/geekos/screen.c \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  ../include/geekos/io.h ../include/geekos/int.h ../include/geekos/defs.h
-geekos/timer.o: ../src/geekos/timer.c \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/limits.h \
-  ../include/geekos/io.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/int.h ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/fmtout.h \
-  ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/irq.h \
-  ../include/geekos/kthread.h ../include/geekos/list.h \
-  ../include/geekos/timer.h ../include/geekos/debug.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
-geekos/mem.o: ../src/geekos/mem.c ../include/geekos/defs.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/bootinfo.h ../include/geekos/gdt.h \
-  ../include/geekos/int.h ../include/geekos/malloc.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/paging.h ../include/geekos/list.h \
-  ../include/geekos/mem.h
-geekos/crc32.o: ../src/geekos/crc32.c ../include/geekos/crc32.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/serial.h ../include/geekos/irq.h \
-  ../include/geekos/int.h ../include/geekos/defs.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  ../include/geekos/io.h
-geekos/gdt.o: ../src/geekos/gdt.c ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/segment.h ../include/geekos/int.h \
-  ../include/geekos/defs.h ../include/geekos/tss.h \
-  ../include/geekos/gdt.h ../include/libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
-geekos/tss.o: ../src/geekos/tss.c ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/gdt.h \
-  ../include/geekos/segment.h ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/tss.h ../include/geekos/serial.h \
-  ../include/geekos/irq.h ../include/geekos/int.h ../include/geekos/io.h
-geekos/segment.o: ../src/geekos/segment.c ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/tss.h ../include/geekos/segment.h
-geekos/bget.o: ../src/geekos/bget.c ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/bget.h
-geekos/malloc.o: ../src/geekos/malloc.c ../include/geekos/screen.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/int.h ../include/geekos/kassert.h \
-  ../include/geekos/defs.h ../include/geekos/bget.h \
-  ../include/geekos/malloc.h
-geekos/synch.o: ../src/geekos/synch.c ../include/geekos/kthread.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/list.h ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/fmtout.h \
-  ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/int.h ../include/geekos/defs.h \
-  ../include/geekos/synch.h
-geekos/kthread.o: ../src/geekos/kthread.c ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/int.h \
-  ../include/geekos/mem.h ../include/geekos/list.h \
-  ../include/geekos/paging.h ../include/geekos/bootinfo.h \
-  ../include/geekos/symbol.h ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/kthread.h ../include/geekos/malloc.h \
-  ../include/geekos/serial.h ../include/geekos/irq.h \
-  ../include/geekos/io.h
-geekos/vm_cons.o: ../src/geekos/vm_cons.c ../include/geekos/fmtout.h \
-  ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/idt.h ../include/geekos/int.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/defs.h ../include/geekos/vm_cons.h \
-  ../include/geekos/io.h
-geekos/debug.o: ../src/geekos/debug.c ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/debug.h ../include/geekos/fmtout.h \
-  ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/vm_cons.h ../include/geekos/io.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/screen.h
-geekos/serial.o: ../src/geekos/serial.c ../include/geekos/serial.h \
-  ../include/geekos/irq.h ../include/geekos/int.h \
-  ../include/geekos/kassert.h ../include/geekos/screen.h \
-  ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/io.h ../include/geekos/reboot.h \
-  ../include/geekos/gdt.h ../include/geekos/idt.h
-geekos/reboot.o: ../src/geekos/reboot.c ../include/geekos/reboot.h \
-  ../include/libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
-geekos/paging.o: ../src/geekos/paging.c ../include/geekos/string.h \
-  ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/int.h ../include/geekos/kassert.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/defs.h ../include/geekos/idt.h \
-  ../include/geekos/kthread.h ../include/geekos/list.h \
-  ../include/geekos/mem.h ../include/geekos/paging.h \
-  ../include/geekos/bootinfo.h ../include/geekos/malloc.h \
-  ../include/geekos/gdt.h ../include/geekos/segment.h \
-  ../include/geekos/crc32.h ../include/geekos/debug.h
-geekos/main.o: ../src/geekos/main.c ../include/geekos/bootinfo.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/screen.h ../include/geekos/ktypes.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdbool.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/geekos/mem.h ../include/geekos/defs.h \
-  ../include/geekos/list.h ../include/geekos/kassert.h \
-  ../include/geekos/paging.h ../include/geekos/crc32.h \
-  ../include/geekos/tss.h ../include/geekos/int.h \
-  ../include/geekos/kthread.h ../include/geekos/trap.h \
-  ../include/geekos/timer.h ../include/geekos/keyboard.h \
-  ../include/geekos/io.h ../include/geekos/serial.h \
-  ../include/geekos/irq.h ../include/geekos/reboot.h \
-  ../include/geekos/ide.h ../include/geekos/vm_cons.h \
-  ../include/geekos/debug.h ../include/geekos/gdt.h
-common/fmtout.o: ../src/common/fmtout.c \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h \
-  ../include/geekos/string.h ../include/geekos/../libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/limits.h \
-  ../include/geekos/fmtout.h ../include/geekos/../libc/fmtout.h
-common/string.o: ../src/common/string.c ../include/libc/fmtout.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stdarg.h \
-  ../include/libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
-common/memmove.o: ../src/common/memmove.c ../include/libc/string.h \
-  /home/jarusl/palacios/devtools/i386/lib/gcc/i386-elf/3.4.6/include/stddef.h
index 06f302a..5277f8e 100644 (file)
 
 struct TSS;
 
-#if __TINYC__
-#define PACKED
-#else
-#define PACKED __attribute__((packed))
-#endif
-
 /*
  * The general format of a segment descriptor.
  */
 struct Segment_Descriptor {
-    ushort_t sizeLow        PACKED ;
-    uint_t baseLow     : 24 PACKED ;
-    uint_t type        : 4  PACKED ;
-    uint_t system      : 1  PACKED ;
-    uint_t dpl         : 2  PACKED ;
-    uint_t present     : 1  PACKED ;
-    uint_t sizeHigh    : 4  PACKED ;
-    uint_t avail       : 1  PACKED ;
-    uint_t reserved    : 1  PACKED ;  /* set to zero */
-    uint_t dbBit       : 1  PACKED ;
-    uint_t granularity : 1  PACKED ;
-    uchar_t baseHigh        PACKED ;
-};
+    ushort_t sizeLow;
+    uint_t baseLow     : 24;
+    uint_t type        : 4;
+    uint_t system      : 1;
+    uint_t dpl         : 2;
+    uint_t present     : 1;
+    uint_t sizeHigh    : 4;
+    uint_t avail       : 1;
+    uint_t reserved    : 1;  /* set to zero */
+    uint_t dbBit       : 1;
+    uint_t granularity : 1;
+    uchar_t baseHigh ;
+} __attribute__((packed));
 
 /**
  * Construct a segment selector.
index c097eb4..ca7f6c0 100644 (file)
@@ -78,7 +78,8 @@ enum ranks {
 #define PTRDIFF_T_RANK rank_long
 
 /* DHH */
-#define EMIT(x) do { (q)->Emit((q), (x)); } while (0)
+#include <geekos/vm_cons.h>
+#define EMIT(x) do { if ((q) != NULL) {(q)->Emit((q), (x));} } while (0) /* emit via sink q; skipped when no sink is supplied */
 
 /*
  * DHH - As a hack, we buffer this many digits when generating
index cccbe16..d13c3e0 100644 (file)
@@ -525,10 +525,9 @@ struct bfhead {
 };
 #define BFH(p) ((struct bfhead *) (p))
 
-static struct bfhead freelist = {     /* List of free buffers */
-    {0, 0},
-    {&freelist, &freelist}
-};
+static struct bfhead freelist;     /* List of free buffers */
+
+
 
 
 #ifdef BufStats
@@ -1017,6 +1016,14 @@ void bpool(buf, len)
 
     assert(len - sizeof(struct bhead) <= -((bufsize) ESent + 1));
 
+    /* Initialize Free list since compile time static initializations appear to be broken */
+    freelist.bh.prevfree = 0;
+    freelist.bh.bsize = 0;
+    freelist.ql.flink = &freelist;
+    freelist.ql.blink = &freelist;
+
+
+
     /* Clear  the  backpointer at  the start of the block to indicate that
        there  is  no  free  block  prior  to  this   one.    That   blocks
        recombination when the first block in memory is released. */
index a163378..8dd05ab 100644 (file)
@@ -241,18 +241,15 @@ void Main(struct Boot_Info* bootInfo)
 {
   struct Kernel_Thread * key_thread;
   struct Kernel_Thread * spkr_thread;
-
-
-  //  VMConsPutLineN("hello\n", 6);
-
   ulong_t doIBuzz = 0;
 
   Init_BSS();
+  Init_VMCons();
   Init_Screen();
   InitSerial();
-
-  Init_VMCons();
+  Print("Initializing Memory\n");
   Init_Mem(bootInfo);
+  Print("Memory Done\n");
   Init_CRC32();
   Init_TSS();
   Init_Interrupts();
@@ -268,10 +265,6 @@ void Main(struct Boot_Info* bootInfo)
 
 
 
-
-
-
-
   PrintBoth("\n\nHello, Welcome to this horrid output-only serial interface\n");
   PrintBoth("Eventually, this will let us control the VMM\n\n");
  
index ea76959..269f0a2 100644 (file)
@@ -19,7 +19,7 @@
  */
 void Init_Heap(ulong_t start, ulong_t size)
 {
-    /*Print("Creating kernel heap: start=%lx, size=%ld\n", start, size);*/
+    Print("Creating kernel heap: start=%lx, size=%ld\n", start, size);
     bpool((void*) start, size);
 }
 
index 2f44520..6ab2051 100644 (file)
@@ -109,7 +109,11 @@ void Init_Mem(struct Boot_Info* bootInfo)
     ulong_t pageListAddr;
     ulong_t kernEnd;
 
+
+    memset(&s_freeList, 0, sizeof(struct Page_List));
+    
     KASSERT(bootInfo->memSizeKB > 0);
+    Print("Booting with %d KB memory\n", bootInfo->memSizeKB);
 
     /*
      * Before we do anything, switch from setup.asm's temporary GDT
@@ -180,10 +184,12 @@ void Init_Mem(struct Boot_Info* bootInfo)
 
     /* Initialize the kernel heap */
     //    Init_Heap(HIGHMEM_START, KERNEL_HEAP_SIZE);
+
+    Print("Initing heap\n");
     Init_Heap(kernEnd, KERNEL_HEAP_SIZE);
 
     Print("%uKB memory detected, %u pages in freelist, %d bytes in kernel heap\n",
-       bootInfo->memSizeKB, g_freePageCount, KERNEL_HEAP_SIZE);
+         bootInfo->memSizeKB, g_freePageCount, KERNEL_HEAP_SIZE);
 }
 
 /*
index 077112f..5323e3c 100644 (file)
@@ -14,6 +14,7 @@
 #include <geekos/int.h>
 #include <geekos/fmtout.h>
 #include <geekos/screen.h>
+#include <geekos/vm_cons.h>
 
 /*
  * Information sources for VT100 and ANSI escape sequences:
@@ -387,20 +388,6 @@ static void Update_Cursor(void)
  * Public functions
  * ---------------------------------------------------------------------- */
 
-/*
- * Initialize the screen module.
- */
-void Init_Screen(void)
-{
-    bool iflag = Begin_Int_Atomic();
-
-    s_cons.row = s_cons.col = 0;
-    s_cons.currentAttr = DEFAULT_ATTRIBUTE;
-    Clear_Screen();
-
-    End_Int_Atomic(iflag);
-    Print("Screen Inited\n");
-}
 
 /*
  * Clear the screen using the current attribute.
@@ -512,7 +499,7 @@ void Put_Buf(const char* buf, ulong_t length)
 /* Support for Print(). */
 static void Print_Emit(struct Output_Sink *o, int ch) { Put_Char_Imp(ch); }
 static void Print_Finish(struct Output_Sink *o) { Update_Cursor(); }
-static struct Output_Sink s_outputSink = { &Print_Emit, &Print_Finish };
+static struct Output_Sink s_outputSink;
 
 /*
  * Print to console using printf()-style formatting.
@@ -542,3 +529,22 @@ void PrintList(const char * fmt, va_list ap) {
     PrintInternal(fmt, ap);
     End_Int_Atomic(iflag);
 }
+
+
+/*
+ * Initialize the screen module: install the Print() sink callbacks, reset cursor position and attribute, clear the screen.
+ */
+void Init_Screen(void)
+{
+    bool iflag = Begin_Int_Atomic();
+    
+    s_outputSink.Emit = &Print_Emit;     /* assigned at run time; s_outputSink is declared without a static initializer */
+    s_outputSink.Finish = &Print_Finish;
+
+    s_cons.row = s_cons.col = 0;         /* start at the top-left cell */
+    s_cons.currentAttr = DEFAULT_ATTRIBUTE;
+    Clear_Screen();
+
+    End_Int_Atomic(iflag);
+    Print("Screen Inited\n");            /* issued only after the sink callbacks above are set */
+}
index 0729501..e583b1f 100644 (file)
@@ -117,7 +117,7 @@ void SerialMemDump(unsigned char *start, int n)
   int i, j;
 
   for (i=0;i<n;i+=16) {
-    SerialPrint("%8x", (unsigned)(start+i));
+    SerialPrint("%8x", *(unsigned char *)(start+i));
     for (j=i; j<i+16 && j<n; j+=2) {
       SerialPrint(" ");
       SerialPrintHex(*((unsigned char *)(start+j)));
index 9c2e5a4..24b62c1 100644 (file)
@@ -44,7 +44,7 @@ void VMConsMemDump(unsigned char *start, int n)
   int i, j;
 
   for (i=0;i<n;i+=16) {
-    VMConsPrint("%8x", (unsigned)(start+i));
+    VMConsPrint("%8x", *(uchar_t*)(start+i));
     for (j=i; j<i+16 && j<n; j+=2) {
       VMConsPrint(" ");
       VMConsPrintHex(*((unsigned char *)(start+j)));
index 4327985..fe5e67b 100644 (file)
 
 
 <!--
-               <device class="ICC_BUS" id="icc"/>
-               <device class="LAPIC" id="apic">
-                       <bus>icc</bus>
-               </device>
+               <device class="LAPIC" id="apic"/>
                <device class="IOAPIC" id="ioapic">
-                       <bus>icc</bus>
+                       <apic>apic</apic>
                </device>
 -->
 <!--