Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Add Virtual PCI and Virtual NIC
Lei Xia [Tue, 3 Mar 2009 20:38:56 +0000 (14:38 -0600)]
palacios/build/Makefile
palacios/include/devices/vnic.h [new file with mode: 0644]
palacios/include/devices/vpci.h [new file with mode: 0644]
palacios/include/palacios/vmm.h
palacios/src/devices/vnic.c [new file with mode: 0644]
palacios/src/devices/vpci.c [new file with mode: 0644]

index 96bd523..0d114cd 100644 (file)
@@ -320,6 +320,8 @@ DEVICES_OBJS := \
        devices/os_debug.o \
        devices/apic.o  \
        devices/io_apic.o \
+       devices/vnic.o \
+       devices/vpci.o \
 
 $(DEVICES_OBJS) :: EXTRA_CFLAGS = \
        $(JRLDEBUG) \
diff --git a/palacios/include/devices/vnic.h b/palacios/include/devices/vnic.h
new file mode 100644 (file)
index 0000000..0ffbcb1
--- /dev/null
@@ -0,0 +1,144 @@
+/*\r
+ * This file is part of the Palacios Virtual Machine Monitor developed\r
+ * by the V3VEE Project with funding from the United States National \r
+ * Science Foundation and the Department of Energy.  \r
+ *\r
+ * The V3VEE Project is a joint project between Northwestern University\r
+ * and the University of New Mexico.  You can find out more at \r
+ * http://www.v3vee.org\r
+ *\r
+ * Copyright (c) 2008, Lei Xia <lxia@northwestern.edu> \r
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> \r
+ * All rights reserved.\r
+ *\r
+ * Author: Lei Xia <lxia@northwestern.edu>\r
+ *\r
+ * This is free software.  You are permitted to use,\r
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".\r
+ */\r
+\r
+#ifndef __VNIC_H_\r
+#define __VNIC_H_\r
+\r
+#include <palacios/vm_dev.h>\r
+\r
+#define NIC_BASE_ADDR  0xc100\r
+\r
+#define NIC_IRQ        11              /* Interrupt channel */\r
+\r
+#define MAX_ETH_FRAME_SIZE 1514\r
+\r
+#define NE2K_PMEM_SIZE    (32*1024)\r
+#define NE2K_PMEM_START   (16*1024)\r
+#define NE2K_PMEM_END     (NE2K_PMEM_SIZE+NE2K_PMEM_START)\r
+#define NE2K_MEM_SIZE     NE2K_PMEM_END\r
+\r
+#define EN0_COMMAND (0x00)  // The command register (for all pages) \r
+\r
+#define NIC_DATA_PORT (0x10)  // The data read/write port\r
+\r
+#define NIC_RESET_PORT (0x1f)  // The reset port\r
+\r
+// Page 0 registers\r
+#define EN0_CLDALO     (0x01)    // Low byte of current local dma addr  RD \r
+#define EN0_STARTPG     (0x01)  // Starting page of ring bfr WR \r
+#define EN0_CLDAHI      (0x02) // High byte of current local dma addr  RD \r
+#define EN0_STOPPG      (0x02)    //Ending page +1 of ring bfr WR \r
+#define EN0_BOUNDARY   (0x03)    //Boundary page of ring bfr RD WR \r
+#define EN0_TSR                (0x04)  //Transmit status reg RD \r
+#define EN0_TPSR       (0x04)  //Transmit starting page WR \r
+#define EN0_NCR                (0x05)  //Number of collision reg RD \r
+#define EN0_TCNTLO     (0x05)  //Low  byte of tx byte count WR \r
+#define EN0_FIFO       (0x06)  //FIFO RD \r
+#define EN0_TCNTHI     (0x06)         //High byte of tx byte count WR \r
+#define EN0_ISR                (0x07)  //Interrupt status reg RD WR \r
+#define EN0_CRDALO     (0x08)  //low byte of current remote dma address RD \r
+#define EN0_RSARLO     (0x08)  //Remote start address reg 0 \r
+#define EN0_CRDAHI     (0x09)  //high byte, current remote dma address RD \r
+#define EN0_RSARHI     (0x09)  //Remote start address reg 1 \r
+#define EN0_RCNTLO     (0x0a)  //Remote byte count reg WR \r
+#define EN0_RTL8029ID0 (0x0a)  //Realtek ID byte #1 RD \r
+#define EN0_RCNTHI     (0x0b)  //Remote byte count reg WR \r
+#define EN0_RTL8029ID1 (0x0b)  //Realtek ID byte #2 RD \r
+#define EN0_RSR                (0x0c)  //rx status reg RD \r
+#define EN0_RXCR       (0x0c)  //RX configuration reg WR \r
+#define EN0_TXCR       (0x0d)  //TX configuration reg WR \r
+#define EN0_COUNTER0   (0x0d)  //Rcv alignment error counter RD \r
+#define EN0_DCFG       (0x0e)  //Data configuration reg WR \r
+#define EN0_COUNTER1   (0x0e)  //Rcv CRC error counter RD \r
+#define EN0_IMR                (0x0f)   //Interrupt mask reg WR \r
+#define EN0_COUNTER2   (0x0f)  //Rcv missed frame error counter RD \r
+\r
+//Page 1 registers\r
+#define EN1_PHYS        (0x01)\r
+#define EN1_CURPAG      (0x07)\r
+#define EN1_MULT       (0x08)\r
+\r
+//Page 2 registers\r
+#define EN2_STARTPG     (0x01) //Starting page of ring bfr RD \r
+#define EN2_STOPPG     (0x02)  //Ending page +1 of ring bfr RD \r
+#define EN2_LDMA0  (0x01)   //Current Local DMA Address 0 WR \r
+#define EN2_LDMA1  (0x02)   //Current Local DMA Address 1 WR \r
+#define EN2_RNPR  (0x03)   //Remote Next Packet Pointer RD WR \r
+#define EN2_TPSR  (0x04)    //Transmit Page Start Address RD \r
+#define EN2_LNRP  (0x05)   // Local Next Packet Pointer RD WR \r
+#define EN2_ACNT0  (0x06)  // Address Counter Upper WR \r
+#define EN2_ACNT1  (0x07)  // Address Counter Lower WR \r
+#define EN2_RCR  (0x0c)  // Receive Configuration Register RD \r
+#define EN2_TCR  (0x0d)  // Transmit Configuration Register RD \r
+#define EN2_DCR  (0x0e)  // Data Configuration Register RD \r
+#define EN2_IMR  (0x0f)  // Interrupt Mask Register RD \r
+\r
+//Page 3 registers\r
+#define EN3_CONFIG0     (0x03)\r
+#define EN3_CONFIG1     (0x04)\r
+#define EN3_CONFIG2     (0x05)\r
+#define EN3_CONFIG3     (0x06)\r
+\r
+//Bits in EN0_ISR - Interrupt status register\r
+#define ENISR_RX       0x01    //Receiver, no error \r
+#define ENISR_TX       0x02    //Transmitter, no error \r
+#define ENISR_RX_ERR   0x04    //Receiver, with error \r
+#define ENISR_TX_ERR   0x08    //Transmitter, with error \r
+#define ENISR_OVER     0x10    //Receiver overwrote the ring \r
+#define ENISR_COUNTERS 0x20    //Counters need emptying \r
+#define ENISR_RDC      0x40    //remote dma complete \r
+#define ENISR_RESET    0x80    //Reset completed \r
+#define ENISR_ALL      0x3f    //Interrupts we will enable \r
+\r
+//Bits in received packet status byte and EN0_RSR\r
+#define ENRSR_RXOK     0x01    //Received a good packet \r
+#define ENRSR_CRC      0x02    //CRC error \r
+#define ENRSR_FAE      0x04    //frame alignment error \r
+#define ENRSR_FO       0x08    //FIFO overrun \r
+#define ENRSR_MPA      0x10    //missed pkt \r
+#define ENRSR_PHY      0x20    //physical/multicast address \r
+#define ENRSR_DIS      0x40    //receiver disable. set in monitor mode \r
+#define ENRSR_DEF      0x80    //deferring \r
+\r
+//Transmitted packet status, EN0_TSR\r
+#define ENTSR_PTX 0x01 //Packet transmitted without error \r
+#define ENTSR_ND  0x02 //The transmit wasn't deferred. \r
+#define ENTSR_COL 0x04 //The transmit collided at least once. \r
+#define ENTSR_ABT 0x08  //The transmit collided 16 times, and was deferred. \r
+#define ENTSR_CRS 0x10 //The carrier sense was lost. \r
+#define ENTSR_FU  0x20  //A "FIFO underrun" occurred during transmit. \r
+#define ENTSR_CDH 0x40 //The collision detect "heartbeat" signal was lost. \r
+#define ENTSR_OWC 0x80  //There was an out-of-window collision. \r
+\r
+//command, Register accessed at EN0_COMMAND\r
+#define NE2K_STOP 0x01\r
+#define NE2K_START 0x02\r
+#define NE2K_TRANSMIT 0x04\r
+#define NE2K_DMAREAD   0x08    /* Remote read */\r
+#define NE2K_DMAWRITE  0x10    /* Remote write  */\r
+#define NE2K_DMASEND     0x18\r
+#define NE2K_ABORTDMA  0x20    /* Abort/Complete DMA */\r
+#define NE2K_PAGE0     0x00    /* Select page chip registers */\r
+#define NE2K_PAGE1     0x40    /* using the two high-order bits */\r
+#define NE2K_PAGE2     0x80\r
+#define NE2K_PAGE    0xc0\r
+\r
+struct vm_device *v3_create_vnic();\r
+\r
+#endif\r
diff --git a/palacios/include/devices/vpci.h b/palacios/include/devices/vpci.h
new file mode 100644 (file)
index 0000000..e4f893b
--- /dev/null
@@ -0,0 +1,168 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2009, Lei Xia <lxia@northwestern.edu>
+ * Copyright (c) 2009, Chang Seok Bae <jhuell@gmail.com>
+ * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author:  Lei Xia <lxia@northwestern.edu>
+ *             Chang Seok Bae <jhuell@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef _VPCI_H__
+#define _VPCI_H__
+
+#include <palacios/vm_dev.h>
+#include <palacios/vmm_types.h>
+
+#define PROG_INTERFACE(x) ((x)[0])
+#define SUBCLASS(x) ((x)[1])
+#define CLASSCODE(x) ((x)[2])
+  
+#define HEADER_TYPE(x) ((x)&0x7f)
+  
+#define PCI_DEVICE 0x0
+
+#define IS_DEVICE(x) (HEADER_TYPE(x)==0x0)
+      
+// Helpers for decoding a PCI Base Address Register (BAR) value.
+// Bit 0 selects the address space: set = I/O, clear = memory.
+#define IS_IO_ADDR(x)   ((x)&0x1)
+#define IS_MEM_ADDR(x)  (!((x)&0x1))
+// Mask off the low flag bits to obtain the base address itself.
+#define GET_IO_ADDR(x)  (((uint_t)(x))&0xfffffffc) 
+#define GET_MEM_ADDR(x) (((uint_t)(x))&0xfffffff0)
+// The memory type field is bits 2:1, so it is shifted down by 1
+// (shifting by 2 discarded bit 1 of the field).
+#define GET_MEM_TYPE(x) (((x)&0x6)>>1)
+
+#define PCI_CONFIG_ADDRESS 0xcf8  // 32 bit, little endian
+#define PCI_CONFIG_DATA    0xcfc  // 32 bit, little endian
+
+#define PCI_IO_REGIONS 6
+
+struct pci_device_config {
+  uint16_t   vendor_id;
+  uint16_t   device_id;
+  uint16_t   command;
+  uint16_t   status;
+  uchar_t    revision;
+  uchar_t    class_code[3];  // in order: programming interface, subclass, class code
+  uchar_t    cache_line_size;
+  uchar_t    latency_time;
+  uchar_t    header_type; // bits 6-0: 00: other, 01: pci-pci bridge, 02: pci-cardbus; bit 7: 1=multifunction
+  uchar_t    BIST;  
+  uint32_t   BAR[6];
+  uint32_t   cardbus_cis_pointer;
+  uint16_t   subsystem_vendor_id;
+  uint16_t   subsystem_id;
+  uint32_t   expansion_rom_address;
+  uchar_t    cap_ptr;  // capabilities list offset in config space
+  uchar_t    reserved[7];
+  uchar_t    intr_line; // 00=none, 01=IRQ1, etc.
+  uchar_t    intr_pin;  // 00=none, otherwise INTA# to INTD#
+  uchar_t    min_grant; // min busmaster time - units of 250ns
+  uchar_t    max_latency; // units of 250ns - busmasters
+  uint32_t   device_data[48]; 
+};
+
+struct pci_device;
+
+ typedef void pci_mapioregion_fn(struct pci_device *pci_dev, int region_num,
+                                uint32_t addr, uint32_t size, int type); 
+
+typedef int port_read_fn(ushort_t port, void * dst, uint_t length, struct vm_device *vmdev); 
+typedef int port_write_fn(ushort_t port, void * src, uint_t length, struct vm_device *vmdev);
+
+#define PCI_ADDRESS_SPACE_MEM          0x00
+#define PCI_ADDRESS_SPACE_IO           0x01
+#define PCI_ADDRESS_SPACE_MEM_PREFETCH 0x08
+
+// Describes one I/O or memory region of a PCI device, tied to one of its BAR registers.
+struct pci_ioregion {
+    uint32_t addr; //current PCI mapping address. -1 means not mapped 
+    uint32_t size;  //actual ports/memories needed by device
+    uint32_t mapped_size;  //mapped size, usually bigger than needed size, -1 not mapped
+    uint8_t type;  //presumably one of the PCI_ADDRESS_SPACE_* values -- TODO confirm
+    uchar_t reg_num;  //which BAR register this region corresponds to
+    pci_mapioregion_fn *map_func;
+
+    port_read_fn **port_reads;   //array of read functions, hooked for each port in order, if NULL, do not hook that port
+    port_write_fn **port_writes; //presumably the write counterparts of port_reads -- TODO confirm
+};
+
+
+struct pci_device {
+    struct pci_device_config config; 
+    struct pci_bus *bus;
+    struct pci_device *next;
+
+    int dev_num;
+    char name[64];
+    int irqline;
+
+    struct pci_ops {
+        void (*raise_irq)(struct pci_device *dev, void *data);
+        void (*config_write)(struct pci_device *pci_dev, uchar_t addr, uint32_t val, int len);
+        uint32_t (*config_read)(struct pci_device *pci_dev, uchar_t addr, int len);
+    }ops;    
+
+    struct pci_ioregion *ioregion[PCI_IO_REGIONS];
+};
+
+
+/*
+struct pci_class_desc {
+    uint16_t class;
+    const char *desc;
+};
+
+static struct pci_class_desc pci_class_descriptions[] =
+{
+    { 0x0100, "SCSI controller"},
+    { 0x0101, "IDE controller"},
+    { 0x0102, "Floppy controller"},
+    { 0x0103, "IPI controller"},
+    { 0x0104, "RAID controller"},
+    { 0x0106, "SATA controller"},
+    { 0x0107, "SAS controller"},
+    { 0x0180, "Storage controller"},
+    { 0x0200, "Ethernet controller"},
+    { 0x0201, "Token Ring controller"},
+    { 0x0202, "FDDI controller"},
+    { 0x0203, "ATM controller"},
+    { 0x0280, "Network controller"},
+    { 0x0300, "VGA controller"},
+    { 0x0301, "XGA controller"},
+    { 0x0302, "3D controller"},
+    { 0x0380, "Display controller"},
+    { 0x0400, "Video controller"},
+    { 0x0401, "Audio controller"},
+    { 0x0402, "Phone"},
+    { 0x0480, "Multimedia controller"},
+    { 0x0500, "RAM controller"},
+    { 0x0501, "Flash controller"},
+    { 0x0580, "Memory controller"},
+    { 0x0600, "Host bridge"},
+    { 0x0601, "ISA bridge"},
+    { 0x0602, "EISA bridge"},
+    { 0x0603, "MC bridge"},
+    { 0x0604, "PCI bridge"},
+    { 0x0605, "PCMCIA bridge"},
+    { 0x0606, "NUBUS bridge"},
+    { 0x0607, "CARDBUS bridge"},
+    { 0x0608, "RACEWAY bridge"},
+    { 0x0680, "Bridge"},
+    { 0x0c03, "USB controller"},
+    { 0, NULL}
+};
+
+*/
+struct vm_device *v3_create_vpci();
+
+#endif
+
index 9fae52e..af932df 100644 (file)
@@ -23,6 +23,8 @@
 
 #include <palacios/vm_guest.h>
 #include <palacios/vmm_mem.h>
+#include <palacios/vmm_types.h>
+#include <devices/vnic.h>
 
 
 #ifdef __V3VEE__
   } while(0)                                                           \
     
 
-
+#define V3_REGISTER_PKT_DELIVERY(x) \
+  ({\
+    int ret = 0;\
+    extern struct v3_os_hooks * os_hooks;              \
+    if ((os_hooks) && (os_hooks)->register_pkt_delivery) {             \
+     ret = (os_hooks)->register_pkt_delivery(x);                               \
+    }\
+    ret; \
+  } )          
+
+#define V3_SEND_PKT(x, y) \
+  ({\
+    int ret=0; \
+    extern struct v3_os_hooks * os_hooks;              \
+    if ((os_hooks) && (os_hooks)->ne2k_send_packet) {          \
+      ret = (os_hooks)->ne2k_send_packet(x, y);                                \
+    }\
+    ret; \
+  })
+  
 
 #define VMM_INVALID_CPU 0
 #define VMM_VMX_CPU 1
@@ -244,6 +265,10 @@ struct v3_os_hooks {
 
   void (*yield_cpu)(void);
 
+  //function by network card driver
+  int (*register_pkt_delivery)(int (*rcvd_fn)(uchar_t *packet, uint_t size));
+  int (*ne2k_send_packet)(uchar_t *packet, uint_t size);
 };
 
 
diff --git a/palacios/src/devices/vnic.c b/palacios/src/devices/vnic.c
new file mode 100644 (file)
index 0000000..286a5c1
--- /dev/null
@@ -0,0 +1,983 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2009, Lei Xia <lxia@northwestern.edu> 
+ * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Lei Xia <lxia@northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+/*
+* Virtual NE2K Network Card 
+*/
+
+#include <devices/vnic.h>
+#include <palacios/vmm.h>
+#include <palacios/vmm_types.h>
+#include <palacios/vmm_io.h>
+#include <palacios/vmm_debug.h>
+
+#define DEBUG_NIC
+
+// When DEBUG_NIC is not defined, replace PrintDebug with an empty macro so
+// that debug output (and the evaluation of its arguments) is compiled out.
+#ifndef DEBUG_NIC
+#undef PrintDebug
+#define PrintDebug(fmts, args...)
+#endif
+
+typedef enum {NIC_READY, NIC_REG_POSTED} nic_state_t;
+
+struct nic_regs {
+    uchar_t cmd;
+    uchar_t pgstart;
+    uchar_t pgstop;
+    ushort_t clda;
+    uchar_t boundary;
+    uchar_t tsr;
+    uchar_t tpsr;
+    uchar_t ncr;
+    ushort_t tbcr;
+    uchar_t fifo;
+    uchar_t isr;
+    ushort_t crda;
+    ushort_t rsar;
+    ushort_t rbcr;
+    uchar_t rsr;
+    uchar_t rcr;
+    uint32_t cntr;
+    uchar_t tcr;
+    uchar_t dcr;
+    uchar_t imr;
+    
+    uchar_t phys[6]; //mac address 
+    uchar_t curpag;
+    uchar_t mult[8]; //multicast mask array 
+    uchar_t rnpp;
+    uchar_t lnpp;
+    ushort_t addcnt;
+    
+    uchar_t macaddr[6];
+};
+
+struct nic_context{
+    struct guest_info *vm;
+
+    nic_state_t dev_state;
+
+    struct nic_regs regs;
+
+    uchar_t mac[6]; //the mac address of this nic
+
+    uchar_t mem[NE2K_MEM_SIZE];        
+};
+
+struct vm_device *current_vnic;
+
+/*
+ * Evaluates to 1 when the first six bytes of src and dst are equal and to 0
+ * otherwise. Both arguments are expanded several times, so they must not
+ * have side effects.
+ */
+#define compare_mac(src, dst) \
+       ((((src)[0] == (dst)[0]) && \
+         ((src)[1] == (dst)[1]) && \
+         ((src)[2] == (dst)[2]) && \
+         ((src)[3] == (dst)[3]) && \
+         ((src)[4] == (dst)[4]) && \
+         ((src)[5] == (dst)[5])) ? 1 : 0)
+
+/*
+ * Debug helper: prints every byte of the register block of the NIC attached
+ * to dev, followed by the first 32 bytes of its memory.
+ */
+static void dump_state(struct vm_device *dev)
+{
+  int i;
+  uchar_t *p;
+  struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+
+  PrintDebug("====VNIC: Dumping state Begin ==========\n");
+  PrintDebug("Registers:\n");
+
+  // The register struct is walked as raw bytes, so any padding bytes are printed too
+  p = (uchar_t *)&nic_state->regs;
+  for(i = 0; i < (int)sizeof(struct nic_regs); i++)
+     PrintDebug("Regs[%d] = 0x%02x\n", i, (int)p[i]);
+  
+  PrintDebug("Memory:\n");
+  for(i = 0; i < 32; i++)
+        PrintDebug("0x%02x ", nic_state->mem[i]);
+  PrintDebug("\n");
+  PrintDebug("====VNIC: Dumping state End==========\n");
+}
+
+/*
+ * Raises NIC_IRQ on the guest when at least one of the low seven interrupt
+ * status bits is set in both the ISR and the IMR of the NIC attached to dev.
+ */
+static void vnic_update_irq(struct vm_device *dev)
+{
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+    struct guest_info *guest = dev->vm;
+    int pending = nic_state->regs.isr & nic_state->regs.imr & 0x7f;
+
+    // Nothing is both pending and unmasked
+    if (pending == 0x0) {
+       return;
+    }
+
+    v3_raise_irq(guest, NIC_IRQ);
+    PrintDebug("VNIC: RaiseIrq: isr: 0x%02x imr: 0x%02x\n", nic_state->regs.isr, nic_state->regs.imr);
+}
+
+/*
+ * Puts the NIC attached to dev into its initial state: records the owning VM,
+ * presets the ISR/IMR/command registers, installs the built-in MAC address,
+ * sets the multicast mask to all ones and fills the first 32 bytes of NIC
+ * memory (MAC address in bytes 0-5, 0x57 in bytes 14 and 15, 0xff elsewhere).
+ * The resulting state is dumped through dump_state().
+ */
+static void init_vnic_context(struct vm_device *dev)
+{
+    static const uchar_t default_mac[6] = {0x52, 0x54, 0x0, 0x12, 0x34, 0x56};
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+
+    nic_state->vm = dev->vm;
+
+    nic_state->regs.isr = ENISR_RESET;
+    nic_state->regs.imr = 0x00;
+    nic_state->regs.cmd = 0x22;
+
+    // The same address is kept both in the context and in the register block
+    memcpy(nic_state->mac, default_mac, 6);
+    memcpy(nic_state->regs.macaddr, default_mac, 6);
+
+    memset(nic_state->regs.mult, 0xff, 8);
+
+    // Fill the first 32 bytes first, then overwrite the MAC and marker bytes
+    memset(nic_state->mem, 0xff, 32);
+    memcpy(nic_state->mem, nic_state->mac, 6);
+    nic_state->mem[14] = 0x57;
+    nic_state->mem[15] = 0x57;
+
+    dump_state(dev);
+}
+/*
+ * Logs the bytes of an outgoing packet and hands it to the host through
+ * V3_SEND_PKT. Returns whatever V3_SEND_PKT evaluates to.
+ */
+static int vnic_send_packet(struct vm_device *dev, uchar_t *pkt, int length)
+{
+    int off = 0;
+
+    PrintDebug("\nVNIC: Sending Packet\n");
+
+    while (off < length) {
+           PrintDebug("%x ",pkt[off]);
+           off++;
+    }
+    PrintDebug("\n");
+
+    return V3_SEND_PKT(pkt, length);
+}
+
+
+/*
+ * Decides whether the NIC referenced by the global current_vnic should accept
+ * a frame whose destination MAC address starts at dst_mac (6 bytes are read).
+ *
+ * Returns current_vnic when the frame is accepted and NULL when it must be
+ * dropped, including when no NIC has been stored in current_vnic.
+ */
+struct vm_device * get_rx_dev(uchar_t *dst_mac)
+{
+    static const uchar_t brocast_mac[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+    struct nic_context *nic_state;
+    struct nic_regs *nregs;
+
+    // current_vnic is a global that starts out NULL; without a device
+    // there is nobody to deliver to
+    if (current_vnic == NULL)
+        return NULL;
+
+    nic_state = (struct nic_context *)current_vnic->private_data;
+    nregs = &(nic_state->regs);
+
+    // promiscuous mode: every frame is accepted
+    if (nregs->rcr & 0x10)
+        return current_vnic;
+
+    // broadcast address: accepted only when RCR bit 0x04 is set
+    if (compare_mac(dst_mac, brocast_mac))
+        return (nregs->rcr & 0x04) ? current_vnic : NULL;
+
+    // multicast packet: accepted only when RCR bit 0x08 is set
+    // (not fully done here: the multicast mask is not consulted)
+    if (dst_mac[0] & 0x01)
+        return (nregs->rcr & 0x08) ? current_vnic : NULL;
+
+    // unicast: must match this NIC's own address
+    if (!compare_mac(dst_mac, nic_state->mac))
+        return NULL;
+
+    return current_vnic;
+}
+
+/*
+ * Reports whether the receive ring lacks room for one more maximum-size frame.
+ *
+ * The available space is measured between the current page and the boundary
+ * page (both taken from the registers and scaled by 256). Returns 1 when it
+ * is less than MAX_ETH_FRAME_SIZE + 4 bytes, 0 otherwise.
+ */
+static int vnic_rxbuf_full(struct vm_device *dev)
+{
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+    struct nic_regs *regs = &(nic_state->regs);
+    int cur = regs->curpag << 8;
+    int bnd = regs->boundary << 8;
+    int avail;
+
+    if (cur < bnd) {
+        avail = bnd - cur;
+    } else {
+        // current page is at or past the boundary: take the ring size
+        // minus the distance between them
+        avail = ((regs->pgstop - regs->pgstart) << 8) - (cur - bnd);
+    }
+
+    return (avail < (MAX_ETH_FRAME_SIZE + 4)) ? 1 : 0;
+}
+
+// Frames shorter than this are zero-padded up to this length before delivery
+#define MIN_BUF_SIZE 60
+
+/*
+ * Copies one incoming frame into the receive ring kept in the NIC memory of
+ * dev, prefixed by a 4-byte header (status, next page, length low, length
+ * high), then advances the current page, sets ENISR_RX and updates the IRQ.
+ *
+ * The frame is silently dropped when:
+ *   - pkt is NULL or length is not positive,
+ *   - the NIC is stopped (NE2K_STOP set in the command register),
+ *   - the ring registers do not describe a valid area inside mem[]
+ *     (they are written by the guest, so they cannot be trusted),
+ *   - vnic_rxbuf_full() reports that the ring is full.
+ */
+static void vnic_receive(struct vm_device *dev, const uchar_t *pkt, int length)
+{
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+    struct nic_regs *nregs = &(nic_state->regs);
+    uchar_t *p;
+    uint32_t total_len, next, len, index, empty, remaining;
+    uchar_t buf[MIN_BUF_SIZE];
+    uint32_t start, stop;
+
+    //PrintDebug("VNIC: received packet, len=%d\n", length);
+
+    // A non-positive length would turn into a huge size in memcpy below
+    if (pkt == NULL || length <= 0)
+        return;
+
+    start = nregs->pgstart << 8;
+    stop = nregs->pgstop << 8;
+
+    if (nregs->cmd & NE2K_STOP)
+        return;
+
+    index = nregs->curpag << 8;
+
+    // pgstart, pgstop and curpag come from guest register writes. Reject any
+    // combination that would let the copies below leave mem[] or never reach
+    // the wrap-around point.
+    if (start >= stop || stop > NE2K_MEM_SIZE || index < start || index >= stop) {
+        PrintDebug("VNIC: invalid receive ring configuration, packet dropped\n");
+        return;
+    }
+
+    if (vnic_rxbuf_full(dev)){
+        PrintDebug("VNIC: received buffer overflow\n");
+        return;
+    }
+
+    // if too small buffer, expand it
+    if (length < MIN_BUF_SIZE) {
+        memcpy(buf, pkt, length);
+        memset(buf + length, 0, MIN_BUF_SIZE - length);
+        pkt = buf;
+        length = MIN_BUF_SIZE;
+    }
+
+    // 4 bytes header
+    total_len = length + 4;
+    // address for next packet (4 bytes for CRC)
+    next = index + ((total_len + 4 + 255) & ~0xff);
+    if (next >= stop)
+        next -= stop - start;
+
+    p = nic_state->mem + index;
+    nregs->rsr = ENRSR_RXOK;
+
+    // low bit of the first destination byte set: flag it in the status
+    if (pkt[0] & 0x01)
+        nregs->rsr |= ENRSR_PHY;
+
+    p[0] = nregs->rsr;
+    p[1] = next >> 8;
+    p[2] = total_len;
+    p[3] = total_len >> 8;
+    index += 4;
+
+    // Copy the payload, wrapping from stop back to start. index stays inside
+    // [start, stop) at the top of every iteration, so each pass copies at
+    // least one byte and the loop always terminates.
+    remaining = (uint32_t)length;
+    while (remaining > 0) {
+        empty = stop - index;
+        len = (remaining < empty) ? remaining : empty;
+        memcpy(nic_state->mem + index, pkt, len);
+        pkt += len;
+        index += len;
+        if (index == stop)
+            index = start;
+        remaining -= len;
+    }
+    nregs->curpag = next >> 8;
+
+    nregs->isr |= ENISR_RX;
+    vnic_update_irq(dev);
+}
+
+// =====begin here
+#if 0
+void pci_vnic_init(PCIBus *bus, NICInfo *nd, int devfn)
+{
+    PCINE2000State *d;
+    NE2000State *s;
+    uint8_t *pci_conf;
+    struct pci_device *pdev;
+
+    pdev = pci_register_device(bus,
+                                              "NE2000", sizeof(PCINE2000State),
+                                              devfn,
+                                              NULL, NULL);
+    pci_conf = d->dev.config;
+    pci_conf[0x00] = 0xec; // Realtek 8029
+    pci_conf[0x01] = 0x10;
+    pci_conf[0x02] = 0x29;
+    pci_conf[0x03] = 0x80;
+    pci_conf[0x0a] = 0x00; // ethernet network controller
+    pci_conf[0x0b] = 0x02;
+    pci_conf[0x0e] = 0x00; // header_type
+    pci_conf[0x3d] = 1; // interrupt pin 0
+
+    pci_register_io_region(&d->dev, 0, 0x100,
+                           PCI_ADDRESS_SPACE_IO, ne2000_map);
+    s = &d->ne2000;
+    s->irq = d->dev.irq[0];
+    s->pci_dev = (PCIDevice *)d;
+    memcpy(s->macaddr, nd->macaddr, 6);
+    ne2000_reset(s);
+    s->vc = qemu_new_vlan_client(nd->vlan, ne2000_receive,
+                                 ne2000_can_receive, s);
+
+    snprintf(s->vc->info_str, sizeof(s->vc->info_str),
+             "ne2000 pci macaddr=%02x:%02x:%02x:%02x:%02x:%02x",
+             s->macaddr[0],
+             s->macaddr[1],
+             s->macaddr[2],
+             s->macaddr[3],
+             s->macaddr[4],
+             s->macaddr[5]);
+
+    /* XXX: instance number ? */
+    register_savevm("ne2000", 0, 3, ne2000_save, ne2000_load, s);
+}
+#endif
+//End Here====================================
+
+/*
+ * Entry point for a packet arriving from the host side: logs the source MAC
+ * address, asks get_rx_dev() whether the virtual NIC accepts the destination
+ * address and, if so, passes the packet to vnic_receive().
+ *
+ * Packets that are NULL or shorter than the two MAC addresses (12 bytes) are
+ * dropped, since the address fields cannot be read from them.
+ * Always returns 0.
+ */
+static int netif_input(uchar_t * pkt, uint_t size)
+{
+  uint_t i;
+  struct vm_device *dev;
+
+  // Bytes 0-5 (destination) and 6-11 (source) are read below
+  if (pkt == NULL || size < 12) {
+       PrintDebug("\nVNIC: Packet too short, dropped\n");
+       return 0;
+  }
+
+  PrintDebug("\nVNIC: Packet Received:\nSource:");
+  for (i = 6; i < 12; i++) {
+       PrintDebug("%x ", pkt[i]);
+  }
+
+  dev = get_rx_dev(pkt);
+
+  if (dev == NULL) 
+       return 0;
+
+  PrintDebug("\n");
+  for(i= 0; i<size; i++)
+       PrintDebug("%x ", pkt[i]);
+  
+  vnic_receive(dev, pkt, size);
+
+  return 0;
+}
+
+
+/*
+ * I/O port write handler for the virtual NIC registers.
+ *
+ * Only single-byte writes are handled; any other length is logged and
+ * ignored. The low five bits of port select the register. Register 0
+ * (EN0_COMMAND) is the command register on every page; for all other
+ * registers the page is taken from the two high-order bits of the command
+ * register.
+ *
+ * A command write without NE2K_STOP clears ENISR_RESET, flags remote DMA
+ * completion when a DMA read/write is requested with a zero byte count, and
+ * on NE2K_TRANSMIT sends tbcr bytes starting at page tpsr of the NIC memory.
+ *
+ * Always returns length.
+ */
+static int vnic_ioport_write(ushort_t port,
+                             void * src,
+                             uint_t length,
+                             struct vm_device *dev)
+{
+    uchar_t  page;
+    struct nic_context *nic_state = (struct nic_context* )dev->private_data;
+    uchar_t val;
+    int index;
+
+    if (length == 1) {
+        memcpy(&val, src, 1);
+    } else {
+        PrintDebug("vnic_write error: length %d\n", length);
+        return length;
+    }
+
+    port &= 0x1f;
+
+    PrintDebug("vnic_write: port:0x%x (%u bytes): 0x%x\n", port, length, (int)val);
+
+    if (port == EN0_COMMAND) {
+        nic_state->regs.cmd = val;
+        if (!(val & NE2K_STOP)) {
+            nic_state->regs.isr &= ~ENISR_RESET;
+            // A remote DMA with nothing to transfer completes immediately
+            if ((val & (NE2K_DMAREAD | NE2K_DMAWRITE)) &&
+                nic_state->regs.rbcr == 0) {
+                nic_state->regs.isr |= ENISR_RDC;
+                vnic_update_irq(dev);
+            }
+            if (val & NE2K_TRANSMIT) {
+                index = (nic_state->regs.tpsr << 8);
+                if (index >= NE2K_PMEM_END)
+                    index -= NE2K_PMEM_SIZE;
+                // Only send when the whole packet lies inside the NIC memory
+                if (index + nic_state->regs.tbcr <= NE2K_PMEM_END) {
+                    vnic_send_packet(dev, nic_state->mem + index, nic_state->regs.tbcr);
+                }
+                nic_state->regs.tsr = ENTSR_PTX;
+                nic_state->regs.isr |= ENISR_TX;
+                nic_state->regs.cmd &= ~NE2K_TRANSMIT;
+                vnic_update_irq(dev);
+            }
+        }
+    } else {
+        page = nic_state->regs.cmd >> 6;
+        if (page == 0) {
+            switch (port) {
+                case EN0_STARTPG:
+                    nic_state->regs.pgstart = val;
+                    break;
+                case EN0_STOPPG:
+                    nic_state->regs.pgstop = val;
+                    break;
+                case EN0_BOUNDARY:
+                    nic_state->regs.boundary = val;
+                    break;
+                case EN0_TPSR:
+                    nic_state->regs.tpsr = val;
+                    break;
+                case EN0_TCNTLO:
+                    nic_state->regs.tbcr = (nic_state->regs.tbcr & 0xff00) | val;
+                    break;
+                case EN0_TCNTHI:
+                    nic_state->regs.tbcr = (nic_state->regs.tbcr & 0x00ff) | (val << 8);
+                    break;
+                case EN0_ISR:
+                    // Writing a 1 to a status bit clears it
+                    nic_state->regs.isr &= ~(val & 0x7f);
+                    vnic_update_irq(dev);
+                    break;
+                case EN0_RSARLO:
+                    nic_state->regs.rsar = (nic_state->regs.rsar & 0xff00) | val;
+                    break;
+                case EN0_RSARHI:
+                    nic_state->regs.rsar = (nic_state->regs.rsar & 0x00ff) | (val << 8);
+                    break;
+                case EN0_RCNTLO:
+                    nic_state->regs.rbcr = (nic_state->regs.rbcr & 0xff00) | val;
+                    break;
+                case EN0_RCNTHI:
+                    nic_state->regs.rbcr = (nic_state->regs.rbcr & 0x00ff) | (val << 8);
+                    break;
+                case EN0_RXCR:
+                    nic_state->regs.rcr = val;
+                    break;
+                case EN0_TXCR:
+                    nic_state->regs.tcr = val;
+                    // Must not fall through: a TCR write would otherwise
+                    // also overwrite the data configuration register
+                    break;
+                case EN0_DCFG:
+                    nic_state->regs.dcr = val;
+                    break;
+                case EN0_IMR:
+                    nic_state->regs.imr = val;
+                    vnic_update_irq(dev);
+                    break;
+                default:
+                    PrintDebug("vnic_write error: invalid port:0x%x\n", port);
+                    break;
+            }
+        }
+        if (page == 1) {
+            switch (port) {
+                case EN1_PHYS ... EN1_PHYS + 5:
+                    nic_state->regs.phys[port - EN1_PHYS] = val;
+                    break;
+                case EN1_CURPAG:
+                    nic_state->regs.curpag = val;
+                    break;
+                case EN1_MULT ... EN1_MULT + 7:
+                    nic_state->regs.mult[port - EN1_MULT] = val;
+                    break;
+                default:
+                    PrintDebug("vnic_write error: invalid port:0x%x\n", port);
+                    break;
+            }
+        }
+        if (page == 2) {
+            switch (port) {
+                case EN2_LDMA0:
+                    nic_state->regs.clda = (nic_state->regs.clda & 0xff00) | val;
+                    break;
+                case EN2_LDMA1:
+                    nic_state->regs.clda = (nic_state->regs.clda & 0x00ff) | (val << 8);
+                    break;
+                case EN2_RNPR:
+                    nic_state->regs.rnpp = val;
+                    break;
+                case EN2_LNRP:
+                    nic_state->regs.lnpp = val;
+                    break;
+                case EN2_ACNT0:
+                    nic_state->regs.addcnt = (nic_state->regs.addcnt & 0xff00) | val;
+                    break;
+                case EN2_ACNT1:
+                    nic_state->regs.addcnt = (nic_state->regs.addcnt & 0x00ff) | (val << 8);
+                    break;
+                default:
+                    PrintDebug("vnic_write error: invalid port:0x%x\n", port);
+                    break;
+            }
+        }
+    }
+
+    //dump_state(dev);
+
+    return length;
+}
+
+/*
+ * I/O read handler for the NIC register ports.
+ *
+ * Only single-byte accesses are served; any other length is logged and dst is
+ * left untouched.  The register is selected by the low five bits of the port
+ * together with the page number taken from bits 7:6 of the command register
+ * (the command register itself is readable from every page).
+ *
+ * port   - I/O port being read
+ * dst    - buffer receiving the register value (one byte is written)
+ * length - access width in bytes
+ * dev    - the vnic device; private_data holds the struct nic_context
+ *
+ * Returns length in every case.
+ */
+static int vnic_ioport_read(ushort_t port,
+                            void * dst,
+                            uint_t length,
+                            struct vm_device *dev)
+{
+    uchar_t page;
+    /* Start from 0 so that no path (e.g. page 3) can hand an
+     * uninitialized stack byte back to the guest. */
+    uchar_t ret = 0x00;
+
+    struct nic_context *nic_state = (struct nic_context* )dev->private_data;
+
+    /* Exactly one byte is copied below, so reject every other width
+     * (including 0, which previously still wrote one byte to dst). */
+    if (length != 1) {
+        PrintDebug("vnic_read error: length %d\n", length);
+        return length;
+    }
+
+    port &= 0x1f;
+
+    if (port == EN0_COMMAND) {
+        ret = nic_state->regs.cmd;
+    } else {
+        page = nic_state->regs.cmd >> 6;
+
+        if (page == 0) {
+            switch (port) {
+                case EN0_CLDALO:
+                    ret = nic_state->regs.clda & 0x00ff;
+                    break;
+                case EN0_CLDAHI:
+                    ret = (nic_state->regs.clda & 0xff00) >> 8;
+                    break;
+                case EN0_BOUNDARY:
+                    ret = nic_state->regs.boundary;
+                    break;
+                case EN0_TSR:
+                    ret = nic_state->regs.tsr;
+                    break;
+                case EN0_NCR:
+                    ret = nic_state->regs.ncr;
+                    break;
+                case EN0_FIFO:
+                    ret = nic_state->regs.fifo;
+                    break;
+                case EN0_ISR:
+                    ret = nic_state->regs.isr;
+                    vnic_update_irq(dev);
+                    break;
+                case EN0_CRDALO:
+                    ret = nic_state->regs.crda & 0x00ff;
+                    break;
+                case EN0_CRDAHI:
+                    ret = (nic_state->regs.crda & 0xff00) >> 8;
+                    break;
+                case EN0_RSR:
+                    ret = nic_state->regs.rsr;
+                    break;
+                case EN0_COUNTER0:
+                    ret = nic_state->regs.cntr & 0x000000ff;
+                    break;
+                case EN0_COUNTER1:
+                    ret = (nic_state->regs.cntr & 0x0000ff00) >> 8;
+                    break;
+                case EN0_COUNTER2:
+                    ret = (nic_state->regs.cntr & 0x00ff0000) >> 16;
+                    break;
+                default:
+                    PrintDebug("vnic_read error: invalid port:0x%x\n", port);
+                    ret = 0x00;
+                    break;
+            }
+        } else if (page == 1) {
+            switch (port) {
+                case EN1_PHYS ... EN1_PHYS + 5:
+                    ret = nic_state->regs.phys[port - EN1_PHYS];
+                    break;
+                case EN1_CURPAG:
+                    ret = nic_state->regs.curpag;
+                    break;
+                case EN1_MULT ... EN1_MULT + 7:
+                    ret = nic_state->regs.mult[port - EN1_MULT];
+                    break;
+                default:
+                    PrintDebug("vnic_read error: invalid port:0x%x\n", port);
+                    ret = 0x00;
+                    break;
+            }
+        } else if (page == 2) {
+            switch (port) {
+                case EN2_STARTPG:
+                    ret = nic_state->regs.pgstart;
+                    break;
+                case EN2_STOPPG:
+                    ret = nic_state->regs.pgstop;
+                    break;
+                case EN2_RNPR:
+                    ret = nic_state->regs.rnpp;
+                    break;
+                case EN2_LNRP:
+                    ret = nic_state->regs.lnpp;
+                    break;
+                case EN2_TPSR:
+                    ret = nic_state->regs.tpsr;
+                    break;
+                case EN2_ACNT0:
+                    ret = nic_state->regs.addcnt & 0x00ff;
+                    break;
+                case EN2_ACNT1:
+                    ret = (nic_state->regs.addcnt & 0xff00) >> 8;
+                    break;
+                case EN2_RCR:
+                    ret = nic_state->regs.rcr;
+                    break;
+                case EN2_TCR:
+                    ret = nic_state->regs.tcr;
+                    break;
+                case EN2_DCR:
+                    ret = nic_state->regs.dcr;
+                    break;
+                case EN2_IMR:
+                    ret = nic_state->regs.imr;
+                    break;
+                default:
+                    PrintDebug("vnic_read error: invalid port:0x%x\n", port);
+                    ret = 0x00;
+                    break;
+            }
+        } else {
+            /* Page 3 has no registers in this model: read as zero. */
+            PrintDebug("vnic_read error: invalid page:%d port:0x%x\n", (int)page, port);
+            ret = 0x00;
+        }
+    }
+
+    memcpy(dst, &ret, 1);
+
+    PrintDebug("vnic_read: port:0x%x (%u bytes): 0x%x\n", port,length, (int)ret);
+
+    //dump_state(dev);
+
+    return length;
+
+}
+
+/*
+ * Build a 16-bit value whose in-memory byte order is little-endian:
+ * the low byte of val is stored first, the high byte second.
+ */
+static inline uint16_t cpu2le16(uint16_t val)
+{
+    uint16_t out;
+    uchar_t *bytes = (uchar_t *)&out;
+
+    bytes[0] = (uchar_t)(val & 0xff);
+    bytes[1] = (uchar_t)((val >> 8) & 0xff);
+
+    return out;
+}
+
+
+/*
+ * Build a 32-bit value whose in-memory byte order is little-endian:
+ * byte k of the result holds bits [8k+7:8k] of val.
+ */
+static inline uint32_t cpu2le32(uint32_t val)
+{
+    uint32_t out;
+    uchar_t *bytes = (uchar_t *)&out;
+    int k;
+
+    for (k = 0; k < 4; k++) {
+        bytes[k] = (uchar_t)((val >> (8 * k)) & 0xff);
+    }
+
+    return out;
+}
+
+/*
+ * Decode the two bytes at p as a little-endian 16-bit value
+ * (first byte is the low-order byte).
+ */
+static inline uint16_t le16_to_cpu(const uint16_t *p)
+{
+    const uchar_t *bytes = (const uchar_t *)p;
+    int lo = bytes[0];
+    int hi = bytes[1];
+
+    return (uint16_t)(lo | (hi << 8));
+}
+
+/*
+ * Decode the four bytes at p as a little-endian 32-bit value
+ * (first byte is the low-order byte).
+ *
+ * Each byte is widened to uint32_t before shifting: shifting the promoted
+ * (signed) int form of a byte >= 0x80 left by 24 would overflow into the sign
+ * bit, which is undefined behaviour.
+ */
+static inline uint32_t le32_to_cpu(const uint32_t *p)
+{
+    const uchar_t *p1 = (const uchar_t *)p;
+
+    return  (uint32_t)p1[0]        |
+           ((uint32_t)p1[1] << 8)  |
+           ((uint32_t)p1[2] << 16) |
+           ((uint32_t)p1[3] << 24);
+}
+
+/*
+ * Store the low byte of val into the NIC memory at addr.
+ *
+ * The store only happens when addr lies in one of the two accepted windows:
+ * the first 32 bytes, or [NE2K_PMEM_START, NE2K_MEM_SIZE).  Writes elsewhere
+ * are silently dropped.  The access is logged either way.
+ */
+static void vnic_mem_writeb(struct nic_context *nic_state, 
+                            uint32_t addr,
+                            uint32_t val)
+{
+    uchar_t byte = (uchar_t) (val & 0x000000ff);
+    int in_low32 = (addr < 32);
+    int in_pmem  = (addr >= NE2K_PMEM_START && addr < NE2K_MEM_SIZE);
+
+    if (in_low32 || in_pmem) {
+        nic_state->mem[addr] = byte;
+    }
+
+    PrintDebug("wmem addr: %x val: %x\n", addr, val);
+}
+
+/*
+ * Store the low 16 bits of val, little-endian, into the NIC memory.
+ *
+ * addr is first rounded down to an even address.  The store only happens when
+ * the rounded address is below 32 or inside [NE2K_PMEM_START, NE2K_MEM_SIZE);
+ * otherwise the write is dropped.  The (rounded) access is logged either way.
+ */
+static void vnic_mem_writew(struct nic_context *nic_state, 
+                            uint32_t addr,
+                            uint32_t val)
+{
+    int writable;
+
+    addr &= ~1; //XXX: check exact behaviour if not even
+
+    writable = (addr < 32) ||
+               (addr >= NE2K_PMEM_START && addr < NE2K_MEM_SIZE);
+    if (writable) {
+        ushort_t *slot = (ushort_t *)(nic_state->mem + addr);
+        *slot = cpu2le16(val);
+    }
+
+    PrintDebug("wmem addr: %x val: %x\n", addr, val);
+}
+
+/*
+ * Store val as four little-endian bytes into the NIC memory.
+ *
+ * addr is first rounded down to an even address (so it may still be only
+ * 2-byte aligned).  The store happens when the rounded address is below 32,
+ * or when all four bytes fit inside [NE2K_PMEM_START, NE2K_MEM_SIZE).
+ * Checking only "addr < NE2K_MEM_SIZE" would let a write that starts in the
+ * last two bytes of the window run past the end of mem.  Writes outside the
+ * windows are dropped; the access is logged either way.
+ */
+static void vnic_mem_writel(struct nic_context *nic_state,
+                            uint32_t addr,
+                            uint32_t val)
+{
+    addr &= ~1; // XXX: check exact behaviour if not even
+    if (addr < 32 ||
+        (addr >= NE2K_PMEM_START && addr < NE2K_MEM_SIZE &&
+         (NE2K_MEM_SIZE - addr) >= sizeof(uint32_t))) {
+        /* memcpy rather than a uint32_t store: addr may be misaligned for
+         * a 32-bit access. */
+        uint32_t le = cpu2le32(val);
+        memcpy(nic_state->mem + addr, &le, sizeof(le));
+    }
+
+    PrintDebug("wmem addr: %x val: %x\n", addr, val);
+}
+
+/*
+ * Read one byte of NIC memory at addr.
+ *
+ * Returns the stored byte when addr is below 32 or inside
+ * [NE2K_PMEM_START, NE2K_MEM_SIZE); any other address reads as 0xff.
+ */
+static uchar_t vnic_mem_readb(struct nic_context *nic_state, uint32_t addr)
+{
+    int readable;
+
+    PrintDebug("rmem addr: %x\n", addr);
+
+    readable = (addr < 32) ||
+               (addr >= NE2K_PMEM_START && addr < NE2K_MEM_SIZE);
+    if (!readable) {
+        return 0xff;
+    }
+
+    return nic_state->mem[addr];
+}
+
+/*
+ * Read a little-endian 16-bit value from NIC memory.
+ *
+ * The address is logged as given and then rounded down to an even address.
+ * Returns the decoded value when the rounded address is below 32 or inside
+ * [NE2K_PMEM_START, NE2K_MEM_SIZE); any other address reads as 0xffff.
+ */
+static ushort_t vnic_mem_readw(struct nic_context *nic_state, uint32_t addr)
+{
+    int readable;
+
+    PrintDebug("rmem addr: %x\n", addr);
+
+    addr &= ~1; //XXX: check exact behaviour if not even 
+
+    readable = (addr < 32) ||
+               (addr >= NE2K_PMEM_START && addr < NE2K_MEM_SIZE);
+    if (!readable) {
+        return 0xffff;
+    }
+
+    return (ushort_t)le16_to_cpu((ushort_t *)(nic_state->mem + addr));
+}
+
+/*
+ * Read a little-endian 32-bit value from NIC memory.
+ *
+ * The address is logged as given and then rounded down to an even address.
+ * The read is served when the rounded address is below 32, or when all four
+ * bytes fit inside [NE2K_PMEM_START, NE2K_MEM_SIZE).  Checking only
+ * "addr < NE2K_MEM_SIZE" would let a read that starts in the last two bytes
+ * of the window run past the end of mem.  Any other address reads as
+ * 0xffffffff.
+ */
+static uint32_t vnic_mem_readl(struct nic_context *nic_state, uint32_t addr)
+{
+    PrintDebug("rmem addr: %x\n", addr);
+
+    addr &= ~1; //XXX: check exact behaviour if not even
+    if (addr < 32 ||
+        (addr >= NE2K_PMEM_START && addr < NE2K_MEM_SIZE &&
+         (NE2K_MEM_SIZE - addr) >= sizeof(uint32_t))) {
+        return (uint32_t)le32_to_cpu((uint32_t *)(nic_state->mem + addr));
+    } else {
+        return 0xffffffff;
+    }
+}
+
+/*
+ * Account for len bytes having been moved through the data port.
+ *
+ * Advances regs.rsar by len and resets it to regs.pgstart when it lands
+ * exactly on regs.pgstop.  regs.rbcr is then reduced by len; once it would
+ * reach or pass zero it is cleared, ENISR_RDC is set in regs.isr and the
+ * interrupt line is re-evaluated via vnic_update_irq().
+ */
+static void vnic_dma_update(struct vm_device *dev, int len)
+{
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+
+    nic_state->regs.rsar += len;
+
+    // wrap
+    if (nic_state->regs.rsar == nic_state->regs.pgstop) {
+        nic_state->regs.rsar = nic_state->regs.pgstart;
+    }
+
+    /* Bytes still outstanding: just count down. */
+    if (nic_state->regs.rbcr > len) {
+        nic_state->regs.rbcr -= len;
+        return;
+    }
+
+    /* Transfer complete. */
+    nic_state->regs.rbcr = 0;
+    nic_state->regs.isr |= ENISR_RDC;
+    vnic_update_irq(dev);
+}
+
+
+//for data port read/write
+/*
+ * I/O read handler for the NIC data port.
+ *
+ * Reads 1, 2 or 4 bytes of NIC memory at the current DMA address
+ * (regs.rsar), advances the DMA state by the access width and copies the
+ * value to dst.
+ *
+ * Any other width is rejected before touching dst or the DMA state: val is
+ * only four bytes wide, so copying `length` bytes out of it for length > 4
+ * would read past the variable.
+ *
+ * Returns length in every case.
+ */
+static int vnic_data_read(ushort_t port,
+                          void * dst,
+                          uint_t length,
+                          struct vm_device *dev)
+{
+    uint32_t val = 0x0;
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+
+    // current dma address
+    uint32_t addr = nic_state->regs.rsar;
+
+    switch (length) {
+        case 1:
+            val = vnic_mem_readb(nic_state, addr);
+            break;
+        case 2:
+            val = vnic_mem_readw(nic_state, addr);
+            break;
+        case 4:
+            val = vnic_mem_readl(nic_state, addr);
+            break;
+        default:
+            PrintDebug("vnic_data_read error: invalid length %d\n", length);
+            return length;
+    }
+
+    vnic_dma_update(dev, length);
+
+    memcpy(dst, &val, length);
+
+    PrintDebug("vnic_read: port:0x%x (%u bytes): 0x%x\n", port & 0x1f,length, val);
+
+    return length;
+}
+
+/*
+ * I/O write handler for the NIC data port.
+ *
+ * When a transfer is pending (regs.rbcr != 0), stores 1, 2 or 4 bytes from
+ * src into NIC memory at the current DMA address (regs.rsar) and advances the
+ * DMA state by the access width.  With no transfer pending the write is
+ * ignored.
+ *
+ * Any other width is rejected before src is copied: val is only four bytes
+ * wide, so copying `length` bytes into it for length > 4 would overflow the
+ * stack variable.  val is zero-initialised so that short writes do not leave
+ * indeterminate upper bytes in the logged value.
+ *
+ * Returns length in every case.
+ */
+static int vnic_data_write(ushort_t port,
+                           void * src,
+                           uint_t length,
+                           struct vm_device *dev)
+{
+    uint32_t val = 0x0;
+    struct nic_context *nic_state = (struct nic_context *)dev->private_data;
+
+    uint32_t addr = nic_state->regs.rsar;
+
+    if (nic_state->regs.rbcr == 0)
+        return length;
+
+    if (length != 1 && length != 2 && length != 4) {
+        PrintDebug("nic_data_write error: invalid length %d\n", length);
+        return length;
+    }
+
+    memcpy(&val, src, length);
+
+    switch (length) {
+        case 1:
+            vnic_mem_writeb(nic_state, addr, val);
+            break;
+        case 2:
+            vnic_mem_writew(nic_state, addr, val);
+            break;
+        default: /* 4: the only width left after the check above */
+            vnic_mem_writel(nic_state, addr, val);
+            break;
+    }
+
+    vnic_dma_update(dev, length);
+
+    PrintDebug("vnic_write: port:0x%x (%u bytes): 0x%x\n", port & 0x1f,length, val);
+
+    return length;
+}
+
+/*
+ * Device "reset" callback: logs the reset and re-runs init_vnic_context()
+ * on the device.  Always returns 0.
+ */
+static int vnic_reset_device(struct vm_device * dev)
+{
+    const int status = 0;
+
+    PrintDebug("vnic: reset device\n");
+    init_vnic_context(dev);
+
+    return status;
+}
+
+
+//for 0xc11f port
+/*
+ * I/O read handler for the NIC reset port.
+ *
+ * Returns zero bytes to the guest and resets the device as a side effect of
+ * the read.  The copy to dst is skipped (and logged) when length exceeds the
+ * four bytes available in val, since copying more would read past it.
+ *
+ * Returns length in every case.
+ */
+static int vnic_reset_port_read(ushort_t port,
+                                void * dst,
+                                uint_t length,
+                                struct vm_device *dev)
+{
+    uint32_t val = 0x0;
+
+    if (length <= sizeof(val)) {
+        memcpy(dst, &val, length);
+    } else {
+        PrintDebug("vnic_read error: length %d\n", length);
+    }
+
+    PrintDebug("vnic_read: port:0x%x (%u bytes): 0x%x\n", port,length, val);
+
+    vnic_reset_device(dev);
+
+    return length;
+}
+
+/*
+ * I/O write handler for the NIC reset port.
+ *
+ * The written value is only logged; it has no effect on the device.
+ * val is zero-initialised and src is copied only when it fits, so a short
+ * write does not log indeterminate bytes and an oversized one cannot overflow
+ * the four-byte stack variable.
+ *
+ * Returns length in every case.
+ */
+static int vnic_reset_port_write(ushort_t port,
+                                 void * src,
+                                 uint_t length,
+                                 struct vm_device *dev)
+{
+    uint32_t val = 0x0;
+
+    if (length <= sizeof(val)) {
+        memcpy(&val, src, length);
+    }
+
+    PrintDebug("vnic_write: port:0x%x (%u bytes): 0x%x\n", port,length, val);
+
+    return length;
+}
+
+
+/*
+ * Device "start" callback: only logs the event.  Always returns 0.
+ */
+static int vnic_start_device(struct vm_device *dev)
+{
+    const int status = 0;
+
+    PrintDebug("vnic: start device\n");
+
+    return status;
+}
+
+
+/*
+ * Device "stop" callback: only logs the event.  Always returns 0.
+ */
+static int vnic_stop_device(struct vm_device *dev)
+{
+    const int status = 0;
+
+    PrintDebug("vnic: stop device\n");
+
+    return status;
+}
+
+/*
+ * Register netif_input as the packet-delivery callback through
+ * V3_REGISTER_PKT_DELIVERY.
+ */
+static void  init_phy_network()
+{
+    V3_REGISTER_PKT_DELIVERY(&netif_input);
+}
+
+/*
+ * Device "init" callback.
+ *
+ * Registers the packet-delivery callback, initialises the NIC context,
+ * records dev in current_vnic and hooks the I/O ports: the 16 register ports
+ * starting at NIC_BASE_ADDR, the data port and the reset port.
+ * Always returns 0.
+ */
+static int vnic_init_device(struct vm_device * dev) 
+{
+    int reg_port = 0;
+
+    PrintDebug("vnic: init_device\n");
+
+    init_phy_network();
+    init_vnic_context(dev);
+
+    current_vnic = dev;
+
+    /* Register window: one handler pair shared by all 16 ports. */
+    while (reg_port < 16) {
+        v3_dev_hook_io(dev, NIC_BASE_ADDR + reg_port, &vnic_ioport_read, &vnic_ioport_write);
+        reg_port++;
+    }
+
+    v3_dev_hook_io(dev, NIC_BASE_ADDR + NIC_DATA_PORT, &vnic_data_read, &vnic_data_write);
+    v3_dev_hook_io(dev, NIC_BASE_ADDR + NIC_RESET_PORT, &vnic_reset_port_read, &vnic_reset_port_write);
+
+    return 0;
+}
+
+
+
+/*
+ * Device "deinit" callback: unhooks every I/O port that vnic_init_device()
+ * hooked (16 register ports, data port, reset port).  Always returns 0.
+ */
+static int vnic_deinit_device(struct vm_device *dev)
+{
+    int reg_port = 0;
+
+    while (reg_port < 16) {
+        v3_dev_unhook_io(dev, NIC_BASE_ADDR + reg_port);
+        reg_port++;
+    }
+
+    v3_dev_unhook_io(dev, NIC_BASE_ADDR + NIC_DATA_PORT);
+    v3_dev_unhook_io(dev, NIC_BASE_ADDR + NIC_RESET_PORT);
+
+    //vnic_reset_device(dev);
+
+    return 0;
+}
+
+
+/*
+ * Lifecycle callback table for the virtual NIC; passed to v3_create_device()
+ * by v3_create_vnic().
+ */
+static struct vm_device_ops dev_ops = { 
+  .init = vnic_init_device, 
+  .deinit = vnic_deinit_device,
+  .reset = vnic_reset_device,
+  .start = vnic_start_device,
+  .stop = vnic_stop_device,
+};
+
+
+/*
+ * Allocate the NIC state and create the "VNIC" virtual device around it.
+ *
+ * Returns the new device, or NULL when the state cannot be allocated
+ * (previously a failed allocation was handed to v3_create_device() as a NULL
+ * private_data pointer and dereferenced later by the callbacks).
+ */
+struct vm_device *v3_create_vnic() 
+{
+    struct vm_device *device;
+    struct nic_context * nic_state = V3_Malloc(sizeof(struct nic_context));
+
+    if (nic_state == NULL) {
+        PrintDebug("vnic: cannot allocate nic_context\n");
+        return NULL;
+    }
+
+    //memset(nic_state, 0, sizeof(struct nic_context));
+
+    //PrintDebug("VNIC internal at %x\n",(int)nic_state);
+
+    device = v3_create_device("VNIC", &dev_ops, nic_state);
+
+    return device;
+}
+
diff --git a/palacios/src/devices/vpci.c b/palacios/src/devices/vpci.c
new file mode 100644 (file)
index 0000000..2638569
--- /dev/null
@@ -0,0 +1,694 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2009, Lei Xia <lxia@northwestern.edu>
+ * Copyright (c) 2009, Chang Seok Bae <jhuell@gmail.com>
+ * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author:  Lei Xia <lxia@northwestern.edu>
+ *             Chang Seok Bae <jhuell@gmail.com>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */ 
+/*
+ * Virtual PCI
+ */
+#include <devices/vpci.h>
+#include <palacios/vmm.h>
+#include <palacios/vmm_types.h>
+#include <palacios/vmm_io.h>
+#include <palacios/vmm_string.h>
+#include <palacios/vmm_intr.h>
+
+#define DEBUG_PCI
+
+
+#ifndef DEBUG_PCI
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+#define NUM_DEVICES 255
+#define NUM_BUS 1
+
+/*
+ * One virtual PCI bus.
+ */
+struct pci_bus {
+    // bus number; init_pci_bus() sets it to 0 for the first bus
+    int bus_num;
+    // devices on this bus, indexed by the devfn value looked up in get_device(); NULL = empty slot
+    struct pci_device *device_list[NUM_DEVICES];
+    // link to another bus; init_pci_bus() sets it to NULL
+    struct pci_bus *next;
+    // vm_device that I/O hooks and interrupts are issued through (see hook_ioregion / vpci_raise_irq)
+    struct vm_device *vm_dev;
+};
+
+/*
+ * Private state of the virtual PCI device (stored in vm_device->private_data).
+ */
+struct pci_internal {
+  // number of entries of bus_list in use; init_pci_bus() sets it to 1
+  uint_t       num_buses;
+  uint32_t   config_address;   //current value of the configuration address port
+  // buses owned by this PCI device, indexed by bus number
+  struct pci_bus *bus_list[NUM_BUS];
+};
+
+
+/*
+ * Associates an I/O port number with a read handler and a write handler.
+ * The handler signatures match those passed to v3_dev_hook_io() elsewhere in
+ * this file.
+ */
+struct port_ops_map {
+       uint32_t port;
+       int (*port_read)(ushort_t port, void * dst, uint_t length, struct vm_device *vdev);
+       int (*port_write)(ushort_t port, void * src, uint_t length, struct vm_device *vdev);
+};
+
+
+//Lei
+/*
+ * Look up the PCI device registered at (bus_no, devfn_no).
+ *
+ * vmdev    - the virtual PCI device; private_data holds the struct pci_internal
+ * bus_no   - bus number, must be below NUM_BUS
+ * devfn_no - slot index on the bus, must be below NUM_DEVICES
+ *
+ * Returns the device, or NULL when an index is out of range, the bus does
+ * not exist, or the slot is empty.
+ */
+struct pci_device * get_device (struct vm_device *vmdev, uchar_t bus_no, uchar_t devfn_no)
+{
+    struct pci_internal *state;
+    struct pci_bus *parent;
+
+    if (bus_no >= NUM_BUS || devfn_no >= NUM_DEVICES) {
+        return NULL;
+    }
+
+    state = (struct pci_internal *)vmdev->private_data;
+    parent = state->bus_list[bus_no];
+
+    return parent ? parent->device_list[devfn_no] : NULL;
+}
+
+
+//Lei
+/*
+ * Attach per-port read/write handler arrays to one I/O region of a device.
+ *
+ * dev         - device owning the region
+ * reg_num     - index of the region in dev->ioregion[]
+ * num_ports   - number of entries in the handler arrays; must equal the
+ *               region size
+ * port_reads  - read handlers, one per port (the array is referenced, not
+ *               copied, so it must outlive the region)
+ * port_writes - write handlers, one per port (same lifetime requirement)
+ *
+ * Returns 0 on success, -1 when dev is NULL, reg_num is out of range, the
+ * region has not been registered, or num_ports does not match its size.
+ */
+int pci_hook_ports(struct pci_device *dev, 
+                   int reg_num, 
+                   int num_ports, 
+                   port_read_fn *port_reads[], 
+                   port_write_fn *port_writes[])
+{
+    struct pci_ioregion *ioreg;
+
+    if (!dev) return -1;
+
+    /* reg_num comes from the caller: never index ioregion[] unchecked.
+     * NOTE(review): assumes ioregion[] has PCI_IO_REGIONS entries, as the
+     * initialisation loop in pci_register_device() implies -- confirm. */
+    if (reg_num < 0 || reg_num >= PCI_IO_REGIONS) return -1;
+
+    ioreg = dev->ioregion[reg_num];
+
+    if (!ioreg) return -1;
+
+    if (ioreg->size != num_ports) return -1;
+
+    ioreg->port_reads = port_reads;
+    ioreg->port_writes = port_writes;
+
+    return 0;
+}
+
+//Lei
+/*
+ * Install the I/O hooks of one port-I/O region.
+ *
+ * Nothing is done when the region has no address assigned (addr == -1) or is
+ * not of type PCI_ADDRESS_SPACE_IO.  Otherwise every port of the region that
+ * has at least one handler is hooked on the bus's vm_device.
+ *
+ * The handler arrays stay NULL until pci_hook_ports() has been called for the
+ * region, so each array is checked before it is indexed.
+ */
+static inline void hook_ioregion(struct pci_device *dev, struct pci_ioregion *ioreg)
+{
+    int i;
+
+    if (ioreg->addr == -1) return;
+    if (ioreg->type != PCI_ADDRESS_SPACE_IO) return;
+
+    /* No handlers attached yet: nothing to hook. */
+    if (!ioreg->port_reads && !ioreg->port_writes) return;
+
+    for (i = 0; i < ioreg->size; i++) {
+        if ((ioreg->port_reads && ioreg->port_reads[i]) ||
+            (ioreg->port_writes && ioreg->port_writes[i])) {
+            v3_dev_hook_io(dev->bus->vm_dev,
+                           ioreg->addr + i,
+                           ioreg->port_reads ? ioreg->port_reads[i] : NULL,
+                           ioreg->port_writes ? ioreg->port_writes[i] : NULL);
+        }
+    }
+}
+
+
+//Chang
+/*
+ * Default configuration-space read handler.
+ *
+ * pdev   - device whose configuration space is read
+ * offset - byte offset into the configuration space
+ * len    - requested width: 4, 2 or 1
+ *
+ * A request that would run past offset 0xff is narrowed (4 -> 2 -> 1 bytes),
+ * matching the original fall-through behaviour.  The bytes are assembled
+ * low-byte-first.  Returns 0 for any other len, or when the access does not
+ * fit inside pdev->config.
+ *
+ * The configuration space is addressed through a byte pointer: adding offset
+ * directly to &pdev->config would advance by offset * sizeof(config) bytes
+ * and read unrelated memory.
+ */
+static uint32_t vpci_read_config(struct pci_device *pdev, uchar_t offset, int len) 
+{
+    const uchar_t *cfg = (const uchar_t *)&(pdev->config);
+    uint32_t val = 0x0;
+    uint_t nbytes;
+    uint_t i;
+
+    switch (len) {
+        case 4:
+            nbytes = (offset <= 0xfc) ? 4 : ((offset <= 0xfe) ? 2 : 1);
+            break;
+        case 2:
+            nbytes = (offset <= 0xfe) ? 2 : 1;
+            break;
+        case 1:
+            nbytes = 1;
+            break;
+        default:
+            return val;
+    }
+
+    /* Never read beyond the structure actually backing the config space. */
+    if ((uint_t)offset + nbytes > sizeof(pdev->config)) {
+        return val;
+    }
+
+    for (i = 0; i < nbytes; i++) {
+        val |= (uint32_t)cfg[offset + i] << (8 * i);
+    }
+
+    return val;
+}
+
+//Lei
+/*
+ * Default configuration-space write handler.
+ *
+ * dev    - device whose configuration space is written
+ * offset - byte offset into the configuration space
+ * val    - value to store; only the low len bytes are used
+ * len    - width of the write: 1, 2 or 4
+ *
+ * The bytes are stored low-byte-first.  Any other len is logged and ignored,
+ * as is a write that would extend past the end of dev->config (previously
+ * such a write, e.g. 4 bytes at offset 0xff, corrupted the memory following
+ * the configuration space).
+ */
+static void vpci_write_config(struct pci_device *dev, uchar_t offset, uint32_t val, int len)
+{
+    uchar_t *dev_config;
+    int i;
+
+    if (len != 1 && len != 2 && len != 4) {
+        PrintDebug("pci_write_config: wrong length %d\n", len);
+        return;
+    }
+
+    if ((uint_t)offset + (uint_t)len > sizeof(dev->config)) {
+        PrintDebug("pci_write_config: offset 0x%x length %d out of range\n", (int)offset, len);
+        return;
+    }
+
+    dev_config = (uchar_t *)&(dev->config);
+    dev_config += offset;
+
+    /* Byte-wise store: no alignment requirement on offset. */
+    for (i = 0; i < len; i++) {
+        dev_config[i] = (uchar_t)((val >> (8 * i)) & 0xff);
+    }
+}
+
+//Lei
+/*
+ * Raise the interrupt of a PCI device in its guest.
+ *
+ * The guest is reached through pdev->bus->vm_dev->vm and the IRQ number is
+ * taken from the device's config.intr_line field.  data is not used.
+ */
+void vpci_raise_irq(struct pci_device *pdev, void *data)
+{
+    struct guest_info *guest = pdev->bus->vm_dev->vm;
+    int line = pdev->config.intr_line;
+
+    v3_raise_irq(guest, line);
+}
+
+/*
+ * NOTE: everything up to the matching #endif is compiled out.  It is an
+ * earlier configuration-write routine that filters writes per register and
+ * refers to fields (dev->io_regions) that the live code does not use.
+ */
+#if 0
+//Chang
+static void pci_write_config(struct pci_device *dev, uint32_t address, uint32_t val, int len) 
+{
+       int can_write, i, reg_num;
+       uint32_t addr;
+
+       if(len == 4 && ((address >= 0x10 && address < 0x10 + 4 * 6) || //base address registers
+               (address >= 0x30 && address < 0x34))) { //expansion rom base address
+
+               struct pci_ioregion * ioregion;
+               if(address >= 0x30) {
+                               reg_num = PCI_ROM_SLOT;
+               }else {
+                       reg_num = ((address - 0x10) >>2); 
+               }
+
+               ioregion = &dev->io_regions[reg_num];
+
+               if(ioregion->size == 0) {//default config
+
+               addr = address;
+                       for (i=0;i<len;i++) {
+                               switch(*(uint8_t *)(&(dev->config)+0x0e)) {
+                               case 0x00:
+                               case 0x80:
+                                       switch(addr) {
+                                       case 0x00:
+                                       case 0x01:
+                                       case 0x02:
+                                       case 0x03:
+                                       case 0x08:
+                                       case 0x09:
+                                       case 0x0a:
+                                       case 0x0b:
+                                       case 0x0e:
+                                       case 0x10 ... 0x27:
+                                       case 0x3d:
+                                               can_write = 0;
+                                               break;
+                                       default:
+                                               can_write = 1;
+                                               break;
+                                       }
+                                       break;  
+                               default:
+                               case 0x01:
+                                       switch(addr) {
+                                       case 0x00:
+                                       case 0x01:
+                                       case 0x02:
+                                       case 0x03:
+                                       case 0x08:
+                                       case 0x09:
+                                       case 0x0a:
+                                       case 0x0b:
+                                       case 0x0e:
+                                       case 0x38 ... 0x3b: 
+                                       case 0x3d:
+                                               can_write = 0;
+                                               break;
+                                       default:
+                                               can_write = 1;
+                                               break;
+                                       }
+                                       break;
+                               }
+                               if (can_write) {
+                                       *(uint32_t *)(&(dev->config)+addr) = val;
+                               }
+                               if(++addr > 0xff) break;
+                               val >>= 8;
+                       }
+
+                       return;
+       
+               }else {
+                       if(reg_num== PCI_ROM_SLOT) {
+                               val &= (~(ioregion->size -1 )) | 1;
+                       } else {
+                               val &= ~(ioregion->size -1);
+                               val |= ioregion->type;
+                       }
+               }
+               //pci_update_mappings();
+               return;
+       }
+}
+#endif
+
+/*
+ * Create a PCI device and place it on a bus.
+ *
+ * bus           - bus to attach the device to
+ * name          - device name, copied into the device
+ * instance_size - currently unused
+ * dev_num       - slot index on the bus; a negative value (conventionally -1)
+ *                 means "use the first free slot"
+ * config_read   - configuration read handler, or NULL for vpci_read_config
+ * config_write  - configuration write handler, or NULL for vpci_write_config
+ *
+ * Returns the new device, or NULL when bus/name is NULL, no slot is free,
+ * the requested slot is out of range or already occupied, or allocation
+ * fails.
+ *
+ * Fixes over the previous version: an explicit dev_num no longer always
+ * fails, the free-slot scan stays inside device_list[] (NUM_DEVICES entries,
+ * not 256), and the new device is zeroed so its configuration space and
+ * links do not start out with heap garbage.
+ */
+struct pci_device *
+pci_register_device(struct pci_bus *bus, const char *name,
+                    int instance_size, int dev_num,
+                    uint32_t (*config_read)(struct pci_device *pci_dev, uchar_t addr, int len),
+                    void (*config_write)(struct pci_device *pci_dev, uchar_t addr, uint32_t val, int len)) 
+{
+    struct pci_device * pci_dev;
+    int i;
+
+    if (!bus || !name) return NULL;
+
+    if (dev_num < 0) {
+        /* auto assign: first empty slot */
+        for (dev_num = 0; dev_num < NUM_DEVICES; dev_num++) {
+            if (!bus->device_list[dev_num]) {
+                break;
+            }
+        }
+    }
+
+    /* Covers "bus full", an out-of-range request and an occupied slot. */
+    if (dev_num >= NUM_DEVICES || bus->device_list[dev_num]) return NULL;
+
+    pci_dev = (struct pci_device *)V3_Malloc(sizeof(struct pci_device));
+
+    if (!pci_dev) return NULL;
+
+    memset(pci_dev, 0, sizeof(struct pci_device));
+
+    pci_dev->bus = bus;
+    pci_dev->dev_num = dev_num;
+    pci_dev->irqline = -1;
+
+    /* NOTE(review): unbounded copy -- the size of pci_dev->name is declared
+     * in vpci.h, which is not visible here; confirm callers stay within it. */
+    strcpy(pci_dev->name, name);
+
+    pci_dev->ops.config_read = config_read ? config_read : &vpci_read_config;
+    pci_dev->ops.config_write = config_write ? config_write : &vpci_write_config;
+    pci_dev->ops.raise_irq = &vpci_raise_irq;
+
+    for (i = 0; i < PCI_IO_REGIONS; i++)
+        pci_dev->ioregion[i] = NULL;
+
+    //config space initiate
+
+    bus->device_list[dev_num] = pci_dev;
+
+    return pci_dev;
+}
+
+//Chang
+/*
+ * Register a placeholder device named "fake ide" on bus 0 and fill in a
+ * minimal configuration header for it (vendor 0x8086, device 0x1222,
+ * BAR[0] = 0x1F0, all other BARs 0).  The device is also stored in slot 0 of
+ * bus 0.  Does nothing when registration fails.
+ */
+static void init_fake_device(struct pci_internal *pci_state) 
+{
+    //maybe need table to map device, but just 
+    //bus_num=0, dev_num=0
+
+    struct pci_bus *bus0 = pci_state->bus_list[0];
+    struct pci_device *fake_device;
+    int bar;
+
+    //fake dev
+    fake_device = pci_register_device(bus0,
+                                      "fake ide", sizeof(struct pci_device),
+                                      -1,
+                                      NULL, NULL);
+    if (fake_device == NULL) {
+        return;
+    }
+
+    /*
+    intel, ide ctroller
+    vendor id:0x8086
+    device id: 0x1222
+    */
+    fake_device->config.vendor_id = 0x8086;
+    fake_device->config.device_id = 0x1222;
+    fake_device->config.command = 0x0;
+    fake_device->config.status = 0x0;
+    fake_device->config.revision = 0x07;
+    fake_device->config.class_code[0] = 0x1;
+    fake_device->config.class_code[1] = 0x1;
+    fake_device->config.class_code[2] = 0x1;
+    fake_device->config.header_type = 0x0;
+
+    //base address
+    fake_device->config.BAR[0] = 0x1F0;
+    for (bar = 1; bar <= 5; bar++) {
+        fake_device->config.BAR[bar] = 0;
+    }
+    //fake dev end
+
+    //need to register io regions
+
+    pci_state->bus_list[0]->device_list[0] = fake_device;
+    fake_device->bus = pci_state->bus_list[0];
+    fake_device->next = NULL;
+}
+
+
+
+// Lei
+/*
+ * Register an I/O region on a PCI device.
+ *
+ * pci_dev    - device that owns the region
+ * region_num - slot in pci_dev->ioregion[]; a negative value (conventionally
+ *              -1) means "use the first free slot"
+ * size       - size of the region
+ * type       - region type (e.g. PCI_ADDRESS_SPACE_IO)
+ * map_func   - mapping callback stored in the region
+ *
+ * The region starts out unmapped (addr == -1, mapped_size == -1) and without
+ * port handlers.
+ *
+ * Returns the slot number used, or -1 when pci_dev is NULL, no slot is free,
+ * the requested slot is out of range or already taken, or allocation fails.
+ *
+ * Fixes over the previous version: an explicit region_num no longer always
+ * fails, and the free-slot scan is bounded by PCI_IO_REGIONS instead of 256.
+ * NOTE(review): assumes ioregion[] has PCI_IO_REGIONS entries, as the
+ * initialisation loop in pci_register_device() implies -- confirm.
+ */
+int 
+pci_register_io_region(struct pci_device *pci_dev, int region_num,
+                       uint32_t size, int type,
+                       pci_mapioregion_fn *map_func)
+{
+    struct pci_ioregion *region;
+
+    if (!pci_dev) return -1;
+
+    if (region_num < 0) {
+        /* auto assign: first empty slot */
+        for (region_num = 0; region_num < PCI_IO_REGIONS; region_num++) {
+            if (!pci_dev->ioregion[region_num]) {
+                break;
+            }
+        }
+    }
+
+    /* Covers "all slots used", an out-of-range request and a taken slot. */
+    if (region_num >= PCI_IO_REGIONS) return -1;
+    if (pci_dev->ioregion[region_num] != NULL) return -1;
+
+    region = (struct pci_ioregion *)V3_Malloc(sizeof(struct pci_ioregion));
+    if (!region) return -1;
+
+    region->addr = -1;
+    region->reg_num = region_num;
+    region->size = size;
+    region->mapped_size = -1;
+    region->type = type;
+    region->map_func = map_func;
+    region->port_reads = NULL;
+    region->port_writes = NULL;
+
+    pci_dev->ioregion[region_num] = region;
+
+    return region_num;
+}
+
+
+
+
+//Chang
+/*
+ * I/O read handler for the four configuration-address ports.
+ *
+ * Copies `length` bytes of the stored config_address value, starting at the
+ * byte selected by (port - PCI_CONFIG_ADDRESS), into dst and logs them most
+ * significant byte first.  An access that would extend past the fourth byte
+ * is ignored (dst untouched).
+ *
+ * Returns length in every case.
+ */
+static int 
+vpci_addrport_read(ushort_t port,
+                   void * dst,
+                   uint_t length,
+                   struct vm_device *dev) 
+{
+    struct pci_internal *pci_state = (struct pci_internal *)dev->private_data;
+    int byte_off = port - PCI_CONFIG_ADDRESS;
+    uchar_t *reg_bytes;
+    int idx;
+
+    if (length + byte_off > 4) {
+        return length;   //cross port boundary, is memory mapped IO style
+    }
+
+    reg_bytes = (uchar_t *)&(pci_state->config_address) + byte_off;
+    memcpy(dst, reg_bytes, length);    //be careful, PCI is little endian
+
+    PrintDebug("PCI Address: reading %d bytes from port %x: 0x", length, port);
+
+    idx = length - 1;
+    while (idx >= 0) {
+        PrintDebug("%.2x", ((uchar_t*)dst)[idx]);
+        idx--;
+    }
+    PrintDebug("\n");
+
+    return length;
+}
+
+//Lei
+/*
+ * I/O write handler for the four configuration-address ports.
+ *
+ * Copies `length` bytes from src into the stored config_address value,
+ * starting at the byte selected by (port - PCI_CONFIG_ADDRESS), and logs them
+ * most significant byte first.  An access that would extend past the fourth
+ * byte is ignored.
+ *
+ * Returns length in every case.
+ */
+static int 
+vpci_addrport_write(ushort_t port,
+                    void *src,
+                    uint_t length,
+                    struct vm_device *dev)
+{
+    struct pci_internal *pci_state = (struct pci_internal *)dev->private_data;
+    int byte_off = port - PCI_CONFIG_ADDRESS;
+    uchar_t *reg_bytes;
+    int idx;
+
+    if (length + byte_off > 4) {
+        return length;   //cross port boundary, is memory mapped IO style
+    }
+
+    reg_bytes = (uchar_t *)&(pci_state->config_address) + byte_off;
+    memcpy(reg_bytes, src, length);    //be careful, PCI is little endian
+
+    PrintDebug("PCI Address: writing %d bytes to port %x: 0x", length, port);
+
+    idx = length - 1;
+    while (idx >= 0) {
+        PrintDebug("%.2x", ((uchar_t*)src)[idx]);
+        idx--;
+    }
+    PrintDebug("\n");
+
+    return length;
+}
+
+//Chang
+/*
+ * I/O read handler for the four configuration-data ports.
+ *
+ * The target is decoded from the stored config_address value:
+ *   bus num    = config_address[23:16]
+ *   device num = config_address[15:11]  (device and function are looked up
+ *   func num   = config_address[10:08]   together as one 8-bit devfn)
+ *   reg num    = config_address[07:02]
+ * The byte within the register is selected by which of the four data ports
+ * is accessed (port - PCI_CONFIG_DATA); previously that byte lane was
+ * ignored, so sub-dword reads from the upper data ports returned the bytes
+ * at register offset 0.
+ *
+ * Reads from a non-existent device return all ones.  Accesses wider than
+ * four bytes, or crossing the end of the data register, are logged and
+ * ignored (dst untouched).
+ *
+ * Returns length in every case.
+ * NOTE(review): the enable bit (config_address[31]) is not checked here.
+ */
+static int 
+vpci_dataport_read(ushort_t port,
+                   void * dst,
+                   uint_t length,
+                   struct vm_device *vmdev) 
+{
+    struct pci_internal * pci_state;
+    struct pci_device * pci_dev = NULL;
+    int bus_num, devfn, offset;
+    uint_t byte_lane;
+    uint32_t address;
+    uint32_t val;
+    int i;
+
+    byte_lane = port - PCI_CONFIG_DATA;
+
+    if (length > 4 || byte_lane + length > 4) {
+        PrintDebug("Read more than 4 bytes from port 0x%x\n", (int)port);
+        return length;
+    }
+
+    pci_state = (struct pci_internal *)vmdev->private_data;
+    address = pci_state->config_address;
+    offset = (address & 0xfc) + byte_lane;
+    devfn = (address >> 8) & 0xff;
+    bus_num = (address >> 16) & 0xff; 
+
+    pci_dev = get_device(vmdev, bus_num, devfn);
+
+    if (!pci_dev) {
+        val = 0xffffffff;
+    } else {
+        val = pci_dev->ops.config_read(pci_dev, offset, length);
+    }
+    memcpy(dst, &val, length);
+
+    PrintDebug("PCI Data: reading %d bytes from port %x: 0x", length, port);
+
+    for (i = length - 1; i >= 0; i--) { 
+        PrintDebug("%.2x", ((uchar_t*)dst)[i]);
+    }
+    PrintDebug("\n");
+
+    return length;
+
+}
+
+/*
+ * I/O write handler for the four configuration-data ports.
+ *
+ * The target bus, devfn and register are decoded from the stored
+ * config_address value (bits 23:16, 15:8 and 7:2); the byte within the
+ * register is selected by which of the four data ports is accessed
+ * (port - PCI_CONFIG_DATA).  Previously that byte lane was ignored, so
+ * sub-dword writes to the upper data ports landed at register offset 0.
+ *
+ * Writes to a non-existent device are ignored.  Accesses wider than four
+ * bytes, or crossing the end of the data register, are logged and ignored.
+ *
+ * Returns length in every case.
+ * NOTE(review): the enable bit (config_address[31]) is not checked here.
+ */
+static int 
+vpci_dataport_write(ushort_t port,
+                    void * src,
+                    uint_t length,
+                    struct vm_device *vmdev)
+{
+    struct pci_internal *pci_state;
+    uint32_t val;
+    uint32_t address;
+    struct pci_device *pdev;
+    /* unsigned bytes: these fields range over 0..255 and are passed on as
+     * uchar_t, so plain (possibly signed) char is the wrong type */
+    uchar_t bus, devfn, offset;
+    uint_t byte_lane;
+    int i;
+
+    byte_lane = port - PCI_CONFIG_DATA;
+
+    if (length > 4 || byte_lane + length > 4) {
+        PrintDebug("Write more than 4 bytes to port 0x%x\n", (int)port);
+        return length;
+    }
+
+    pci_state = (struct pci_internal *)vmdev->private_data;
+    address = pci_state->config_address;
+    offset = (address & 0xfc) + byte_lane;
+    devfn = (address >> 8) & 0xff;
+    bus = (address >> 16) & 0xff; 
+
+    pdev = get_device(vmdev, bus, devfn);
+    if (pdev == NULL) {
+        // not sure what to do here, just ignore it
+        return length;
+    }
+
+    val = 0x0;
+    memcpy(&val, src, length);
+
+    pdev->ops.config_write(pdev, offset, val, length);
+
+    PrintDebug("PCI Data: writing %d bytes to port %x: 0x", length, port);
+
+    for (i = length - 1; i >= 0; i--) { 
+        PrintDebug("%.2x", ((uchar_t*)src)[i]);
+    }
+    PrintDebug("\n");
+
+    return length;
+}
+       
+
+//Lei
+/*
+ * Create the first (and only) PCI bus and install it as bus 0.
+ *
+ * On success bus_list[0] points to an empty bus (all device slots NULL, no
+ * next bus, no vm_device yet) and num_buses is 1.  If the bus cannot be
+ * allocated, bus_list is left all-NULL and num_buses is 0 instead of
+ * dereferencing the failed allocation.
+ */
+static void init_pci_bus(struct pci_internal *pci_state) 
+{
+    int i;
+    struct pci_bus *first_bus;
+
+    pci_state->num_buses = 0;
+    for (i = 0; i < NUM_BUS; i++)
+        pci_state->bus_list[i] = NULL;
+
+    first_bus = (struct pci_bus *)V3_Malloc(sizeof(struct pci_bus));
+    if (!first_bus) {
+        PrintDebug("vpci: cannot allocate pci_bus\n");
+        return;
+    }
+
+    first_bus->bus_num = 0;  //?? not sure
+    for (i = 0; i < NUM_DEVICES; i++)
+        first_bus->device_list[i] = NULL;
+    first_bus->next = NULL;
+    /* Not known at this point; keep it well-defined rather than garbage. */
+    first_bus->vm_dev = NULL;
+
+    pci_state->num_buses = 1;
+    pci_state->bus_list[0] = first_bus;
+}
+
+//Lei
+/*
+ * Initialise the PCI device state: clear the stored configuration address
+ * and create the bus list via init_pci_bus().
+ */
+static void init_pci_internal(struct pci_internal *pci_state) 
+{
+    pci_state->config_address = 0x00;  //Not sure????
+
+    init_pci_bus(pci_state);
+}
+
+
+/*
+ * Placeholder for restoring default state: only logs.  Always returns 0.
+ */
+static int vpci_set_defaults(struct vm_device *dev)
+{
+    const int status = 0;
+
+    PrintDebug("vpci: set defaults\n");
+
+    return status;
+}
+
+
+/*
+ * Device "reset" callback: logs the reset and calls vpci_set_defaults().
+ * Always returns 0.
+ */
+static int vpci_reset_device(struct vm_device * dev)
+{
+    const int status = 0;
+
+    PrintDebug("vpci: reset device\n");
+    vpci_set_defaults(dev);
+
+    return status;
+}
+
+
+/*
+ * Device "start" callback: only logs the event.  Always returns 0.
+ */
+static int vpci_start_device(struct vm_device *dev)
+{
+    const int status = 0;
+
+    PrintDebug("vpci: start device\n");
+
+    return status;
+}
+
+
+/*
+ * Device "stop" callback: only logs the event.  Always returns 0.
+ */
+int vpci_stop_device(struct vm_device *dev)
+{
+    const int status = 0;
+
+    PrintDebug("vpci: stop device\n");
+
+    return status;
+}
+
+
+/*
+ * Device "init" callback for the virtual PCI device.
+ *
+ * Initialises the PCI state, points every bus back at this vm_device,
+ * registers the placeholder device and hooks the four configuration-address
+ * ports and the four configuration-data ports.  Always returns 0.
+ *
+ * The back-pointer matters: vpci_raise_irq() and hook_ioregion() reach the
+ * guest through bus->vm_dev, which nothing else in this file sets.
+ */
+int vpci_init_device(struct vm_device * dev) 
+{
+    struct pci_internal *pci_state;
+    int i;
+
+    PrintDebug("vpci: init_device\n");
+
+    pci_state = (struct pci_internal *)dev->private_data;
+
+    init_pci_internal(pci_state);
+
+    for (i = 0; i < NUM_BUS; i++) {
+        if (pci_state->bus_list[i]) {
+            pci_state->bus_list[i]->vm_dev = dev;
+        }
+    }
+
+    init_fake_device(pci_state); //Chang
+
+    for (i = 0; i < 4; i++) {
+        v3_dev_hook_io(dev, PCI_CONFIG_ADDRESS + i, &vpci_addrport_read, &vpci_addrport_write);
+        v3_dev_hook_io(dev, PCI_CONFIG_DATA + i, &vpci_dataport_read, &vpci_dataport_write);
+    }
+
+    return 0;
+}
+
+/*
+ * Device "deinit" callback: unhooks the four configuration-address ports and
+ * the four configuration-data ports, then resets the device.
+ * Always returns 0.
+ */
+int vpci_deinit_device(struct vm_device *dev)
+{
+    int lane = 0;
+
+    while (lane < 4) {
+        v3_dev_unhook_io(dev, PCI_CONFIG_ADDRESS + lane);
+        v3_dev_unhook_io(dev, PCI_CONFIG_DATA + lane);
+        lane++;
+    }
+
+    vpci_reset_device(dev);
+
+    return 0;
+}
+
+/*
+ * Lifecycle callback table for the virtual PCI device; passed to
+ * v3_create_device() by v3_create_vpci().
+ */
+static struct vm_device_ops dev_ops = { 
+  .init = vpci_init_device, 
+  .deinit = vpci_deinit_device,
+  .reset = vpci_reset_device,
+  .start = vpci_start_device,
+  .stop = vpci_stop_device,
+};
+
+/*
+ * Allocate the PCI state and create the "PCI" virtual device around it.
+ *
+ * Returns the new device, or NULL when the state cannot be allocated
+ * (previously a failed allocation was handed to v3_create_device() as a NULL
+ * private_data pointer and dereferenced by vpci_init_device()).
+ */
+struct vm_device *v3_create_vpci() {
+    struct vm_device *device;
+    struct pci_internal * pci_state = V3_Malloc(sizeof(struct pci_internal));
+
+    if (pci_state == NULL) {
+        PrintDebug("vpci: cannot allocate pci_internal\n");
+        return NULL;
+    }
+
+    PrintDebug("PCI internal at %x\n",(int)(long)pci_state);
+    device = v3_create_device("PCI", &dev_ops, pci_state);
+
+    return device;
+}