From: Lei Xia Date: Thu, 28 Apr 2011 04:12:42 +0000 (-0500) Subject: Multithread handling of VNET X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=fdb16aa5e99a19a91264ae84beaf77d3036548d5;p=palacios.git Multithread handling of VNET Debug Level output Etc --- diff --git a/palacios/include/palacios/vmm_dev_mgr.h b/palacios/include/palacios/vmm_dev_mgr.h index e789207..c9999bd 100644 --- a/palacios/include/palacios/vmm_dev_mgr.h +++ b/palacios/include/palacios/vmm_dev_mgr.h @@ -179,11 +179,10 @@ struct v3_dev_blk_ops { struct v3_dev_net_ops { /* Backend implemented functions */ - int (*send)(uint8_t * buf, uint32_t count, void * private_data); + int (*send)(uint8_t * buf, uint32_t len, int synchronize, void * private_data); /* Frontend implemented functions */ - int (*recv)(uint8_t * buf, uint32_t count, void * frnt_data); - void (*poll)(struct v3_vm_info * vm, int budget, void * frnt_data); + int (*recv)(uint8_t * buf, uint32_t len, void * frnt_data); /* This is ugly... */ void * frontend_data; diff --git a/palacios/include/palacios/vmm_ethernet.h b/palacios/include/palacios/vmm_ethernet.h index 3794d77..6c879dc 100644 --- a/palacios/include/palacios/vmm_ethernet.h +++ b/palacios/include/palacios/vmm_ethernet.h @@ -25,21 +25,39 @@ #define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_MTU) #define ETH_ALEN 6 +#define MIN_MTU 68 +#define MAX_MTU 65535 + +#define MAX_PACKET_LEN (ETHERNET_HEADER_LEN + MAX_MTU) + + +extern int v3_net_debug; #ifdef __V3VEE__ #include +#define V3_Net_Print(level, fmt, args...) \ + do { \ + if(level <= v3_net_debug) { \ + extern struct v3_os_hooks * os_hooks; \ + if ((os_hooks) && (os_hooks)->print) { \ + (os_hooks)->print((fmt), ##args); \ + } \ + } \ + } while (0) + struct nic_statistics { - uint32_t tx_pkts; + uint64_t tx_pkts; uint64_t tx_bytes; - uint32_t tx_dropped; + uint64_t tx_dropped; - uint32_t rx_pkts; + uint64_t rx_pkts; uint64_t rx_bytes; - uint32_t rx_dropped; + uint64_t rx_dropped; - uint32_t interrupts; + uint32_t tx_interrupts; + uint32_t rx_interrupts; }; static inline int is_multicast_ethaddr(const uint8_t * addr) diff --git a/palacios/include/palacios/vmm_vnet.h b/palacios/include/palacios/vmm_vnet.h index 1750fff..0f8c793 100644 --- a/palacios/include/palacios/vmm_vnet.h +++ b/palacios/include/palacios/vmm_vnet.h @@ -19,8 +19,8 @@ * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
*/ -#ifndef __VNET_H__ -#define __VNET_H__ +#ifndef __VNET_CORE_H__ +#define __VNET_CORE_H__ #include #include @@ -38,7 +38,8 @@ #define VNET_HASH_SIZE 17 -//routing table entry +extern int v3_vnet_debug; + struct v3_vnet_route { uint8_t src_mac[ETH_ALEN]; uint8_t dst_mac[ETH_ALEN]; @@ -100,7 +101,7 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, uint8_t type, void * priv_data); int v3_vnet_add_route(struct v3_vnet_route route); -int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data); +int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize); int v3_vnet_find_dev(uint8_t * mac); int v3_vnet_stat(struct vnet_stat * stats); @@ -110,19 +111,17 @@ struct v3_vnet_dev_ops { int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * dev_data); - void (*poll) (struct v3_vm_info * vm, int budget, void * dev_data); }; int v3_init_vnet(void); void v3_deinit_vnet(void); -void v3_vnet_do_poll(struct v3_vm_info * vm); - int v3_vnet_add_dev(struct v3_vm_info * info, uint8_t * mac, struct v3_vnet_dev_ops * ops, void * priv_data); int v3_vnet_del_dev(int dev_id); + #endif #endif diff --git a/palacios/src/devices/lnx_virtio_nic.c b/palacios/src/devices/lnx_virtio_nic.c index bb13a69..8b9017f 100644 --- a/palacios/src/devices/lnx_virtio_nic.c +++ b/palacios/src/devices/lnx_virtio_nic.c @@ -38,38 +38,62 @@ #define PrintDebug(fmt, args...) #endif +#define TX_QUEUE_SIZE 4096 +#define RX_QUEUE_SIZE 4096 +#define CTRL_QUEUE_SIZE 64 + +/* The feature bitmap for virtio nic + * from Linux */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ +#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. 
*/ +#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */ + +/* Port to get virtio config */ +#define VIRTIO_NET_CONFIG 20 + #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10)) +/* for gso_type in virtio_net_hdr */ +#define VIRTIO_NET_HDR_GSO_NONE 0 +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ -struct virtio_net_hdr { - uint8_t flags; - - uint8_t gso_type; - uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ - uint16_t gso_size; /* Bytes to append to hdr_len per frame */ - uint16_t csum_start; /* Position to start checksumming from */ - uint16_t csum_offset; /* Offset after that to place checksum */ -}__attribute__((packed)); +/* for flags in virtio_net_hdr */ +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ -struct virtio_net_hdr_mrg_rxbuf { - struct virtio_net_hdr hdr; - uint16_t num_buffers; /* Number of merged rx buffers */ -}; - -#define TX_QUEUE_SIZE 256 -#define RX_QUEUE_SIZE 4096 -#define CTRL_QUEUE_SIZE 64 +/* First element of the scatter-gather list, used with GSO or CSUM features */ +struct virtio_net_hdr +{ + uint8_t flags; + uint8_t gso_type; + uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + uint16_t gso_size; /* Bytes to append to hdr_len per frame */ + uint16_t csum_start; /* Position to start checksumming from */ + uint16_t csum_offset; /* Offset after that to place checksum */ +}__attribute__((packed)); -#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ -#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ -#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ -#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ -#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ -/* Port to get virtio config */ -#define VIRTIO_NET_CONFIG 20 +/* The header to use when the MRG_RXBUF + * feature has been negotiated. 
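+ * When merging is negotiated, num_buffers tells the guest how many receive buffers were chained to hold one packet; the rx path in this patch always reports 1.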
*/ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + uint16_t num_buffers; /* Number of merged rx buffers */ +}; struct virtio_net_config { @@ -89,6 +113,7 @@ struct virtio_net_state { struct virtio_net_config net_cfg; struct virtio_config virtio_cfg; + struct v3_vm_info * vm; struct vm_device * dev; struct pci_device * pci_dev; int io_range_size; @@ -98,21 +123,23 @@ struct virtio_net_state { struct virtio_queue ctrl_vq; /* idx 2*/ struct v3_timer * timer; + void * poll_thread; - struct nic_statistics statistics; + struct nic_statistics stats; struct v3_dev_net_ops * net_ops; v3_lock_t rx_lock, tx_lock; uint8_t tx_notify, rx_notify; uint32_t tx_pkts, rx_pkts; - uint64_t past_ms; + uint64_t past_us; void * backend_data; struct virtio_dev_state * virtio_dev; struct list_head dev_link; }; + static int virtio_init_state(struct virtio_net_state * virtio) { virtio->rx_vq.queue_size = RX_QUEUE_SIZE; @@ -139,9 +166,10 @@ static int virtio_init_state(struct virtio_net_state * virtio) virtio->virtio_cfg.pci_isr = 0; - virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC) | - (1 << VIRTIO_NET_F_HOST_UFO) | - (1 << VIRTIO_NET_F_HOST_TSO4); + virtio->virtio_cfg.host_features = 0 | (1 << VIRTIO_NET_F_MAC); + // (1 << VIRTIO_NET_F_GSO) | + // (1 << VIRTIO_NET_F_HOST_UFO) | + // (1 << VIRTIO_NET_F_HOST_TSO4); if ((v3_lock_init(&(virtio->rx_lock)) == -1) || (v3_lock_init(&(virtio->tx_lock)) == -1)){ @@ -152,32 +180,36 @@ static int virtio_init_state(struct virtio_net_state * virtio) } static int tx_one_pkt(struct guest_info * core, - struct virtio_net_state * virtio, - struct vring_desc * buf_desc) + struct virtio_net_state * virtio, + struct vring_desc * buf_desc) { uint8_t * buf = NULL; uint32_t len = buf_desc->length; + int synchronize = 1; // (virtio->tx_notify == 1)?1:0; if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) { PrintDebug("Could not translate buffer address\n"); return -1; } - if(virtio->net_ops->send(buf, len, virtio->backend_data) >= 0){ - virtio->statistics.tx_pkts ++; - virtio->statistics.tx_bytes += len; + V3_Net_Print(2, "Virtio-NIC: virtio_tx: size: %d\n", len); + if(v3_net_debug >= 4){ + v3_hexdump(buf, len, NULL, 0); + } - return 0; + if(virtio->net_ops->send(buf, len, synchronize, virtio->backend_data) < 0){ + virtio->stats.tx_dropped ++; + return -1; } - virtio->statistics.tx_dropped ++; + virtio->stats.tx_pkts ++; + virtio->stats.tx_bytes += len; - return -1; + return 0; } -static int -copy_data_to_desc(struct guest_info * core, +static inline int copy_data_to_desc(struct guest_info * core, struct virtio_net_state * virtio_state, struct vring_desc * desc, uchar_t * buf, @@ -188,7 +220,7 @@ copy_data_to_desc(struct guest_info * core, uint8_t * desc_buf = NULL; if (v3_gpa_to_hva(core, desc->addr_gpa, (addr_t *)&(desc_buf)) == -1) { - PrintError("Could not translate buffer address\n"); + PrintDebug("Could not translate buffer address\n"); return -1; } len = (desc->length < buf_len)?(desc->length - offset):buf_len; @@ -198,7 +230,7 @@ copy_data_to_desc(struct guest_info * core, } -static int get_desc_count(struct virtio_queue * q, int index) { +static inline int get_desc_count(struct virtio_queue * q, int index) { struct vring_desc * tmp_desc = &(q->desc[index]); int cnt = 1; @@ -218,32 +250,10 @@ static inline void disable_cb(struct virtio_queue *queue) { queue->used->flags |= VRING_NO_NOTIFY_FLAG; } - -/* interrupt the guest, so the guest core get EXIT to Palacios */ -static inline void notify_guest(struct virtio_net_state * virtio){ - 
v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); -} - - -/* guest free some pkts for rx queue */ -static int handle_rx_queue_kick(struct guest_info * core, - struct virtio_net_state * virtio) -{ - return 0; -} - - -static int handle_ctrl(struct guest_info * core, - struct virtio_net_state * virtio) { - - return 0; -} - static int handle_pkt_tx(struct guest_info * core, struct virtio_net_state * virtio_state) { struct virtio_queue *q = &(virtio_state->tx_vq); - struct virtio_net_hdr *hdr = NULL; int txed = 0; unsigned long flags; @@ -253,12 +263,16 @@ static int handle_pkt_tx(struct guest_info * core, flags = v3_lock_irqsave(virtio_state->tx_lock); while (q->cur_avail_idx != q->avail->index) { + struct virtio_net_hdr *hdr = NULL; struct vring_desc * hdr_desc = NULL; addr_t hdr_addr = 0; uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; int desc_cnt = get_desc_count(q, desc_idx); - uint32_t req_len = 0; - int i = 0; + + if(desc_cnt > 2){ + PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt); + goto exit_error; + } hdr_desc = &(q->desc[desc_idx]); if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) { @@ -269,25 +283,15 @@ static int handle_pkt_tx(struct guest_info * core, hdr = (struct virtio_net_hdr *)hdr_addr; desc_idx = hdr_desc->next; - if(desc_cnt > 2){ - PrintError("VNIC: merged rx buffer not supported, desc_cnt %d\n", desc_cnt); + /* here we assumed that one ethernet pkt is not splitted into multiple buffer */ + struct vring_desc * buf_desc = &(q->desc[desc_idx]); + if (tx_one_pkt(core, virtio_state, buf_desc) == -1) { + PrintError("Virtio NIC: Error handling nic operation\n"); goto exit_error; } - - /* here we assumed that one ethernet pkt is not splitted into multiple virtio buffer */ - for (i = 0; i < desc_cnt - 1; i++) { - struct vring_desc * buf_desc = &(q->desc[desc_idx]); - if (tx_one_pkt(core, virtio_state, buf_desc) == -1) { - PrintError("Error handling nic operation\n"); - goto exit_error; - } - - req_len += buf_desc->length; - desc_idx = buf_desc->next; - } - + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; - q->used->ring[q->used->index % q->queue_size].length = req_len; /* What do we set this to???? */ + q->used->ring[q->used->index % q->queue_size].length = buf_desc->length; /* What do we set this to???? 
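Per the virtio spec, used.len is the number of bytes the device wrote into the descriptor chain, which for a transmit queue is arguably 0; the buffer length serves as a stand-in here.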
*/ q->used->index ++; q->cur_avail_idx ++; @@ -297,11 +301,16 @@ static int handle_pkt_tx(struct guest_info * core, v3_unlock_irqrestore(virtio_state->tx_lock, flags); - if (txed && !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + if (virtio_state->virtio_cfg.pci_isr == 0 && + txed && !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { v3_pci_raise_irq(virtio_state->virtio_dev->pci_bus, 0, virtio_state->pci_dev); virtio_state->virtio_cfg.pci_isr = 0x1; - virtio_state->statistics.interrupts ++; + virtio_state->stats.rx_interrupts ++; + } + + if(txed > 0) { + V3_Net_Print(2, "Virtio Handle TX: txed pkts: %d\n", txed); } return 0; @@ -386,6 +395,10 @@ static int virtio_io_write(struct guest_info *core, break; case 1: virtio_setup_queue(core, virtio, &virtio->tx_vq, pfn, page_addr); + if(virtio->tx_notify == 0){ + disable_cb(&virtio->tx_vq); + V3_THREAD_WAKEUP(virtio->poll_thread); + } break; case 2: virtio_setup_queue(core, virtio, &virtio->ctrl_vq, pfn, page_addr); @@ -408,20 +421,16 @@ static int virtio_io_write(struct guest_info *core, { uint16_t queue_idx = *(uint16_t *)src; if (queue_idx == 0){ - if(handle_rx_queue_kick(core, virtio) == -1){ - PrintError("Could not handle Virtio NIC rx kick\n"); - return -1; - } + /* receive queue refill */ + virtio->stats.tx_interrupts ++; } else if (queue_idx == 1){ if (handle_pkt_tx(core, virtio) == -1) { PrintError("Could not handle Virtio NIC tx kick\n"); return -1; } + virtio->stats.tx_interrupts ++; } else if (queue_idx == 2){ - if (handle_ctrl(core, virtio) == -1) { - PrintError("Could not handle Virtio NIC ctrl kick\n"); - return -1; - } + /* ctrl */ } else { PrintError("Wrong queue index %d\n", queue_idx); } @@ -462,7 +471,7 @@ static int virtio_io_read(struct guest_info *core, case HOST_FEATURES_PORT: if (length != 4) { PrintError("Illegal read length for host features\n"); - return -1; + //return -1; } *(uint32_t *)dst = virtio->virtio_cfg.host_features; break; @@ -544,10 +553,10 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { uint32_t offset = 0; unsigned long flags; -#ifdef CONFIG_DEBUG_VIRTIO_NET - PrintDebug("Virtio-NIC: virtio_rx: size: %d\n", size); - v3_hexdump(buf, size, NULL, 0); -#endif + V3_Net_Print(2, "Virtio-NIC: virtio_rx: size: %d\n", size); + if(v3_net_debug >= 4){ + v3_hexdump(buf, size, NULL, 0); + } flags = v3_lock_irqsave(virtio->rx_lock); @@ -555,29 +564,28 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); if (q->ring_avail_addr == 0) { - PrintDebug("Queue is not set\n"); + V3_Net_Print(2, "Virtio NIC: RX Queue not set\n"); + virtio->stats.rx_dropped ++; goto err_exit; } if (q->cur_avail_idx != q->avail->index){ addr_t hdr_addr = 0; - uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; uint16_t buf_idx = 0; + uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; struct vring_desc * hdr_desc = NULL; + struct vring_desc * buf_desc = NULL; + uint32_t len; hdr_desc = &(q->desc[hdr_idx]); if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) { - PrintDebug("Could not translate receive buffer address\n"); + V3_Net_Print(2, "Virtio NIC: Could not translate receive buffer address\n"); + virtio->stats.rx_dropped ++; goto err_exit; } - hdr.num_buffers = 1; - memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); - if (offset >= data_len) { - hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; - } - struct vring_desc * buf_desc = NULL; - for (buf_idx = 
hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) { +#if 0 /* merged buffer */ + for(buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) { uint32_t len = 0; buf_desc = &(q->desc[buf_idx]); @@ -587,33 +595,63 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { buf_desc->flags = VIRTIO_NEXT_FLAG; } buf_desc->length = len; + hdr.num_buffers ++; } buf_desc->flags &= ~VIRTIO_NEXT_FLAG; - + memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); +#endif + + hdr.num_buffers = 1; + memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + if (data_len == 0) { + hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; + } + + buf_idx = hdr_desc->next; + buf_desc = &(q->desc[buf_idx]); + len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf, data_len, 0); + if (len < data_len) { + V3_Net_Print(2, "Virtio NIC: ring buffer len less than pkt size, merged buffer not supported\n"); + virtio->stats.rx_dropped ++; + + goto err_exit; + } + buf_desc->flags &= ~VIRTIO_NEXT_FLAG; + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */ q->used->index++; q->cur_avail_idx++; - virtio->statistics.rx_pkts ++; - virtio->statistics.rx_bytes += size; + virtio->stats.rx_pkts ++; + virtio->stats.rx_bytes += size; } else { - virtio->statistics.rx_dropped ++; + V3_Net_Print(2, "Virtio NIC: Guest RX queue is full\n"); + virtio->stats.rx_dropped ++; + + /* kick guest to refill the queue */ + virtio->virtio_cfg.pci_isr = 0x1; + v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev); + v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); + virtio->stats.rx_interrupts ++; goto err_exit; } - if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + if (virtio->virtio_cfg.pci_isr == 0 && + !(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line); - + + virtio->virtio_cfg.pci_isr = 0x1; v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev); - virtio->virtio_cfg.pci_isr = 0x1; - virtio->statistics.interrupts ++; + + virtio->stats.rx_interrupts ++; } v3_unlock_irqrestore(virtio->rx_lock, flags); - /* notify guest if guest is running */ + /* notify guest if it is in guest mode */ + /* ISSUE: What is gonna happen if guest thread is running on the same core as this thread? 
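Presumably nothing bad: while this host thread occupies the core the guest cannot be executing in guest mode, so the kick is merely redundant and the interrupt is delivered at the next VM entry.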
*/ if(virtio->rx_notify == 1){ v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); } @@ -650,12 +688,21 @@ static struct v3_device_ops dev_ops = { }; -static void virtio_nic_poll(struct v3_vm_info * vm, int budget, void * data){ - struct virtio_net_state * virtio = (struct virtio_net_state *)data; +static int virtio_tx_flush(void * args){ + struct virtio_net_state *virtio = (struct virtio_net_state *)args; + + V3_Print("Virtio TX Poll Thread Starting for %s\n", virtio->vm->name); - if(virtio->tx_notify == 0){ - handle_pkt_tx(&(vm->cores[0]), virtio); + while(1){ + if(virtio->tx_notify == 0){ + handle_pkt_tx(&(virtio->vm->cores[0]), virtio); + v3_yield(NULL); + }else { + V3_THREAD_SLEEP(); + } } + + return 0; } static int register_dev(struct virtio_dev_state * virtio, @@ -732,31 +779,33 @@ static int register_dev(struct virtio_dev_state * virtio, #define RATE_UPPER_THRESHOLD 10 /* 10000 pkts per second, around 100Mbits */ #define RATE_LOWER_THRESHOLD 1 -#define PROFILE_PERIOD 50 /*50ms*/ +#define PROFILE_PERIOD 10000 /*us*/ -/* Timer Functions */ static void virtio_nic_timer(struct guest_info * core, uint64_t cpu_cycles, uint64_t cpu_freq, void * priv_data) { struct virtio_net_state * net_state = (struct virtio_net_state *)priv_data; - uint64_t period_ms; + uint64_t period_us; + static int profile_ms = 0; - period_ms = cpu_cycles/cpu_freq; - net_state->past_ms += period_ms; + period_us = (1000*cpu_cycles)/cpu_freq; + net_state->past_us += period_us; - if(net_state->past_ms > PROFILE_PERIOD){ +#if 0 + if(net_state->past_us > PROFILE_PERIOD){ uint32_t tx_rate, rx_rate; - tx_rate = (net_state->statistics.tx_pkts - net_state->tx_pkts)/net_state->past_ms; /* pkts/per ms */ - rx_rate = (net_state->statistics.rx_pkts - net_state->rx_pkts)/net_state->past_ms; + tx_rate = (net_state->stats.tx_pkts - net_state->tx_pkts)/(net_state->past_us/1000); /* pkts/per ms */ + rx_rate = (net_state->stats.rx_pkts - net_state->rx_pkts)/(net_state->past_us/1000); - net_state->tx_pkts = net_state->statistics.tx_pkts; - net_state->rx_pkts = net_state->statistics.rx_pkts; + net_state->tx_pkts = net_state->stats.tx_pkts; + net_state->rx_pkts = net_state->stats.rx_pkts; if(tx_rate > RATE_UPPER_THRESHOLD && net_state->tx_notify == 1){ V3_Print("Virtio NIC: Switch TX to VMM driven mode\n"); disable_cb(&(net_state->tx_vq)); net_state->tx_notify = 0; + V3_THREAD_WAKEUP(net_state->poll_thread); } if(tx_rate < RATE_LOWER_THRESHOLD && net_state->tx_notify == 0){ @@ -766,19 +815,30 @@ static void virtio_nic_timer(struct guest_info * core, } if(rx_rate > RATE_UPPER_THRESHOLD && net_state->rx_notify == 1){ - PrintDebug("Virtio NIC: Switch RX to VMM None notify mode\n"); + V3_Print("Virtio NIC: Switch RX to VMM None notify mode\n"); net_state->rx_notify = 0; } if(rx_rate < RATE_LOWER_THRESHOLD && net_state->rx_notify == 0){ - PrintDebug("Virtio NIC: Switch RX to VMM notify mode\n"); + V3_Print("Virtio NIC: Switch RX to VMM notify mode\n"); net_state->rx_notify = 1; } - net_state->past_ms = 0; + net_state->past_us = 0; } -} +#endif + profile_ms += period_us/1000; + if(profile_ms > 20000){ + V3_Net_Print(1, "Virtio NIC: TX: Pkt: %lld, Bytes: %lld\n\t\tRX Pkt: %lld. 
Bytes: %lld\n\t\tDropped: tx %lld, rx %lld\nInterrupts: tx %d, rx %d\nTotal Exit: %lld\n", + net_state->stats.tx_pkts, net_state->stats.tx_bytes, + net_state->stats.rx_pkts, net_state->stats.rx_bytes, + net_state->stats.tx_dropped, net_state->stats.rx_dropped, + net_state->stats.tx_interrupts, net_state->stats.rx_interrupts, + net_state->vm->cores[0].num_exits); + profile_ms = 0; + } +} static struct v3_timer_ops timer_ops = { .update_timer = virtio_nic_timer, @@ -796,19 +856,21 @@ static int connect_fn(struct v3_vm_info * info, memset(net_state, 0, sizeof(struct virtio_net_state)); register_dev(virtio, net_state); + net_state->vm = info; net_state->net_ops = ops; net_state->backend_data = private_data; net_state->virtio_dev = virtio; - net_state->tx_notify = 1; - net_state->rx_notify = 1; - + net_state->tx_notify = 0; + net_state->rx_notify = 0; + net_state->timer = v3_add_timer(&(info->cores[0]),&timer_ops,net_state); ops->recv = virtio_rx; - ops->poll = virtio_nic_poll; ops->frontend_data = net_state; memcpy(ops->fnt_mac, virtio->mac, ETH_ALEN); + net_state->poll_thread = V3_CREATE_THREAD(virtio_tx_flush, (void *)net_state, "Virtio_Poll"); + return 0; } @@ -834,14 +896,7 @@ static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { if (macstr != NULL && !str2mac(macstr, virtio_state->mac)) { PrintDebug("Virtio NIC: Mac specified %s\n", macstr); - PrintDebug("MAC: %x:%x:%x:%x:%x:%x\n", virtio_state->mac[0], - virtio_state->mac[1], - virtio_state->mac[2], - virtio_state->mac[3], - virtio_state->mac[4], - virtio_state->mac[5]); }else { - PrintDebug("Virtio NIC: MAC not specified\n"); random_ethaddr(virtio_state->mac); } diff --git a/palacios/src/devices/lnx_virtio_vnet.c b/palacios/src/devices/lnx_virtio_vnet.c index b4b7342..87f158d 100644 --- a/palacios/src/devices/lnx_virtio_vnet.c +++ b/palacios/src/devices/lnx_virtio_vnet.c @@ -303,7 +303,7 @@ static int do_tx_pkts(struct guest_info * core, memcpy(pkt.header, virtio_pkt->pkt, ETHERNET_HEADER_LEN); pkt.data = virtio_pkt->pkt; - v3_vnet_send_pkt(&pkt, NULL); + v3_vnet_send_pkt(&pkt, NULL, 1); q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; q->used->ring[q->used->index % q->queue_size].length = pkt_desc->length; // What do we set this to???? 
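
The asynchronous send path added below (vnet_pkt_enqueue() in the vmm_vnet_core.c hunk) reserves a ring slot under a lock but copies the packet outside the critical section, publishing the slot through a per-entry 'use' flag. A minimal userspace sketch of that handshake, with pthreads standing in for Palacios locks; slot, ring, enqueue(), RING_SIZE, and PKT_MAX are illustrative names, not Palacios APIs:

    #include <pthread.h>
    #include <string.h>

    #define RING_SIZE 16
    #define PKT_MAX   1518

    struct slot {
        volatile int  use;   /* 0 = free, 1 = holds a finished packet */
        int           len;
        unsigned char data[PKT_MAX];
    };

    static struct slot     ring[RING_SIZE];
    static int             tail, count;   /* both guarded by ring_lock */
    static pthread_mutex_t ring_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Producer: reserve a slot under the lock, fill it outside the lock.
     * Caller guarantees len <= PKT_MAX. */
    static int enqueue(const unsigned char * pkt, int len) {
        struct slot * s;

        pthread_mutex_lock(&ring_lock);
        if (count >= RING_SIZE) {              /* queue overflow: drop */
            pthread_mutex_unlock(&ring_lock);
            return -1;
        }
        count++;
        s = &ring[tail];
        tail = (tail + 1) % RING_SIZE;
        pthread_mutex_unlock(&ring_lock);

        while (s->use) ;                       /* consumer may still be draining
                                                * this slot; rare, short spin */
        s->len = len;
        memcpy(s->data, pkt, len);             /* the big copy happens unlocked */
        s->use = 1;                            /* publish to the consumer; a real
                                                * implementation would want a write
                                                * barrier before this store */
        return 0;
    }

The point of the flag is that ring_lock covers only the head/tail/count bookkeeping, so the potentially large memcpy() never extends the critical section; the spin handles the rare window where a slot has been reserved but not yet filled or drained.
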
diff --git a/palacios/src/devices/vnet_nic.c b/palacios/src/devices/vnet_nic.c index 0fdaaba..05117e5 100644 --- a/palacios/src/devices/vnet_nic.c +++ b/palacios/src/devices/vnet_nic.c @@ -42,7 +42,7 @@ struct vnet_nic_state { /* called by frontend, send pkt to VNET */ static int vnet_nic_send(uint8_t * buf, uint32_t len, - void * private_data) { + int synchronize, void * private_data) { struct vnet_nic_state * vnetnic = (struct vnet_nic_state *)private_data; struct v3_vnet_pkt pkt; @@ -52,15 +52,13 @@ static int vnet_nic_send(uint8_t * buf, uint32_t len, memcpy(pkt.header, buf, ETHERNET_HEADER_LEN); pkt.data = buf; -#ifdef CONFIG_DEBUG_VNET_NIC - { - PrintDebug("VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", + V3_Net_Print(2, "VNET-NIC: send pkt (size: %d, src_id: %d, src_type: %d)\n", pkt.size, pkt.src_id, pkt.src_type); - v3_hexdump(buf, len, NULL, 0); + if(v3_net_debug >= 4){ + v3_hexdump(buf, len, NULL, 0); } -#endif - return v3_vnet_send_pkt(&pkt, NULL);; + return v3_vnet_send_pkt(&pkt, NULL, synchronize); } @@ -70,22 +68,13 @@ static int virtio_input(struct v3_vm_info * info, void * private_data){ struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; - PrintDebug("VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", + V3_Net_Print(2, "VNET-NIC: receive pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type); return vnetnic->net_ops.recv(pkt->data, pkt->size, vnetnic->net_ops.frontend_data); } -/* poll data from front-end */ -static void virtio_poll(struct v3_vm_info * info, - int budget, - void * private_data){ - struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; - - vnetnic->net_ops.poll(info, budget, vnetnic->net_ops.frontend_data); -} - static int vnet_nic_free(struct vnet_nic_state * vnetnic) { @@ -102,7 +91,6 @@ static struct v3_device_ops dev_ops = { static struct v3_vnet_dev_ops vnet_dev_ops = { .input = virtio_input, - .poll = virtio_poll, }; diff --git a/palacios/src/palacios/vmm_vnet_core.c b/palacios/src/palacios/vmm_vnet_core.c index e0e0ac7..4b54d71 100644 --- a/palacios/src/palacios/vmm_vnet_core.c +++ b/palacios/src/palacios/vmm_vnet_core.c @@ -31,6 +31,8 @@ #define PrintDebug(fmt, args...) 
#endif +int v3_net_debug = 0; + struct eth_hdr { uint8_t dst_mac[ETH_ALEN]; uint8_t src_mac[ETH_ALEN]; @@ -45,11 +47,6 @@ struct vnet_dev { struct v3_vnet_dev_ops dev_ops; void * private_data; - int active; - - uint64_t bytes_tx, bytes_rx; - uint32_t pkts_tx, pkt_rx; - struct list_head node; } __attribute__((packed)); @@ -60,7 +57,6 @@ struct vnet_brg_dev { uint8_t type; - int active; void * private_data; } __attribute__((packed)); @@ -85,6 +81,20 @@ struct route_list { } __attribute__((packed)); +struct queue_entry{ + uint8_t use; + struct v3_vnet_pkt pkt; + uint8_t data[ETHERNET_PACKET_LEN]; +}; + +#define VNET_QUEUE_SIZE 10240 +struct vnet_queue { + struct queue_entry buf[VNET_QUEUE_SIZE]; + int head, tail; + int count; + v3_lock_t lock; +}; + static struct { struct list_head routes; struct list_head devs; @@ -97,10 +107,13 @@ static struct { v3_lock_t lock; struct vnet_stat stats; - struct hashtable * route_cache; -} vnet_state; + void * pkt_flush_thread; + struct vnet_queue pkt_q; + struct hashtable * route_cache; +} vnet_state; + #ifdef CONFIG_DEBUG_VNET static inline void mac_to_string(uint8_t * mac, char * buf) { @@ -182,7 +195,8 @@ static int clear_hash_cache() { return 0; } -static int look_into_cache(const struct v3_vnet_pkt * pkt, struct route_list ** routes) { +static int look_into_cache(const struct v3_vnet_pkt * pkt, + struct route_list ** routes) { *routes = (struct route_list *)v3_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf)); return 0; @@ -306,8 +320,8 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) { int max_rank = 0; struct list_head match_list; struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data); -// uint8_t src_type = pkt->src_type; - // uint32_t src_link = pkt->src_id; + // uint8_t src_type = pkt->src_type; + // uint32_t src_link = pkt->src_id; #ifdef CONFIG_DEBUG_VNET { @@ -425,19 +439,18 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) { } -int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { +int vnet_tx_one_pkt(struct v3_vnet_pkt * pkt, void * private_data) { struct route_list * matched_routes = NULL; unsigned long flags; int i; -#ifdef CONFIG_DEBUG_VNET - { - int cpu = V3_Get_CPU(); - PrintDebug("VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", + int cpu = V3_Get_CPU(); + V3_Net_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n", cpu, pkt->size, pkt->src_id, pkt->src_type, pkt->dst_id, pkt->dst_type); - } -#endif + if(v3_net_debug >= 4){ + v3_hexdump(pkt->data, pkt->size, NULL, 0); + } flags = v3_lock_irqsave(vnet_state.lock); @@ -466,30 +479,30 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { for (i = 0; i < matched_routes->num_routes; i++) { struct vnet_route_info * route = matched_routes->routes[i]; - if (route->route_def.dst_type == LINK_EDGE) { - struct vnet_brg_dev *bridge = vnet_state.bridge; - pkt->dst_type = LINK_EDGE; - pkt->dst_id = route->route_def.dst_id; + if (route->route_def.dst_type == LINK_EDGE) { + struct vnet_brg_dev * bridge = vnet_state.bridge; + pkt->dst_type = LINK_EDGE; + pkt->dst_id = route->route_def.dst_id; - if (bridge == NULL || (bridge->active == 0)) { - PrintDebug("VNET/P Core: No active bridge to sent data to\n"); + if (bridge == NULL) { + V3_Net_Print(2, "VNET/P Core: No active bridge to sent data to\n"); continue; } if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){ - PrintDebug("VNET/P Core: Packet not sent 
properly to bridge\n"); + V3_Net_Print(2, "VNET/P Core: Packet not sent properly to bridge\n"); continue; } vnet_state.stats.tx_bytes += pkt->size; vnet_state.stats.tx_pkts ++; } else if (route->route_def.dst_type == LINK_INTERFACE) { - if (route->dst_dev == NULL || route->dst_dev->active == 0){ - PrintDebug("VNET/P Core: No active device to sent data to\n"); + if (route->dst_dev == NULL){ + V3_Net_Print(2, "VNET/P Core: No active device to sent data to\n"); continue; } if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) { - PrintDebug("VNET/P Core: Packet not sent properly\n"); + V3_Net_Print(2, "VNET/P Core: Packet not sent properly\n"); continue; } vnet_state.stats.tx_bytes += pkt->size; @@ -502,6 +515,50 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { return 0; } + +static int vnet_pkt_enqueue(struct v3_vnet_pkt * pkt){ + unsigned long flags; + struct queue_entry * entry; + struct vnet_queue * q = &(vnet_state.pkt_q); + + flags = v3_lock_irqsave(q->lock); + + if (q->count >= VNET_QUEUE_SIZE){ + V3_Net_Print(1, "VNET Queue overflow!\n"); + v3_unlock_irqrestore(q->lock, flags); + return -1; + } + + q->count ++; + entry = &(q->buf[q->tail++]); + q->tail %= VNET_QUEUE_SIZE; + + v3_unlock_irqrestore(q->lock, flags); + + /* this is ugly, but should happen very unlikely */ + while(entry->use); + + entry->pkt.data = entry->data; + memcpy(&(entry->pkt), pkt, sizeof(struct v3_vnet_pkt)); + memcpy(entry->data, pkt->data, pkt->size); + + entry->use = 1; + + return 0; +} + + +int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize) { + if(synchronize){ + vnet_tx_one_pkt(pkt, NULL); + }else { + vnet_pkt_enqueue(pkt); + V3_Net_Print(2, "VNET/P Core: Put pkt into Queue: pkt size %d\n", pkt->size); + } + + return 0; +} + int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, struct v3_vnet_dev_ops *ops, void * priv_data){ @@ -517,11 +574,9 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, memcpy(new_dev->mac_addr, mac, 6); new_dev->dev_ops.input = ops->input; - new_dev->dev_ops.poll = ops->poll; new_dev->private_data = priv_data; new_dev->vm = vm; new_dev->dev_id = 0; - new_dev->active = 1; flags = v3_lock_irqsave(vnet_state.lock); @@ -544,7 +599,6 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, } - int v3_vnet_del_dev(int dev_id){ struct vnet_dev * dev = NULL; unsigned long flags; @@ -566,6 +620,7 @@ int v3_vnet_del_dev(int dev_id){ return 0; } + int v3_vnet_stat(struct vnet_stat * stats){ stats->rx_bytes = vnet_state.stats.rx_bytes; @@ -604,12 +659,10 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, struct vnet_brg_dev * tmp_bridge = NULL; flags = v3_lock_irqsave(vnet_state.lock); - if (vnet_state.bridge == NULL) { bridge_free = 1; vnet_state.bridge = (void *)1; } - v3_unlock_irqrestore(vnet_state.lock, flags); if (bridge_free == 0) { @@ -629,7 +682,6 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, tmp_bridge->brg_ops.input = ops->input; tmp_bridge->brg_ops.poll = ops->poll; tmp_bridge->private_data = priv_data; - tmp_bridge->active = 1; tmp_bridge->type = type; /* make this atomic to avoid possible race conditions */ @@ -641,20 +693,39 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, } -void v3_vnet_do_poll(struct v3_vm_info * vm){ - struct vnet_dev * dev = NULL; +static int vnet_tx_flush(void *args){ + unsigned long flags; + struct queue_entry * entry; + struct vnet_queue * q = &(vnet_state.pkt_q); - /* TODO: run this on separate threads - * round-robin schedule, with 
maximal budget for each poll */ list_for_each_entry(dev, &(vnet_state.devs), node) { if(dev->dev_ops.poll != NULL){ dev->dev_ops.poll(vm, -1, dev->private_data); } + V3_Print("VNET/P Handling Pkt Thread Starting ....\n"); + + //V3_THREAD_SLEEP(); + /* we need thread sleep/wakeup in Palacios */ + while(1){ + flags = v3_lock_irqsave(q->lock); + + if (q->count <= 0){ + v3_unlock_irqrestore(q->lock, flags); + v3_yield(NULL); + //V3_THREAD_SLEEP(); + }else { + q->count --; + entry = &(q->buf[q->head++]); + q->head %= VNET_QUEUE_SIZE; + + v3_unlock_irqrestore(q->lock, flags); + + /* this is ugly, but should happen only very rarely */ + while(!entry->use); + vnet_tx_one_pkt(&(entry->pkt), NULL); + entry->use = 0; + + V3_Net_Print(2, "vnet_tx_flush: pkt (size %d)\n", entry->pkt.size); + } } } - int v3_init_vnet() { memset(&vnet_state, 0, sizeof(vnet_state)); @@ -669,12 +740,15 @@ int v3_init_vnet() { } vnet_state.route_cache = v3_create_htable(0, &hash_fn, &hash_eq); - if (vnet_state.route_cache == NULL) { PrintError("VNET/P Core: Failed to initialize route cache\n"); return -1; } + v3_lock_init(&(vnet_state.pkt_q.lock)); + + vnet_state.pkt_flush_thread = V3_CREATE_THREAD(vnet_tx_flush, NULL, "VNET_Pkts"); + PrintDebug("VNET/P Core initialized\n"); return 0;
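
For completeness, the consumer half of the same sketch, mirroring vnet_tx_flush() above. It reuses the slot/ring/ring_lock declarations from the sketch before the vnet_nic.c diff; sched_yield() stands in for v3_yield(), and route_pkt() is a hypothetical placeholder for vnet_tx_one_pkt():

    #include <sched.h>

    static int head;                           /* guarded by ring_lock, like tail/count */

    static void route_pkt(const unsigned char * pkt, int len) {
        (void)pkt; (void)len;                  /* placeholder: the real thread calls
                                                * vnet_tx_one_pkt() to match routes */
    }

    static void * flush_thread(void * arg) {
        (void)arg;
        for (;;) {
            struct slot * s;

            pthread_mutex_lock(&ring_lock);
            if (count == 0) {
                pthread_mutex_unlock(&ring_lock);
                sched_yield();                 /* spin-yield until work arrives, as
                                                * the commit does pending a thread
                                                * sleep/wakeup API in Palacios */
                continue;
            }
            count--;
            s = &ring[head];
            head = (head + 1) % RING_SIZE;
            pthread_mutex_unlock(&ring_lock);

            while (!s->use) ;                  /* producer may still be filling it */
            route_pkt(s->data, s->len);
            s->use = 0;                        /* hand the slot back */
        }
        return NULL;
    }

One detail worth flagging in the committed loop: V3_Net_Print() reads entry->pkt.size after entry->use has been cleared, so a fast producer could refill the slot before the trace line is formatted. That is harmless for a level-2 debug message, but it is strictly a read of a released slot.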