From: Lei Xia Date: Wed, 6 Oct 2010 18:06:38 +0000 (-0500) Subject: All updates on the VNET during summer X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=4c4382c3fa7e7e87c260b6bb473220d0081ca1d5;p=palacios.git All updates on the VNET during summer add VMM-mode/Guest-mode support of VNET/P some kind of flow-control in VNET/P (not quite sure if it is working) VNET/P running on multicore Not tested with current devel branch --- diff --git a/palacios/include/palacios/vmm_dev_mgr.h b/palacios/include/palacios/vmm_dev_mgr.h index 23b861d..9e019b5 100644 --- a/palacios/include/palacios/vmm_dev_mgr.h +++ b/palacios/include/palacios/vmm_dev_mgr.h @@ -158,11 +158,18 @@ struct v3_dev_blk_ops { }; struct v3_dev_net_ops { + /* below functions are called by frontend device + * These will be filled in by the backend when a backend is initiated. --Lei*/ int (*send)(uint8_t * buf, uint32_t count, void * private_data, struct vm_device *dest_dev); - - // This will be filled in by the frontend when a backend is connected. - // The backend then calls this function for packet RX - int (*recv)(uint8_t * buf, uint32_t count, void * private_data); + void (*start_rx)(void *back_data); + void (*stop_rx)(void *back_data); + + /* below functions are called by Backend device + * These will be filled in by the frontend when a backend is connected. -- Lei*/ + int (*recv)(uint8_t * buf, uint32_t count, void * frnt_data); + void (*poll)(struct v3_vm_info *vm, void* frnt_data); + void (*start_tx)(void * frnt_data); + void (*stop_tx)(void * frnt_data); void * frontend_data; }; diff --git a/palacios/include/palacios/vmm_vnet.h b/palacios/include/palacios/vmm_vnet.h index 4316c99..254fbf5 100644 --- a/palacios/include/palacios/vmm_vnet.h +++ b/palacios/include/palacios/vmm_vnet.h @@ -7,9 +7,9 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * - * Copyright (c) 2009, Lei Xia + * Copyright (c) 2010, Lei Xia * Copyright (c) 2009, Yuan Tang - * Copyright (c) 2009, The V3VEE Project + * Copyright (c) 2010, The V3VEE Project * All rights reserved. * * Author: Lei Xia @@ -35,7 +35,7 @@ typedef enum {LINK_INTERFACE=0, LINK_EDGE, LINK_ANY} link_type_t; //for 'type' a #define VNET_HASH_SIZE 17 #define ETHERNET_HEADER_LEN 14 -#define ETHERNET_MTU 6000 +#define ETHERNET_MTU 1500 #define ETHERNET_PACKET_LEN (ETHERNET_HEADER_LEN + ETHERNET_MTU) //routing table entry @@ -92,43 +92,59 @@ struct v3_vnet_profile{ }; #endif -struct v3_vnet_bridge_input_args{ + +struct v3_vnet_bridge_xcall_args{ struct v3_vm_info * vm; struct v3_vnet_pkt *vnet_pkts; uint16_t pkt_num; void * private_data; }; -int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void *private_data); +struct v3_vnet_dev_xcall_args{ + struct v3_vm_info * vm; + void * private_data; +}; + +struct v3_vnet_dev_ops { + int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * dev_data); + void (*poll) (struct v3_vm_info *vm, void *dev_data); + void (*poll_xcall)(void *arg); + + void (*start_tx)(void * dev_data); + void (*stop_tx)(void * dev_data); +}; + +struct v3_vnet_bridge_ops { + int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt pkt[], uint16_t pkt_num, void * private_data); + int (*xcall_input)(void *data); + void (*polling_pkt)(struct v3_vm_info * vm, void *private_data); +}; + -void v3_vnet_send_pkt_xcall(void * data); +int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void *private_data); int v3_vnet_add_route(struct v3_vnet_route route); -int V3_init_vnet(); +int v3_init_vnet(); int v3_vnet_add_bridge(struct v3_vm_info * vm, - int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt pkt[], uint16_t pkt_num, void * private_data), + /*int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt pkt[], uint16_t pkt_num, void * private_data), void (*xcall_input)(void *data), - int (*poll_pkt)(struct v3_vm_info * vm, void * private_data), - uint16_t max_delayed_pkts, - long max_latency, + int (*poll_pkt)(struct v3_vm_info * vm, void * private_data),*/ + struct v3_vnet_bridge_ops *ops, void * priv_data); int v3_vnet_add_dev(struct v3_vm_info *info, uint8_t mac[6], - int (*dev_input)(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * private_data), + struct v3_vnet_dev_ops *ops, void * priv_data); -void v3_vnet_heartbeat(struct guest_info *core); - - -int v3_vnet_disable_bridge(); -int v3_vnet_enable_bridge(); - -void v3_vnet_polling(); +void v3_vnet_poll(struct v3_vm_info *vm); -int v3_vnet_rx(uchar_t *buf, uint16_t size, uint16_t src_id, uint8_t src_type); +/* enable a vnet device, tell VNET can send pkts to it */ +int v3_vnet_enable_device(int dev_id); +/* tell VNET stop sending pkts to it */ +int v3_vnet_disable_device(int dev_id); #endif diff --git a/palacios/src/devices/lnx_virtio_nic.c b/palacios/src/devices/lnx_virtio_nic.c index 8993066..4f86bbe 100644 --- a/palacios/src/devices/lnx_virtio_nic.c +++ b/palacios/src/devices/lnx_virtio_nic.c @@ -7,13 +7,13 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * - * Copyright (c) 2008, Lei Xia - * Copyright (c) 2008, Cui Zheng - * Copyright (c) 2008, The V3VEE Project + * Copyright (c) 2010, Lei Xia + * Copyright (c) 2010, Cui Zheng + * Copyright (c) 2010, The V3VEE Project * All rights reserved. * * Author: Lei Xia - * Cui Zheng + * Cui Zheng * * * This is free software. You are permitted to use, @@ -28,7 +28,6 @@ #include #include #include - #include @@ -47,18 +46,32 @@ struct virtio_net_hdr { uint8_t gso_type; uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ uint16_t gso_size; /* Bytes to append to hdr_len per frame */ - uint16_t csum_start; /* Position to start checksumming from */ - uint16_t csum_offset; /* Offset after that to place checksum */ + uint16_t csum_start; /* Position to start checksumming from */ + uint16_t csum_offset; /* Offset after that to place checksum */ }__attribute__((packed)); + +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + uint16_t num_buffers; /* Number of merged rx buffers */ +}; + -#define QUEUE_SIZE 1024 +#define TX_QUEUE_SIZE 64 +#define RX_QUEUE_SIZE 1024 #define CTRL_QUEUE_SIZE 64 #define ETH_ALEN 6 +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ + struct virtio_net_config { - uint8_t mac[ETH_ALEN]; //VIRTIO_NET_F_MAC + uint8_t mac[ETH_ALEN]; /* VIRTIO_NET_F_MAC */ uint16_t status; } __attribute__((packed)); @@ -76,33 +89,46 @@ struct virtio_net_state { struct pci_device * pci_dev; int io_range_size; - struct virtio_queue rx_vq; //index 0, rvq in Linux virtio driver, handle packet to guest - struct virtio_queue tx_vq; //index 1, svq in Linux virtio driver, handle packet from guest - struct virtio_queue ctrl_vq; //index 2, ctrol info from guest + struct virtio_queue rx_vq; /* idx 0, pkts to guest */ + struct virtio_queue tx_vq; /* idx 1, pkts from guest */ + struct virtio_queue ctrl_vq; /* idx 2 */ - ulong_t pkt_sent, pkt_recv, pkt_drop; + int buffed_rx; + int tx_disabled; /* stop TX pkts from guest */ + uint16_t cur_notify_tx_idx; /*for used in update_tx_queue */ -#if 1 //for temporary performance testing purpose - long last_sent_time, last_recv_time; -#endif + uint64_t pkt_sent, pkt_recv, pkt_drop; + uint64_t tx_stop_times, rx_stop_times, tx_poll_times, rx_ipi_num; struct v3_dev_net_ops * net_ops; - - v3_lock_t lock; + v3_lock_t rx_lock, tx_lock; void * backend_data; struct virtio_dev_state * virtio_dev; struct list_head dev_link; }; +/* virtio nic error type */ +#define ERR_VIRTIO_OTHER 1 +#define ERR_VIRTIO_RXQ_FULL 2 +#define ERR_VIRTIO_RXQ_NOSET 3 +#define ERR_VIRTIO_TXQ_NOSET 4 +#define ERR_VIRTIO_TXQ_FULL 5 +#define ERR_VIRTIO_TXQ_DISABLED 6 + + static int virtio_free(struct vm_device * dev) { - return -1; + return 0; } static int virtio_init_state(struct virtio_net_state * virtio) { + virtio->rx_vq.queue_size = RX_QUEUE_SIZE; + virtio->tx_vq.queue_size = TX_QUEUE_SIZE; + virtio->ctrl_vq.queue_size = CTRL_QUEUE_SIZE; + virtio->rx_vq.ring_desc_addr = 0; virtio->rx_vq.ring_avail_addr = 0; virtio->rx_vq.ring_used_addr = 0; @@ -121,15 +147,17 @@ static int virtio_init_state(struct virtio_net_state * virtio) virtio->ctrl_vq.pfn = 0; virtio->ctrl_vq.cur_avail_idx = 0; - virtio->virtio_cfg.host_features = 0; - //virtio->virtio_cfg.status = VIRTIO_NET_S_LINK_UP; virtio->virtio_cfg.pci_isr = 0; + + virtio->virtio_cfg.host_features = 0; // (1 << VIRTIO_NET_F_MAC); - if (v3_lock_init(&(virtio->lock)) == -1){ - PrintError("Virtio NIC: Failure to init lock for net_state\n"); + if ((v3_lock_init(&(virtio->rx_lock)) == -1) || + (v3_lock_init(&(virtio->tx_lock)) == -1)){ + PrintError("Virtio NIC: Failure to init locks for net_state\n"); } virtio->pkt_sent = virtio->pkt_recv = virtio->pkt_drop = 0; + virtio->buffed_rx = 0; return 0; } @@ -139,37 +167,21 @@ static int pkt_tx(struct guest_info *core, struct virtio_net_state * virtio, str uint8_t * buf = NULL; uint32_t len = buf_desc->length; - PrintDebug("Virtio NIC: Virtio Pkt Sending, net_state: %p, pkt size: %d\n", virtio, len); - if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) { PrintError("Could not translate buffer address\n"); - return -1; + return -ERR_VIRTIO_OTHER; } - if (virtio->net_ops->send(buf, len, virtio->backend_data, NULL) == -1) { - return -1; - } - - return 0; + return virtio->net_ops->send(buf, len, virtio->backend_data, NULL); } -static int build_receive_header(struct virtio_net_hdr * hdr, const void * buf, int raw) { - hdr->flags = 0; - - if (!raw) { - memcpy(hdr, buf, sizeof(struct virtio_net_hdr)); - } else { - memset(hdr, 0, sizeof(struct virtio_net_hdr)); - } - - return 0; -} static int copy_data_to_desc(struct guest_info *core, struct virtio_net_state * virtio_state, struct vring_desc * desc, uchar_t * buf, - uint_t buf_len) + uint_t buf_len, + uint_t offset) { uint32_t len; uint8_t * desc_buf = NULL; @@ -178,15 +190,13 @@ static int copy_data_to_desc(struct guest_info *core, PrintError("Could not translate buffer address\n"); return -1; } - len = (desc->length < buf_len)?desc->length:buf_len; - memcpy(desc_buf, buf, len); + len = (desc->length < buf_len)?(desc->length - offset):buf_len; + memcpy(desc_buf+offset, buf, len); return len; } - - static int get_desc_count(struct virtio_queue * q, int index) { struct vring_desc * tmp_desc = &(q->desc[index]); int cnt = 1; @@ -199,7 +209,54 @@ static int get_desc_count(struct virtio_queue * q, int index) { return cnt; } -static int handle_ctrl(struct guest_info *core, struct virtio_net_state * dev) { +static inline void enable_cb(struct virtio_queue *queue){ + queue->used->flags &= ~ VRING_NO_NOTIFY_FLAG; +} + +static inline void disable_cb(struct virtio_queue *queue) { + queue->used->flags |= VRING_NO_NOTIFY_FLAG; +} + +/* interrupt the guest, so the guest core get EXIT to Palacios + * this happens when there are either incoming pkts for the guest + * or the guest can start TX pkts again */ +static inline void notify_guest(struct virtio_net_state * virtio){ + v3_interrupt_cpu(virtio->virtio_dev->vm, virtio->virtio_dev->vm->cores[0].cpu_id, 0); +} + + +/* guest free some pkts from rx queue */ +static int handle_rx_kick(struct guest_info *core, struct virtio_net_state * virtio) +{ + unsigned long flags; + + flags = v3_lock_irqsave(virtio->rx_lock); + + virtio->net_ops->start_rx(virtio->backend_data); + disable_cb(&virtio->rx_vq); + + v3_unlock_irqrestore(virtio->rx_lock, flags); + + return 0; +} + +#ifdef CONFIG_VNET_PROFILE +static void print_profile_info(struct virtio_net_state *virtio){ + PrintError("Virtio NIC: %p, sent: %lld, rxed: %lld, dropped: %lld, \ + tx_stop: %lld, rx_stop: %lld, poll_time: %lld, rx_ipi: %lld\n", + virtio, + virtio->pkt_sent, + virtio->pkt_recv, + virtio->pkt_drop, + virtio->tx_stop_times, + virtio->rx_stop_times, + virtio->tx_poll_times, + virtio->rx_ipi_num); +} +#endif + +static int handle_ctrl(struct guest_info *core, struct virtio_net_state * virtio) { + return 0; } @@ -207,9 +264,16 @@ static int handle_pkt_tx(struct guest_info *core, struct virtio_net_state * virt { struct virtio_queue * q = &(virtio_state->tx_vq); struct virtio_net_hdr * hdr = NULL; + int recved = 0; + unsigned long flags; + + if (!q->ring_avail_addr) + return -ERR_VIRTIO_TXQ_NOSET; - PrintDebug("Virtio NIC: TX: running on cpu: %d\n", V3_Get_CPU()); + if(virtio_state->tx_disabled) + return -ERR_VIRTIO_TXQ_DISABLED; + flags = v3_lock_irqsave(virtio_state->tx_lock); while (q->cur_avail_idx != q->avail->index) { struct vring_desc * hdr_desc = NULL; addr_t hdr_addr = 0; @@ -221,57 +285,193 @@ static int handle_pkt_tx(struct guest_info *core, struct virtio_net_state * virt hdr_desc = &(q->desc[desc_idx]); if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) { PrintError("Could not translate block header address\n"); - return -1; + goto exit_error; } hdr = (struct virtio_net_hdr*)hdr_addr; desc_idx = hdr_desc->next; + if(desc_cnt > 2){ + PrintError("VNIC: merged rx buffer not supported\n"); + goto exit_error; + } + + /* here we assumed that one ethernet pkt is not splitted into multiple virtio buffer */ for (i = 0; i < desc_cnt - 1; i++) { struct vring_desc * buf_desc = &(q->desc[desc_idx]); if (pkt_tx(core, virtio_state, buf_desc) == -1) { PrintError("Error handling nic operation\n"); - return -1; + goto exit_error; } req_len += buf_desc->length; desc_idx = buf_desc->next; } virtio_state->pkt_sent ++; + recved ++; q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; q->used->ring[q->used->index % q->queue_size].length = req_len; // What do we set this to???? - q->used->index++; - + q->used->index ++; + q->cur_avail_idx ++; } + v3_unlock_irqrestore(virtio_state->tx_lock, flags); + + if(!recved) + return 0; + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { v3_pci_raise_irq(virtio_state->virtio_dev->pci_bus, 0, virtio_state->pci_dev); virtio_state->virtio_cfg.pci_isr = 0x1; } + #ifdef CONFIG_VNET_PROFILE - if (virtio_state->pkt_sent % 50000 == 0){ + static long min = 1024, max = 0, total=0; + static int i=0; + total += recved; + i ++; + if(recved > max) max = recved; + if(recved < min) min = recved; + if(total > 100000) { + PrintError("VNIC: TX polling: %ld, min %ld, max %ld, avg: %ld pkts\n", total, min, max, total/i); + min = 1024; + max = 0; + i = 1; + total = 0; + } +#endif + + + return 0; + +exit_error: + + v3_unlock_irqrestore(virtio_state->tx_lock, flags); + return -ERR_VIRTIO_OTHER; +} + + +#if 0 //for multicore VNET +/* used for poll pkt from virtio nic by VNET + * only when vnet is running on sidecore */ +static int handle_pkt_tx_sidecore(struct guest_info *core, struct virtio_net_state * virtio) +{ + struct virtio_queue * q = &(virtio->tx_vq); + struct virtio_net_hdr * hdr = NULL; + int recved = 0; + unsigned long flags; + + if (!q->ring_avail_addr) { + return -ERR_VIRTIO_TXQ_NOSET; + } + + flags = v3_lock_irqsave(virtio->tx_lock); + + while (q->cur_avail_idx != q->avail->index) { + struct vring_desc * hdr_desc = NULL; + addr_t hdr_addr = 0; + uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; + int desc_cnt = get_desc_count(q, desc_idx); + uint32_t req_len = 0; + int i = 0; + + hdr_desc = &(q->desc[desc_idx]); + if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) { + PrintError("Could not translate block header address\n"); + goto exit_error; + } + + hdr = (struct virtio_net_hdr*)hdr_addr; + desc_idx = hdr_desc->next; + + if(desc_cnt > 2){ + PrintError("VNIC: merged rx buffer not supported\n"); + goto exit_error; + } + + for (i = 0; i < desc_cnt - 1; i++) { + struct vring_desc * buf_desc = &(q->desc[desc_idx]); + if (pkt_tx_sidecore(core, virtio, buf_desc) < 0) { + PrintError("Error handling nic operation\n"); + goto exit_error; + } + + req_len += buf_desc->length; + desc_idx = buf_desc->next; + } + recved ++; + + q->cur_avail_idx ++; + } + + if(recved) PrintDebug("VNIC: Tx polling %d pkts\n", recved); + + v3_unlock_irqrestore(virtio->tx_lock, flags); + + return 0; + +exit_error: + + v3_unlock_irqrestore(virtio->tx_lock, flags); + return -ERR_VIRTIO_OTHER; +} + + +/* called by VNET, to notify Virtio to update the tx_queue used index + * used only when vnet running on sidecore */ +static int update_tx_queue(struct virtio_net_state * virtio, int handled_pkt){ + struct virtio_queue * q = &(virtio->tx_vq); + unsigned long flags; + + if (!q->ring_avail_addr) { + return -ERR_VIRTIO_TXQ_NOSET; + } + + flags = v3_lock_irqsave(virtio->tx_lock); + while(handled_pkt > 0){ + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[virtio->cur_notify_tx_idx % q->queue_size]; + q->used->ring[q->used->index % q->queue_size].length = req_len; // TODO: + q->used->index ++; + virtio->cur_notify_tx_idx ++; + handled_pkt --; + } + v3_unlock_irqrestore(virtio->tx_lock, flags); + + if (handled_pkt && (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG))) { + v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev); + virtio->virtio_cfg.pci_isr = 0x1; + + /* do we need to notify here? */ + notify_guest(virtio); + } + + virtio->pkt_sent += handled_pkt; + +#ifdef CONFIG_VNET_PROFILE + if (virtio->pkt_sent % 50000 == 0){ long cur_time, time; rdtscll(cur_time); time = cur_time - virtio_state->last_sent_time; PrintError("Virtio NIC: last sent 50000 cycles: %ld\n",time); - //PrintError("Virtio NIC: sent: %ld, rxed: %ld, dropped: %ld\n", - // virtio_state->pkt_sent, - // virtio_state->pkt_recv, - // virtio_state->pkt_drop); - rdtscll(virtio_state->last_sent_time); + PrintError("Virtio NIC: sent: %ld, rxed: %ld, dropped: %ld\n", + virtio->pkt_sent, + virtio->pkt_recv, + virtio->pkt_drop); + rdtscll(virtio->last_sent_time); } #endif + return 0; } - +#endif static int virtio_setup_queue(struct guest_info *core, - struct virtio_net_state * virtio_state, - struct virtio_queue * queue, + struct virtio_net_state * virtio_state, + struct virtio_queue * queue, addr_t pfn, addr_t page_addr) { queue->pfn = pfn; @@ -326,7 +526,6 @@ static int virtio_io_write(struct guest_info *core, uint16_t port, void * src, u return -1; } virtio->virtio_cfg.guest_features = *(uint32_t *)src; - PrintDebug("Setting Guest Features to %x\n", virtio->virtio_cfg.guest_features); break; case VRING_PG_NUM_PORT: @@ -337,15 +536,14 @@ static int virtio_io_write(struct guest_info *core, uint16_t port, void * src, u addr_t pfn = *(uint32_t *)src; addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT); uint16_t queue_idx = virtio->virtio_cfg.vring_queue_selector; - - PrintDebug("Virtio Write: pfn: %p, page_addr %p, queue_idx %d\n", (void *)pfn, (void *)page_addr, queue_idx); - switch (queue_idx) { case 0: virtio_setup_queue(core, virtio, &virtio->rx_vq, pfn, page_addr); + disable_cb(&virtio->rx_vq); break; case 1: virtio_setup_queue(core, virtio, &virtio->tx_vq, pfn, page_addr); + disable_cb(&virtio->tx_vq); break; case 2: virtio_setup_queue(core, virtio, &virtio->ctrl_vq, pfn, page_addr); @@ -358,7 +556,7 @@ static int virtio_io_write(struct guest_info *core, uint16_t port, void * src, u case VRING_Q_SEL_PORT: virtio->virtio_cfg.vring_queue_selector = *(uint16_t *)src; if (virtio->virtio_cfg.vring_queue_selector > 2) { - PrintError("Virtio NIC device only uses 3 queue, selected %d\n", + PrintError("Virtio NIC: wrong queue idx: %d\n", virtio->virtio_cfg.vring_queue_selector); return -1; } @@ -368,8 +566,10 @@ static int virtio_io_write(struct guest_info *core, uint16_t port, void * src, u { uint16_t queue_idx = *(uint16_t *)src; if (queue_idx == 0){ - PrintDebug("receive queue notification 0, packet get by Guest\n"); + handle_rx_kick(core, virtio); + PrintError("rx kick\n"); } else if (queue_idx == 1){ + PrintError("tx kick\n"); if (handle_pkt_tx(core, virtio) == -1) { PrintError("Could not handle NIC Notification\n"); return -1; @@ -380,8 +580,7 @@ static int virtio_io_write(struct guest_info *core, uint16_t port, void * src, u return -1; } } else { - PrintError("Virtio NIC device only uses 3 queue, selected %d\n", - queue_idx); + PrintError("Wrong queue index %d\n", queue_idx); } break; } @@ -397,6 +596,7 @@ static int virtio_io_write(struct guest_info *core, uint16_t port, void * src, u case VIRTIO_ISR_PORT: virtio->virtio_cfg.pci_isr = *(uint8_t *)src; break; + default: return -1; break; @@ -461,7 +661,6 @@ static int virtio_io_read(struct guest_info *core, uint16_t port, void * dst, ui default: break; } - PrintDebug("queue index: %d, value=0x%x\n", (int)queue_idx, *(uint16_t *)dst); break; case VIRTIO_STATUS_PORT: @@ -479,7 +678,7 @@ static int virtio_io_read(struct guest_info *core, uint16_t port, void * dst, ui break; default: - PrintError("Virtio NIC: Read of Unhandled Virtio Read\n"); + PrintError("Virtio NIC: Read of Unhandled Virtio Read:%d\n", port_idx); return -1; } @@ -487,33 +686,31 @@ static int virtio_io_read(struct guest_info *core, uint16_t port, void * dst, ui } - - static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { struct virtio_net_state * virtio = (struct virtio_net_state *)private_data; struct virtio_queue * q = &(virtio->rx_vq); - struct virtio_net_hdr hdr; - uint32_t hdr_len = sizeof(struct virtio_net_hdr); + struct virtio_net_hdr_mrg_rxbuf hdr; + uint32_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); uint32_t data_len = size; uint32_t offset = 0; unsigned long flags; - int ret_val = -1; + int ret_val = -ERR_VIRTIO_OTHER; int raw = 1; - flags = v3_lock_irqsave(virtio->lock); - - PrintDebug("VIRTIO NIC: RX on cpu %d to virtio nic %p, size:%d\n", V3_Get_CPU(), virtio, size); + flags = v3_lock_irqsave(virtio->rx_lock); virtio->pkt_recv ++; - - if (!raw){ + if (!raw) data_len -= hdr_len; - } - build_receive_header(&hdr, buf, raw); + if (!raw) + memcpy(&hdr, buf, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + else + memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); if (q->ring_avail_addr == 0) { PrintError("Queue is not set\n"); + ret_val = -ERR_VIRTIO_RXQ_NOSET; goto exit; } @@ -526,34 +723,53 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { hdr_desc = &(q->desc[hdr_idx]); if (v3_gpa_to_hva(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) { PrintError("Could not translate receive buffer address\n"); - ret_val = -1; goto exit; } - - memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr)); + hdr.num_buffers = 1; + memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); if (offset >= data_len) { hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; } + struct vring_desc * buf_desc = NULL; for (buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) { - struct vring_desc * buf_desc = &(q->desc[buf_idx]); uint32_t len = 0; + buf_desc = &(q->desc[buf_idx]); - len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf + offset, data_len - offset); + len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, buf + offset, data_len - offset, 0); offset += len; if (offset < data_len) { buf_desc->flags = VIRTIO_NEXT_FLAG; } buf_desc->length = len; } + buf_desc->flags &= ~VIRTIO_NEXT_FLAG; q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; - q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; // This should be the total length of data sent to guest (header+pkt_data) + q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */ q->used->index++; - q->cur_avail_idx++; + + /* if there are certain num of pkts in the RX queue, notify guest + * so guest will exit to palacios + * when it returns, guest gets the virtio rx interrupt */ + if((++virtio->buffed_rx > q->queue_size/5) && + (q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + if(virtio->virtio_dev->vm->cores[0].cpu_id != V3_Get_CPU()){ + notify_guest(virtio); + virtio->rx_ipi_num ++; + } + virtio->buffed_rx = 0; + } } else { virtio->pkt_drop++; + /* RX queue is full, tell backend to stop RX on this device */ + virtio->net_ops->stop_rx(virtio->backend_data); + enable_cb(&virtio->rx_vq); + + virtio->rx_stop_times ++; + + ret_val = -ERR_VIRTIO_RXQ_FULL; goto exit; } @@ -566,25 +782,123 @@ static int virtio_rx(uint8_t * buf, uint32_t size, void * private_data) { ret_val = offset; exit: + + v3_unlock_irqrestore(virtio->rx_lock, flags); + + return ret_val; +} + + +#if 0 /* for encapuslation */ +/* virtio RX with encapulation version */ +static int virtio_rx_encap(uint8_t * data, + uint32_t size, + void * encap_header, + uint16_t encap_len, + void * private_data) { + struct virtio_net_state * virtio = (struct virtio_net_state *)private_data; + struct virtio_queue * q = &(virtio->rx_vq); + struct virtio_net_hdr_mrg_rxbuf hdr; + uint32_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + uint32_t data_len = size + encap_len; + unsigned long flags; + int ret_val = -ERR_VIRTIO_OTHER; + + if (q->ring_avail_addr == 0) { + PrintError("Queue is not set\n"); + ret_val = -ERR_VIRTIO_RXQ_NOSET; + goto exit; + } + + memset(&hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + + flags = v3_lock_irqsave(virtio->rx_lock); + if (q->cur_avail_idx != q->avail->index){ + addr_t hdr_addr = 0; + uint16_t hdr_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; + uint16_t buf_idx = 0; + struct vring_desc * hdr_desc = NULL; + uint32_t offset = 0; + + hdr_desc = &(q->desc[hdr_idx]); + if (guest_pa_to_host_va(&(virtio->virtio_dev->vm->cores[0]), hdr_desc->addr_gpa, &(hdr_addr)) == -1) { + PrintError("Could not translate receive buffer address\n"); + goto exit; + } + hdr.num_buffers = 1; + memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr_mrg_rxbuf)); + if (offset >= data_len) { + hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; + } + + struct vring_desc * buf_desc = NULL; + uint32_t len = 0; + buf_idx = hdr_desc->next; + buf_desc = &(q->desc[buf_idx]); + if(hdr_len > 0 && !encap_header) { + len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, encap_header, encap_len, 0); + offset += len; + } + + len = copy_data_to_desc(&(virtio->virtio_dev->vm->cores[0]), virtio, buf_desc, data + offset, data_len - offset, offset); + offset += len; + + if(data_len < offset)/* if there is large pkt, need merge more buffer */ + PrintDebug("Virtio NIC: data pkt larger than RX queue buffer\n"); + + buf_desc->length = offset; + buf_desc->flags &= ~VIRTIO_NEXT_FLAG; + + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; + q->used->ring[q->used->index % q->queue_size].length = data_len + hdr_len; /* This should be the total length of data sent to guest (header+pkt_data) */ + q->used->index++; + q->cur_avail_idx++; + + /* notify guest + * when it returns from EXIT, guest gets the virtio rx interrupt */ + if((++virtio->buffed_rx > q->queue_size/2) && + (q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + if(virtio->virtio_dev->vm->cores[0].cpu_id != V3_Get_CPU()){ + notify_guest(virtio); + } + virtio->buffed_rx = 0; + } + ret_val = offset; + } else { + virtio->pkt_drop++; + ret_val = -ERR_VIRTIO_RXQ_FULL; + goto exit; + } + virtio->pkt_recv ++; + + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line); + v3_pci_raise_irq(virtio->virtio_dev->pci_bus, 0, virtio->pci_dev); + virtio->virtio_cfg.pci_isr = 0x1; + } + #ifdef CONFIG_VNET_PROFILE if (virtio->pkt_recv % 50000 == 0){ long cur_time, time; rdtscll(cur_time); time = cur_time - virtio->last_recv_time; - PrintError("Virtio NIC: last recv 50000 cycles: %ld\n",time); - //PrintError("Virtio NIC: sent: %ld, rxed: %ld, dropped: %ld\n", - //virtio->pkt_sent, - //virtio->pkt_recv, - //virtio->pkt_drop); + PrintError("Virtio NIC: sent: %ld, rxed: %ld, dropped: %ld\n", + virtio->pkt_sent, + virtio->pkt_recv, + virtio->pkt_drop); rdtscll(virtio->last_recv_time); } #endif - v3_unlock_irqrestore(virtio->lock, flags); +exit: + + v3_unlock_irqrestore(virtio->rx_lock, flags); return ret_val; } +#endif + static struct v3_device_ops dev_ops = { @@ -594,6 +908,84 @@ static struct v3_device_ops dev_ops = { .stop = NULL, }; +#if 0 //temporary hacking LX +static struct virtio_net_state *vnic_states[2] = {NULL, NULL}; +static int num_vnic = 0; + +void vnic_polling(void *data){ + struct v3_vm_info *info = (struct v3_vm_info *)data; + if(vnic_states[0] != NULL && info == vnic_states[0]->virtio_dev->vm){ + handle_pkt_tx(&(info->cores[0]), vnic_states[0]); + } + + if(vnic_states[1] != NULL && info == vnic_states[1]->virtio_dev->vm){ + handle_pkt_tx(&(info->cores[0]), vnic_states[1]); + } +} +#endif + +/* TODO: Issue here: which vm info it needs? calling VM or the device's own VM? */ +static void virtio_nic_poll(struct v3_vm_info *vm, void *data){ + struct virtio_net_state *virtio = (struct virtio_net_state *)data; + + handle_pkt_tx(&(vm->cores[0]), virtio); + + virtio->tx_poll_times ++; + +#ifdef CONFIG_VNET_PROFILE + static uint64_t last_time = 0; + uint64_t time; + rdtscll(time); + if((time - last_time) > 5000000000){ + last_time = time; + print_profile_info(virtio); + } +#endif +} + +#if 0 /* NAPI */ +/* tx one pkt from guest */ +static int virtio_tx_pkt(struct guest_info *core, struct virtio_net_state * virtio_state){ + +} + +static void virtio_nic_poll(struct v3_vm_info *vm, void *data, int budget){ + + +} + +#endif + + +static void virtio_start_tx(void *data){ + struct virtio_net_state * virtio = (struct virtio_net_state *)data; + + /* do we need a lock here? */ + virtio->tx_disabled = 0; + + /* notify the device's guest it can start sending pkt */ + if(virtio->virtio_dev->vm->cores[0].cpu_id != V3_Get_CPU()){ + notify_guest(virtio); + } +} + +static void virtio_stop_tx(void *data){ + struct virtio_net_state * virtio = (struct virtio_net_state *)data; + + /* do we need a lock here? */ + virtio->tx_disabled = 1; + + /* how do we stop the guest to exit to palacios for sending pkt? */ + if(virtio->virtio_dev->vm->cores[0].cpu_id != V3_Get_CPU()){ + disable_cb(&virtio->tx_vq); + } + + virtio->tx_stop_times ++; +} + + + + static int register_dev(struct virtio_dev_state * virtio, struct virtio_net_state * net_state) { struct pci_device * pci_dev = NULL; @@ -629,7 +1021,7 @@ static int register_dev(struct virtio_dev_state * virtio, struct virtio_net_stat bars[0].private_data = net_state; pci_dev = v3_pci_register_device(virtio->pci_bus, PCI_STD_DEVICE, - 0, PCI_AUTO_DEV_NUM, 0, + 0, 4/*PCI_AUTO_DEV_NUM*/, 0, "LNX_VIRTIO_NIC", bars, NULL, NULL, NULL, net_state); @@ -651,15 +1043,21 @@ static int register_dev(struct virtio_dev_state * virtio, struct virtio_net_stat pci_dev->config_header.intr_pin = 1; pci_dev->config_header.max_latency = 1; // ?? (qemu does it...) - net_state->pci_dev = pci_dev; - net_state->virtio_cfg.host_features = 0; //no features support now - net_state->rx_vq.queue_size = QUEUE_SIZE; - net_state->tx_vq.queue_size = QUEUE_SIZE; - net_state->ctrl_vq.queue_size = CTRL_QUEUE_SIZE; + net_state->pci_dev = pci_dev; net_state->virtio_dev = virtio; - + + uchar_t mac[6] = {0x11,0x11,0x11,0x11,0x11,0x11}; + memcpy(net_state->net_cfg.mac, mac, 6); + + memcpy(pci_dev->config_data, net_state->net_cfg.mac, ETH_ALEN); + virtio_init_state(net_state); +#if 0 //temporary hacking LX + vnic_states[num_vnic ++] = net_state; + PrintError("VNIC: num of vnic %d\n", num_vnic); +#endif + return 0; } @@ -678,6 +1076,9 @@ static int connect_fn(struct v3_vm_info * info, net_state->backend_data = private_data; ops->recv = virtio_rx; + ops->poll = virtio_nic_poll; + ops->start_tx = virtio_start_tx; + ops->stop_tx = virtio_stop_tx; ops->frontend_data = net_state; return 0; @@ -686,9 +1087,9 @@ static int connect_fn(struct v3_vm_info * info, static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus")); struct virtio_dev_state * virtio_state = NULL; - char * dev_id = v3_cfg_val(cfg, "ID"); + char * name = v3_cfg_val(cfg, "name"); - PrintDebug("Virtio NIC: Initializing VIRTIO Network device: %s\n", dev_id); + PrintDebug("Virtio NIC: Initializing VIRTIO Network device: %s\n", name); if (pci_bus == NULL) { PrintError("Virtio NIC: VirtIO devices require a PCI Bus"); @@ -702,14 +1103,14 @@ static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { virtio_state->pci_bus = pci_bus; virtio_state->vm = vm; - struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, virtio_state); + struct vm_device * dev = v3_allocate_device(name, &dev_ops, virtio_state); if (v3_attach_device(vm, dev) == -1) { - PrintError("Virtio NIC: Could not attach device %s\n", dev_id); + PrintError("Virtio NIC: Could not attach device %s\n", name); return -1; } - if (v3_dev_add_net_frontend(vm, dev_id, connect_fn, (void *)virtio_state) == -1) { - PrintError("Virtio NIC: Could not register %s as net frontend\n", dev_id); + if (v3_dev_add_net_frontend(vm, name, connect_fn, (void *)virtio_state) == -1) { + PrintError("Virtio NIC: Could not register %s as net frontend\n", name); return -1; } diff --git a/palacios/src/devices/lnx_virtio_vnet.c b/palacios/src/devices/lnx_virtio_vnet.c index 4569bc7..435df48 100644 --- a/palacios/src/devices/lnx_virtio_vnet.c +++ b/palacios/src/devices/lnx_virtio_vnet.c @@ -13,7 +13,7 @@ * All rights reserved. * * Author: Jack Lange - * Lei Xia + * Lei Xia * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". @@ -63,13 +63,7 @@ struct virtio_vnet_state { int io_range_size; v3_lock_t lock; - uint32_t pkt_sent; - uint32_t pkt_recv; - uint32_t pkt_drop; - uint32_t tx_exit; - uint32_t rx_exit; - uint32_t total_exit; - + ulong_t pkt_sent, pkt_recv, pkt_drop, tx_exit, rx_exit, total_exit; int ready; }; @@ -149,8 +143,7 @@ static int handle_cmd_kick(struct guest_info * core, struct virtio_vnet_state * uint8_t status = 0; - PrintDebug("VNET Bridge: CMD: Descriptor Count=%d, index=%d, desc_idx=%d\n", - desc_cnt, q->cur_avail_idx % QUEUE_SIZE, desc_idx); + PrintDebug("VNET Bridge: CMD: Descriptor Count=%d, index=%d, desc_idx=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE, desc_idx); if (desc_cnt < 3) { PrintError("VNET Bridge cmd must include at least 3 descriptors (cnt=%d)\n", desc_cnt); @@ -235,25 +228,23 @@ static int vnet_pkt_input_cb(struct v3_vm_info * vm, struct v3_vnet_pkt vnet_pk int ret_val = -1; unsigned long flags; uint16_t sent; - struct v3_vnet_pkt * pkt = NULL; + struct v3_vnet_pkt *pkt; - if (pkt_num <= 0) { + if(pkt_num <= 0) return 0; - } flags = v3_lock_irqsave(vnet_state->lock); if (q->ring_avail_addr == 0) { PrintError("Queue is not set\n"); - v3_unlock_irqrestore(vnet_state->lock, flags); - return ret_val; + goto exit; } PrintDebug("VNET Bridge: RX: running on cpu: %d, num of pkts: %d\n", V3_Get_CPU(), pkt_num); - for (sent = 0; sent < pkt_num; sent++) { + for(sent = 0; sent < pkt_num; sent ++) { pkt = &vnet_pkts[sent]; - vnet_state->pkt_recv++; + vnet_state->pkt_recv ++; if (q->cur_avail_idx != q->avail->index) { uint16_t pkt_idx = q->avail->ring[q->cur_avail_idx % q->queue_size]; @@ -265,8 +256,7 @@ static int vnet_pkt_input_cb(struct v3_vm_info * vm, struct v3_vnet_pkt vnet_pk if (v3_gpa_to_hva(&(vm->cores[0]), pkt_desc->addr_gpa, (addr_t *)&(virtio_pkt)) == -1) { PrintError("Could not translate buffer address\n"); - v3_unlock_irqrestore(vnet_state->lock, flags); - return ret_val; + goto exit; } PrintDebug("VNET Bridge: RX: pkt sent to guest pkt size: %d, dst link: %d\n", pkt->size, pkt->dst_id); @@ -282,14 +272,13 @@ static int vnet_pkt_input_cb(struct v3_vm_info * vm, struct v3_vnet_pkt vnet_pk q->used->index++; q->cur_avail_idx++; } else { - vnet_state->pkt_drop++; - v3_vnet_disable_bridge(); + vnet_state->pkt_drop ++; + //v3_vnet_disable_bridge(); } } - if (sent == 0) { - v3_unlock_irqrestore(vnet_state->lock, flags); - return ret_val; + if(sent == 0){ + goto exit; } if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { @@ -302,39 +291,36 @@ static int vnet_pkt_input_cb(struct v3_vm_info * vm, struct v3_vnet_pkt vnet_pk #ifdef CONFIG_VNET_PROFILE - if (vnet_state->pkt_recv % 200000 == 0) + if (vnet_state->pkt_recv % 20000 == 0) PrintError("Vnet Bridge: sent: %ld, rxed: %ld, dropped: %ld, total exit: %ld, tx exit: %ld, rx exit: %ld\n", - vnet_state->pkt_sent, - vnet_state->pkt_recv, - vnet_state->pkt_drop, - vnet_state->total_exit, - vnet_state->tx_exit, - vnet_state->rx_exit); + vnet_state->pkt_sent, + vnet_state->pkt_recv, + vnet_state->pkt_drop, + vnet_state->total_exit, + vnet_state->tx_exit, + vnet_state->rx_exit); #endif - v3_unlock_irqrestore(vnet_state->lock, flags); +exit: + v3_unlock_irqrestore(vnet_state->lock, flags); + return ret_val; - } -static void vnet_pkt_input_xcall(void * data) { - struct v3_vnet_bridge_input_args * args = (struct v3_vnet_bridge_input_args *)data; +static int vnet_pkt_input_xcall(void *data){ + struct v3_vnet_bridge_xcall_args *args = (struct v3_vnet_bridge_xcall_args *)data; - vnet_pkt_input_cb(args->vm, args->vnet_pkts, args->pkt_num, args->private_data); + return vnet_pkt_input_cb(args->vm, args->vnet_pkts, args->pkt_num, args->private_data); } -static int handle_pkt_kick(struct guest_info * core, struct virtio_vnet_state * vnet_state) { +static int handle_pkt_kick(struct guest_info *core, struct virtio_vnet_state * vnet_state) +{ struct virtio_queue * q = &(vnet_state->queue[XMIT_QUEUE]); - unsigned long flags = 0; int recvd = 0; - int cpu = V3_Get_CPU(); - - flags = v3_lock_irqsave(vnet_state->lock); - + if (q->ring_avail_addr == 0) { - v3_unlock_irqrestore(vnet_state->lock,flags); - return 0; + return -1; } while (q->cur_avail_idx != q->avail->index) { @@ -351,71 +337,63 @@ static int handle_pkt_kick(struct guest_info * core, struct virtio_vnet_state * return -1; } - PrintDebug("VNET Bridge: TX: on cpu %d pkt size: %d, dst link: %d\n", cpu, virtio_pkt->pkt_size, virtio_pkt->link_id); - - v3_vnet_rx(virtio_pkt->pkt, virtio_pkt->pkt_size, virtio_pkt->link_id, LINK_EDGE); + //PrintDebug("VNET Bridge: TX: on cpu %d pkt size: %d, dst link: %d\n", cpu, virtio_pkt->pkt_size, virtio_pkt->link_id); + + struct v3_vnet_pkt pkt; + pkt.size = virtio_pkt->pkt_size; + pkt.src_type = LINK_EDGE; + pkt.src_id = 0; + memcpy(pkt.header, virtio_pkt->pkt, ETHERNET_HEADER_LEN); + pkt.data = virtio_pkt->pkt; + v3_vnet_send_pkt(&pkt, NULL); + q->used->ring[q->used->index % q->queue_size].id = q->avail->ring[q->cur_avail_idx % q->queue_size]; q->used->ring[q->used->index % q->queue_size].length = pkt_desc->length; // What do we set this to???? q->used->index++; - vnet_state->pkt_sent++; - recvd++; + vnet_state->pkt_sent ++; + recvd ++; q->cur_avail_idx++; } - if (recvd == 0) { - v3_unlock_irqrestore(vnet_state->lock,flags); + if(recvd == 0){ return 0; } - //PrintError("In polling get %d\n", recvd); - - //if on the dom0 core, interrupt the domU core to poll pkts - //otherwise, call the polling directly - - - if (vnet_state->vm->cores[0].cpu_id == cpu) { - cpu = (cpu == 0) ? 1 : 0; - v3_interrupt_cpu(vnet_state->vm, cpu, V3_VNET_POLLING_VECTOR); - } else { - v3_vnet_polling(); - } - - if ((vnet_state->pkt_sent % (QUEUE_SIZE/20)) == 0) { - //optimized for guest's, batch the interrupts - - if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { v3_pci_raise_irq(vnet_state->pci_bus, 0, vnet_state->pci_dev); vnet_state->virtio_cfg.pci_isr = 0x1; - } } - + + //PrintError("Virtio VNET: polling %d pkts\n", recvd); + #ifdef CONFIG_VNET_PROFILE - if (vnet_state->pkt_sent % 200000 == 0) + if (vnet_state->pkt_sent % 20000 == 0) PrintError("Vnet Bridge: sent: %ld, rxed: %ld, dropped: %ld, total exit: %ld, tx exit: %ld, rx exit: %ld\n", - vnet_state->pkt_sent, - vnet_state->pkt_recv, - vnet_state->pkt_drop, - vnet_state->total_exit, - vnet_state->tx_exit, - vnet_state->rx_exit); + vnet_state->pkt_sent, + vnet_state->pkt_recv, + vnet_state->pkt_drop, + vnet_state->total_exit, + vnet_state->tx_exit, + vnet_state->rx_exit); #endif - v3_unlock_irqrestore(vnet_state->lock,flags); - return 0; } -static int polling_pkt_from_guest(struct v3_vm_info * vm, void *private_data) { +static void vnet_virtio_poll(struct v3_vm_info * vm, void *private_data){ struct virtio_vnet_state * vnet_state = (struct virtio_vnet_state *)private_data; - - return handle_pkt_kick(&(vm->cores[0]), vnet_state); + + if(vm == vnet_state->vm){ + handle_pkt_kick(&(vm->cores[0]), vnet_state); + } } -static int handle_rx_kick(struct guest_info *core, struct virtio_vnet_state * vnet_state) { - v3_vnet_enable_bridge(); +static int handle_rx_kick(struct guest_info *core, struct virtio_vnet_state * vnet_state) +{ + //v3_vnet_enable_bridge(); return 0; } @@ -426,79 +404,71 @@ static int vnet_virtio_io_write(struct guest_info * core, uint16_t port, void * PrintDebug("VNET Bridge: VIRTIO VNET Write for port %d len=%d, value=%x\n", port, length, *(uint32_t *)src); - PrintDebug("VNET Bridge: port idx=%d\n", port_idx); - - vnet_state->total_exit++; + vnet_state->total_exit ++; switch (port_idx) { case GUEST_FEATURES_PORT: - if (length != 4) { PrintError("Illegal write length for guest features\n"); return -1; } - vnet_state->virtio_cfg.guest_features = *(uint32_t *)src; break; - case VRING_PG_NUM_PORT: { + case VRING_PG_NUM_PORT: + if (length == 4) { + addr_t pfn = *(uint32_t *)src; + addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT); - addr_t pfn = *(uint32_t *)src; - addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT); + vnet_state->cur_queue->pfn = pfn; + + vnet_state->cur_queue->ring_desc_addr = page_addr ; + vnet_state->cur_queue->ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc)); + vnet_state->cur_queue->ring_used_addr = ( vnet_state->cur_queue->ring_avail_addr + \ + sizeof(struct vring_avail) + \ + (QUEUE_SIZE * sizeof(uint16_t))); + + // round up to next page boundary. + vnet_state->cur_queue->ring_used_addr = (vnet_state->cur_queue->ring_used_addr + 0xfff) & ~0xfff; - if (length != 4) { - PrintError("Illegal write length for page frame number\n"); - return -1; - } - + if (v3_gpa_to_hva(core, vnet_state->cur_queue->ring_desc_addr, (addr_t *)&(vnet_state->cur_queue->desc)) == -1) { + PrintError("Could not translate ring descriptor address\n"); + return -1; + } - vnet_state->cur_queue->pfn = pfn; - - vnet_state->cur_queue->ring_desc_addr = page_addr ; - vnet_state->cur_queue->ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc)); - vnet_state->cur_queue->ring_used_addr = ( vnet_state->cur_queue->ring_avail_addr + \ - sizeof(struct vring_avail) + \ - (QUEUE_SIZE * sizeof(uint16_t))); - - // round up to next page boundary. - vnet_state->cur_queue->ring_used_addr = (vnet_state->cur_queue->ring_used_addr + 0xfff) & ~0xfff; - - if (v3_gpa_to_hva(core, vnet_state->cur_queue->ring_desc_addr, (addr_t *)&(vnet_state->cur_queue->desc)) == -1) { - PrintError("Could not translate ring descriptor address\n"); - return -1; - } - - if (v3_gpa_to_hva(core, vnet_state->cur_queue->ring_avail_addr, (addr_t *)&(vnet_state->cur_queue->avail)) == -1) { - PrintError("Could not translate ring available address\n"); - return -1; - } - - if (v3_gpa_to_hva(core, vnet_state->cur_queue->ring_used_addr, (addr_t *)&(vnet_state->cur_queue->used)) == -1) { - PrintError("Could not translate ring used address\n"); + if (v3_gpa_to_hva(core, vnet_state->cur_queue->ring_avail_addr, (addr_t *)&(vnet_state->cur_queue->avail)) == -1) { + PrintError("Could not translate ring available address\n"); + return -1; + } + + if (v3_gpa_to_hva(core, vnet_state->cur_queue->ring_used_addr, (addr_t *)&(vnet_state->cur_queue->used)) == -1) { + PrintError("Could not translate ring used address\n"); + return -1; + } + + PrintDebug("VNET Bridge: RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n", + (void *)(vnet_state->cur_queue->ring_desc_addr), + (void *)(vnet_state->cur_queue->ring_avail_addr), + (void *)(vnet_state->cur_queue->ring_used_addr)); + + PrintDebug("VNET Bridge: RingDesc=%p, Avail=%p, Used=%p\n", + vnet_state->cur_queue->desc, vnet_state->cur_queue->avail, vnet_state->cur_queue->used); + + if(vnet_state->queue[RECV_QUEUE].avail != NULL){ + vnet_state->ready = 1; + vnet_state->queue[RECV_QUEUE].used->flags |= VRING_NO_NOTIFY_FLAG; + } + + //No notify when there is pkt tx from guest + //palacios will do the polling + if(vnet_state->queue[XMIT_QUEUE].used != NULL){ + vnet_state->queue[XMIT_QUEUE].used->flags |= VRING_NO_NOTIFY_FLAG; + } + } else { + PrintError("Illegal write length for page frame number\n"); return -1; } - - PrintDebug("VNET Bridge: RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n", - (void *)(vnet_state->cur_queue->ring_desc_addr), - (void *)(vnet_state->cur_queue->ring_avail_addr), - (void *)(vnet_state->cur_queue->ring_used_addr)); - - PrintDebug("VNET Bridge: RingDesc=%p, Avail=%p, Used=%p\n", - vnet_state->cur_queue->desc, - vnet_state->cur_queue->avail, - vnet_state->cur_queue->used); - - if (vnet_state->queue[RECV_QUEUE].avail != NULL){ - vnet_state->ready = 1; - } - - //No notify when there is pkt tx from guest - if (vnet_state->queue[XMIT_QUEUE].used != NULL) { - vnet_state->queue[XMIT_QUEUE].used->flags |= VRING_NO_NOTIFY_FLAG; - } - break; - } case VRING_Q_SEL_PORT: vnet_state->virtio_cfg.vring_queue_selector = *(uint16_t *)src; @@ -526,8 +496,7 @@ static int vnet_virtio_io_write(struct guest_info * core, uint16_t port, void * PrintError("Could not handle Virtio VNET TX\n"); return -1; } - vnet_state->tx_exit ++; - //PrintError("Notify on TX\n"); + PrintError("Notify on TX\n"); } else if (queue_idx == 2) { if (handle_rx_kick(core, vnet_state) == -1){ PrintError("Could not handle Virtio RX buffer refills Kick\n"); @@ -644,9 +613,9 @@ static int dev_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus")); struct virtio_vnet_state * vnet_state = NULL; struct pci_device * pci_dev = NULL; - char * dev_id = v3_cfg_val(cfg, "ID"); + char * name = v3_cfg_val(cfg, "name"); - PrintDebug("VNET Bridge: Initializing VNET Bridge Control device: %s\n", dev_id); + PrintDebug("VNET Bridge: Initializing VNET Bridge Control device: %s\n", name); if (pci_bus == NULL) { PrintError("VNET Bridge device require a PCI Bus"); @@ -658,10 +627,10 @@ static int dev_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { vnet_state->vm = vm; - struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, vnet_state); + struct vm_device * dev = v3_allocate_device(name, &dev_ops, vnet_state); if (v3_attach_device(vm, dev) == -1) { - PrintError("Could not attach device %s\n", dev_id); + PrintError("Could not attach device %s\n", name); return -1; } @@ -699,7 +668,7 @@ static int dev_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { bars[0].private_data = vnet_state; pci_dev = v3_pci_register_device(pci_bus, PCI_STD_DEVICE, - 0, PCI_AUTO_DEV_NUM, 0, + 0, 5 /*PCI_AUTO_DEV_NUM*/, 0, "LNX_VIRTIO_VNET", bars, NULL, NULL, NULL, vnet_state); @@ -724,9 +693,13 @@ static int dev_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { virtio_reset(vnet_state); - V3_Print("Registering Virtio device as vnet bridge\n"); - v3_vnet_add_bridge(vm, vnet_pkt_input_cb, vnet_pkt_input_xcall, polling_pkt_from_guest, 0, 500000, (void *)vnet_state); + struct v3_vnet_bridge_ops brg_ops; + brg_ops.input = vnet_pkt_input_cb; + brg_ops.polling_pkt = vnet_virtio_poll; + brg_ops.xcall_input = vnet_pkt_input_xcall; + V3_Print("Registering Virtio device as vnet bridge\n"); + v3_vnet_add_bridge(vm, &brg_ops, (void *)vnet_state); return 0; } diff --git a/palacios/src/devices/vnet_nic.c b/palacios/src/devices/vnet_nic.c index 479593b..ff039e2 100644 --- a/palacios/src/devices/vnet_nic.c +++ b/palacios/src/devices/vnet_nic.c @@ -7,8 +7,8 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * - * Copyright (c) 2008, Lei Xia - * Copyright (c) 2008, The V3VEE Project + * Copyright (c) 2010, Lei Xia + * Copyright (c) 2010, The V3VEE Project * All rights reserved. * * Author: Lei Xia @@ -40,11 +40,27 @@ struct vnet_nic_state { int vnet_dev_id; }; +/* called by frontend device, + * tell the VNET can start sending pkt to it */ +static void start_rx(void *private_data){ + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; -static int vnet_send(uint8_t * buf, uint32_t len, void * private_data, struct vm_device * dest_dev){ - struct vnet_nic_state * vnetnic = (struct vnet_nic_state *)private_data; - struct v3_vnet_pkt pkt; + v3_vnet_enable_device(vnetnic->vnet_dev_id); +} + +/* called by frontend device, + * tell the VNET stop sending pkt to it */ +static void stop_rx(void *private_data){ + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; + + v3_vnet_disable_device(vnetnic->vnet_dev_id); +} + +/* called by frontend, send pkt to VNET */ +static int vnet_nic_send(uint8_t * buf, uint32_t len, void * private_data, struct vm_device *dest_dev){ + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; + struct v3_vnet_pkt pkt; pkt.size = len; pkt.src_type = LINK_INTERFACE; pkt.src_id = vnetnic->vnet_dev_id; @@ -58,39 +74,55 @@ static int vnet_send(uint8_t * buf, uint32_t len, void * private_data, struct vm v3_hexdump(buf, len, NULL, 0); } #endif -/* - v3_vnet_rx(buf, len, vnetnic->vnet_dev_id, LINK_INTERFACE); - - //if on the dom0 core, interrupt the domU core to poll pkts - //otherwise, call the polling directly - int cpu = V3_Get_CPU(); - cpu = (cpu == 0)?1:0; - v3_interrupt_cpu(vnetnic->vm, cpu, V3_VNET_POLLING_VECTOR); - */ - - v3_vnet_send_pkt(&pkt, NULL); - return 0; + return v3_vnet_send_pkt(&pkt, NULL);; } + +/* called by VNET, + * send pkt to frontend device */ static int virtio_input(struct v3_vm_info *info, struct v3_vnet_pkt * pkt, void * private_data){ struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; - PrintDebug("Vnet-nic: In input: vnet_nic state %p\n", vnetnic); + return vnetnic->net_ops.recv(pkt->data, + pkt->size, + vnetnic->net_ops.frontend_data); +} + +/* called by VNET, + * tell frontend device to poll data from guest */ +static void virtio_poll(struct v3_vm_info *info, void * private_data){ + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; - return vnetnic->net_ops.recv(pkt->data, pkt->size, vnetnic->net_ops.frontend_data); + vnetnic->net_ops.poll(info, vnetnic->net_ops.frontend_data); } -static int register_to_vnet(struct v3_vm_info * vm, - struct vnet_nic_state * vnet_nic, - char * dev_name, - uint8_t mac[6]) { - - PrintDebug("Vnet-nic: register Vnet-nic device %s, state %p to VNET\n", dev_name, vnet_nic); - - return v3_vnet_add_dev(vm, mac, virtio_input, (void *)vnet_nic); +/* called by VNET, from different processor */ +static void virtio_poll_xcall(void *data){ + struct v3_vnet_dev_xcall_args *args = (struct v3_vnet_dev_xcall_args *)data; + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)args->private_data; + + if(args->vm == vnetnic->vm) /*only do polling on the same Virtual Machine */ + virtio_poll(args->vm, args->private_data); +} + +/* called by VNET, + * tell the frontend to start sending pkt to VNET*/ +static void start_tx(void *private_data){ + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; + + vnetnic->net_ops.start_tx(vnetnic->net_ops.frontend_data); } +/* called by VNET + * tell the frontend device to stop sending pkt to VNET*/ +static void stop_tx(void *private_data){ + struct vnet_nic_state *vnetnic = (struct vnet_nic_state *)private_data; + + vnetnic->net_ops.stop_tx(vnetnic->net_ops.frontend_data); +} + + static int vnet_nic_free(struct vm_device * dev) { return 0; } @@ -102,25 +134,36 @@ static struct v3_device_ops dev_ops = { .stop = NULL, }; +static struct v3_vnet_dev_ops vnet_dev_ops = { + .input = virtio_input, + .poll = virtio_poll, + .poll_xcall = virtio_poll_xcall, + .start_tx = start_tx, + .stop_tx = stop_tx, +}; -static int str2mac(char * macstr, uint8_t mac[6]){ - uint8_t hex[2]; - int i = 0; - char * s = macstr; - while (s) { - memcpy(hex, s, 2); - mac[i++] = (char)atox(hex); +static int register_to_vnet(struct v3_vm_info * vm, + struct vnet_nic_state *vnet_nic, + char *dev_name, + uchar_t mac[6]) { + + PrintDebug("Vnet-nic: register Vnet-nic device %s, state %p to VNET\n", dev_name, vnet_nic); + + return v3_vnet_add_dev(vm, mac, &vnet_dev_ops, (void *)vnet_nic); +} - if (i == 6) { - return 0; - } - s = strchr(s, ':'); +static int str2mac(char *macstr, char mac[6]){ + char hex[2], *s = macstr; + int i = 0; - if (s) { - s++; - } + while(s){ + memcpy(hex, s, 2); + mac[i++] = (char)atox(hex); + if (i == 6) return 0; + s=strchr(s, ':'); + if(s) s++; } return -1; @@ -128,53 +171,129 @@ static int str2mac(char * macstr, uint8_t mac[6]){ static int vnet_nic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { struct vnet_nic_state * vnetnic = NULL; - char * dev_id = v3_cfg_val(cfg, "ID"); + char * name = v3_cfg_val(cfg, "name"); char * macstr = NULL; - int vnet_dev_id = 0; - v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend"); + char mac[6]; + int vnet_dev_id; + v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend"); macstr = v3_cfg_val(frontend_cfg, "mac"); if (macstr == NULL) { - PrintDebug("Vnet-nic configuration error: No Mac specified\n"); - return -1; + PrintDebug("Vnet-nic: No Mac specified\n"); + } else { + str2mac(macstr, mac); } vnetnic = (struct vnet_nic_state *)V3_Malloc(sizeof(struct vnet_nic_state)); memset(vnetnic, 0, sizeof(struct vnet_nic_state)); - struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, vnetnic); + struct vm_device * dev = v3_allocate_device(name, &dev_ops, vnetnic); if (v3_attach_device(vm, dev) == -1) { - PrintError("Could not attach device %s\n", dev_id); + PrintError("Could not attach device %s\n", name); return -1; } - - - vnetnic->net_ops.send = vnet_send; - str2mac(macstr, vnetnic->mac); + vnetnic->net_ops.send = vnet_nic_send; + vnetnic->net_ops.start_rx = start_rx; + vnetnic->net_ops.stop_rx = stop_rx; + memcpy(vnetnic->mac, mac, 6); vnetnic->vm = vm; if (v3_dev_connect_net(vm, v3_cfg_val(frontend_cfg, "tag"), &(vnetnic->net_ops), frontend_cfg, vnetnic) == -1) { PrintError("Could not connect %s to frontend %s\n", - dev_id, v3_cfg_val(frontend_cfg, "tag")); + name, v3_cfg_val(frontend_cfg, "tag")); return -1; } PrintDebug("Vnet-nic: Connect %s to frontend %s\n", - dev_id, v3_cfg_val(frontend_cfg, "tag")); + name, v3_cfg_val(frontend_cfg, "tag")); - if ((vnet_dev_id = register_to_vnet(vm, vnetnic, dev_id, vnetnic->mac)) == -1) { - PrintError("Vnet-nic device %s (mac: %s) fails to registered to VNET\n", dev_id, macstr); - return -1; + if ((vnet_dev_id = register_to_vnet(vm, vnetnic, name, vnetnic->mac)) == -1) { + PrintError("Vnet-nic device %s (mac: %s) fails to registered to VNET\n", name, macstr); } - vnetnic->vnet_dev_id = vnet_dev_id; - PrintDebug("Vnet-nic device %s (mac: %s, %ld) registered to VNET\n", dev_id, macstr, *((uint32_t *)vnetnic->mac)); + PrintDebug("Vnet-nic device %s (mac: %s, %ld) registered to VNET\n", + name, macstr, *((ulong_t *)vnetnic->mac)); + + +//for temporary hack for vnet bridge test +#if 0 + { + uchar_t zeromac[6] = {0,0,0,0,0,0}; + + if(!strcmp(name, "vnet_nic")){ + struct v3_vnet_route route; + + route.dst_id = vnet_dev_id; + route.dst_type = LINK_INTERFACE; + route.src_id = 0; + route.src_type = LINK_EDGE; + memcpy(route.dst_mac, zeromac, 6); + route.dst_mac_qual = MAC_ANY; + memcpy(route.src_mac, zeromac, 6); + route.src_mac_qual = MAC_ANY; + v3_vnet_add_route(route); + + + route.dst_id = 0; + route.dst_type = LINK_EDGE; + route.src_id = vnet_dev_id; + route.src_type = LINK_INTERFACE; + memcpy(route.dst_mac, zeromac, 6); + route.dst_mac_qual = MAC_ANY; + memcpy(route.src_mac, zeromac, 6); + route.src_mac_qual = MAC_ANY; + + v3_vnet_add_route(route); + } + } +#endif + +//for temporary hack for Linux bridge (w/o encapuslation) test +#if 1 + { + static int vnet_nic_guestid = -1; + static int vnet_nic_dom0 = -1; + uchar_t zeromac[6] = {0,0,0,0,0,0}; + + if(!strcmp(name, "vnet_nic")){ //domu + vnet_nic_guestid = vnet_dev_id; + } + if (!strcmp(name, "vnet_nic_dom0")){ + vnet_nic_dom0 = vnet_dev_id; + } + if(vnet_nic_guestid != -1 && vnet_nic_dom0 !=-1){ + struct v3_vnet_route route; + + route.src_id = vnet_nic_guestid; + route.src_type = LINK_INTERFACE; + route.dst_id = vnet_nic_dom0; + route.dst_type = LINK_INTERFACE; + memcpy(route.dst_mac, zeromac, 6); + route.dst_mac_qual = MAC_ANY; + memcpy(route.src_mac, zeromac, 6); + route.src_mac_qual = MAC_ANY; + v3_vnet_add_route(route); + + + route.src_id = vnet_nic_dom0; + route.src_type = LINK_INTERFACE; + route.dst_id = vnet_nic_guestid; + route.dst_type = LINK_INTERFACE; + memcpy(route.dst_mac, zeromac, 6); + route.dst_mac_qual = MAC_ANY; + memcpy(route.src_mac, zeromac, 6); + route.src_mac_qual = MAC_ANY; + + v3_vnet_add_route(route); + } + } +#endif return 0; } diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index 8e45d5b..1510d7c 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -98,7 +98,7 @@ void Init_V3(struct v3_os_hooks * hooks, int num_cpus) { #ifdef CONFIG_VNET - V3_init_vnet(); + v3_init_vnet(); #endif if ((hooks) && (hooks->call_on_cpu)) { diff --git a/palacios/src/palacios/vmm_vnet.c b/palacios/src/palacios/vmm_vnet.c index d7455ac..1ec015e 100644 --- a/palacios/src/palacios/vmm_vnet.c +++ b/palacios/src/palacios/vmm_vnet.c @@ -7,7 +7,7 @@ * and the University of New Mexico. You can find out more at * http://www.v3vee.org * - * Copyright (c) 2009, Lei Xia + * Copyright (c) 2010, Lei Xia * Copyright (c) 2009, Yuan Tang * Copyright (c) 2009, The V3VEE Project * All rights reserved. @@ -31,57 +31,75 @@ #endif +/* for UDP encapuslation */ +struct eth_header { + uchar_t dest[6]; + uchar_t src[6]; + uint16_t type; +}__attribute__((packed)); + +struct ip_header { + uint8_t version: 4; + uint8_t hdr_len: 4; + uchar_t tos; + uint16_t total_len; + uint16_t id; + uint8_t flags: 3; + uint16_t offset: 13; + uchar_t ttl; + uchar_t proto; + uint16_t cksum; + uint32_t src_addr; + uint32_t dst_addr; +}__attribute__((packed)); + +struct udp_header { + uint16_t src_port; + uint16_t dst_port; + uint16_t len; + uint16_t csum;//set to zero, disable the xsum +}__attribute__((packed)); + +struct udp_link_header { + struct eth_header eth_hdr; + struct ip_header ip_hdr; + struct udp_header udp_hdr; +}__attribute__((packed)); +/* end with UDP encapuslation structures */ + + + struct eth_hdr { uint8_t dst_mac[6]; uint8_t src_mac[6]; - uint16_t type; // indicates layer 3 protocol type + uint16_t type; /* indicates layer 3 protocol type */ } __attribute__((packed)); - - - struct vnet_dev { - + int dev_id; uint8_t mac_addr[6]; struct v3_vm_info * vm; - - int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * private_data); + struct v3_vnet_dev_ops dev_ops; void * private_data; + + int rx_disabled; - int dev_id; struct list_head node; } __attribute__((packed)); -#define BRIDGE_BUF_SIZE 512 -struct bridge_pkts_buf { - int start, end; - int num; - v3_lock_t lock; - struct v3_vnet_pkt pkts[BRIDGE_BUF_SIZE]; - uint8_t datas[ETHERNET_PACKET_LEN * BRIDGE_BUF_SIZE]; -}; - struct vnet_brg_dev { struct v3_vm_info * vm; - - int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt pkt[], uint16_t pkt_num, void * private_data); - void (*xcall_input)(void * data); - int (*polling_pkt)(struct v3_vm_info * vm, void * private_data); + struct v3_vnet_bridge_ops brg_ops; int disabled; - - uint16_t max_delayed_pkts; - long max_latency; //in cycles void * private_data; } __attribute__((packed)); - - struct vnet_route_info { struct v3_vnet_route route_def; @@ -101,6 +119,14 @@ struct route_list { } __attribute__((packed)); +#define BUF_SIZE 4096 +struct pkts_buf { + int start, end; + int num; + v3_lock_t lock; + struct v3_vnet_pkt pkts[BUF_SIZE]; +}; + static struct { struct list_head routes; @@ -109,66 +135,60 @@ static struct { int num_routes; int num_devs; - struct vnet_brg_dev * bridge; + struct vnet_brg_dev *bridge; v3_lock_t lock; - struct hashtable * route_cache; + uint8_t sidecores; /* 0 -vnet not running on sidecore, > 0, number of extra cores that can be used by VNET */ + uint64_t cores_map; /* bitmaps for which cores can be used by VNET for sidecore, maxium 64 */ - struct bridge_pkts_buf in_buf; //incoming packets buffer + struct hashtable * route_cache; } vnet_state; #ifdef CONFIG_DEBUG_VNET -static inline void mac_to_string(uint8_t mac[6], char * buf) { +static inline void mac_to_string(char mac[6], char * buf) { snprintf(buf, 100, "%d:%d:%d:%d:%d:%d", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } -static void print_route(struct vnet_route_info * route){ +static void print_route(struct vnet_route_info *route){ char str[50]; - memset(str, 0, 50); - mac_to_string(route->route_def.src_mac, str); PrintDebug("Src Mac (%s), src_qual (%d)\n", str, route->route_def.src_mac_qual); - mac_to_string(route->route_def.dst_mac, str); PrintDebug("Dst Mac (%s), dst_qual (%d)\n", str, route->route_def.dst_mac_qual); - PrintDebug("Src dev id (%d), src type (%d)", route->route_def.src_id, route->route_def.src_type); - PrintDebug("Dst dev id (%d), dst type (%d)\n", route->route_def.dst_id, route->route_def.dst_type); - if (route->route_def.dst_type == LINK_INTERFACE) { - PrintDebug("dst_dev (%p), dst_dev_id (%d), dst_dev_input (%p), dst_dev_data (%p)\n", - route->dst_dev, - route->dst_dev->dev_id, - route->dst_dev->input, - route->dst_dev->private_data); + PrintDebug("dst_dev (%p), dst_dev_id (%d), dst_dev_ops(%p), dst_dev_data (%p)\n", + route->dst_dev, + route->dst_dev->dev_id, + (void *)&(route->dst_dev->dev_ops), + route->dst_dev->private_data); } } -static void dump_routes() { - struct vnet_route_info * route = NULL; - int i = 0; +static void dump_routes(){ + struct vnet_route_info *route; + int i = 0; PrintDebug("\n========Dump routes starts ============\n"); - list_for_each_entry(route, &(vnet_state.routes), node) { - PrintDebug("\nroute %d:\n", i++); + PrintDebug("\nroute %d:\n", ++i); + print_route(route); } - PrintDebug("\n========Dump routes end ============\n"); } @@ -218,13 +238,12 @@ static int look_into_cache(const struct v3_vnet_pkt * pkt, struct route_list ** static struct vnet_dev * find_dev_by_id(int idx) { struct vnet_dev * dev = NULL; - + list_for_each_entry(dev, &(vnet_state.devs), node) { int dev_id = dev->dev_id; - if (dev_id == idx) { + if (dev_id == idx) return dev; - } } return NULL; @@ -234,28 +253,27 @@ static struct vnet_dev * find_dev_by_mac(char mac[6]) { struct vnet_dev * dev = NULL; list_for_each_entry(dev, &(vnet_state.devs), node) { - if (memcmp(dev->mac_addr, mac, 6) == 0) { + if (!memcmp(dev->mac_addr, mac, 6)) return dev; - } } return NULL; } -int get_device_id_by_mac(char mac[6]) { - struct vnet_dev * dev = find_dev_by_mac(mac); - - if (dev == NULL) { +int v3_vnet_id_by_mac(char mac[6]){ + + struct vnet_dev *dev = find_dev_by_mac(mac); + + if (dev == NULL) return -1; - } - + return dev->dev_id; } int v3_vnet_add_route(struct v3_vnet_route route) { struct vnet_route_info * new_route = NULL; - uint32_t flags = 0; + unsigned long flags; new_route = (struct vnet_route_info *)V3_Malloc(sizeof(struct vnet_route_info)); memset(new_route, 0, sizeof(struct vnet_route_info)); @@ -274,8 +292,6 @@ int v3_vnet_add_route(struct v3_vnet_route route) { if (new_route->route_def.dst_type == LINK_INTERFACE) { new_route->dst_dev = find_dev_by_id(new_route->route_def.dst_id); - PrintDebug("Vnet: Add route, get device: dev_id %d, input : %p, private_data %p\n", - new_route->dst_dev->dev_id, new_route->dst_dev->input, new_route->dst_dev->private_data); } if (new_route->route_def.src_type == LINK_INTERFACE) { @@ -299,8 +315,9 @@ int v3_vnet_add_route(struct v3_vnet_route route) { -// At the end allocate a route_list -// This list will be inserted into the cache so we don't need to free it +/* At the end allocate a route_list + * This list will be inserted into the cache so we don't need to free it + */ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) { struct vnet_route_info * route = NULL; struct route_list * matches = NULL; @@ -425,117 +442,80 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) { return matches; } -#if 0 -static int flush_bridge_pkts(struct vnet_brg_dev *bridge){ - uint32_t flags; - int num; - int start; - int send; - struct v3_vnet_bridge_input_args args; - int cpu_id = bridge->vm->cores[0].cpu_id; - int current_core = V3_Get_CPU(); - +static int send_to_bridge(struct v3_vnet_pkt * pkt){ + struct vnet_brg_dev *bridge = vnet_state.bridge; + if (bridge == NULL) { - PrintDebug("VNET: No bridge to sent data to links\n"); + PrintError("VNET: No bridge to sent data to links\n"); return -1; } - flags = v3_lock_irqsave(bridge->recv_buf.lock); - - num = bridge->recv_buf.num; - start = bridge->recv_buf.start; + return bridge->brg_ops.input(bridge->vm, pkt, 1, bridge->private_data); +} - bridge->recv_buf.num -= num; - bridge->recv_buf.start += num; - bridge->recv_buf.start %= BRIDGE_BUF_SIZE; - - v3_unlock_irqrestore(bridge->recv_buf.lock, flags); +/* enable a vnet device, notify VNET it can send pkts to it */ +int v3_vnet_enable_device(int dev_id){ + struct vnet_dev *dev = find_dev_by_id(dev_id); + unsigned long flags; - if (bridge->disabled) { - PrintDebug("VNET: In flush bridge pkts: Bridge is disabled\n"); + if(!dev) return -1; - } - if (num <= 2 && num > 0) { - PrintDebug("VNET: In flush bridge pkts: %d\n", num); - } + if(!dev->rx_disabled) + return 0; - if (num > 0) { - PrintDebug("VNET: In flush bridge pkts to bridge, cur_cpu %d, brige_core: %d\n", current_core, cpu_id); - if (current_core == cpu_id) { - if ((start + num) < BRIDGE_BUF_SIZE) { - bridge->input(bridge->vm, &(bridge->recv_buf.pkts[start]), num, bridge->private_data); - } else { - bridge->input(bridge->vm, &(bridge->recv_buf.pkts[start]), (BRIDGE_BUF_SIZE - start), bridge->private_data); - send = num - (BRIDGE_BUF_SIZE - start); - bridge->input(bridge->vm, &(bridge->recv_buf.pkts[0]), send, bridge->private_data); - } - } else { - args.vm = bridge->vm; - args.private_data = bridge->private_data; - - if ((start + num) < BRIDGE_BUF_SIZE) { - args.pkt_num = num; - args.vnet_pkts = &(bridge->recv_buf.pkts[start]); - V3_Call_On_CPU(cpu_id, bridge->xcall_input, (void *)&args); - } else { - args.pkt_num = BRIDGE_BUF_SIZE - start; - args.vnet_pkts = &(bridge->recv_buf.pkts[start]); - V3_Call_On_CPU(cpu_id, bridge->xcall_input, (void *)&args); - - send = num - (BRIDGE_BUF_SIZE - start); - args.pkt_num = send; - args.vnet_pkts = &(bridge->recv_buf.pkts[0]); - V3_Call_On_CPU(cpu_id, bridge->xcall_input, (void *)&args); - } - } - - PrintDebug("VNET: flush bridge pkts %d\n", num); + flags = v3_lock_irqsave(vnet_state.lock); + dev->rx_disabled = 0; + v3_unlock_irqrestore(vnet_state.lock, flags); + + /* TODO: Wake up all other guests who are trying to send pkts */ + dev = NULL; + list_for_each_entry(dev, &(vnet_state.devs), node) { + if (dev->dev_id != dev_id) + dev->dev_ops.start_tx(dev->private_data); } - + return 0; } -#endif -static int send_to_bridge(struct v3_vnet_pkt * pkt){ - struct vnet_brg_dev * bridge = vnet_state.bridge; +/* Notify VNET to stop sending pkts to it */ +int v3_vnet_disable_device(int dev_id){ + struct vnet_dev *dev = find_dev_by_id(dev_id); + unsigned long flags; - if (bridge == NULL) { - PrintDebug("VNET: No bridge to sent data to links\n"); + if(!dev) return -1; - } - - if (bridge->max_delayed_pkts <= 1) { - if (bridge->disabled) { - PrintDebug("VNET: Bridge diabled\n"); - return -1; - } + flags = v3_lock_irqsave(vnet_state.lock); + dev->rx_disabled = 1; + v3_unlock_irqrestore(vnet_state.lock, flags); - bridge->input(bridge->vm, pkt, 1, bridge->private_data); - PrintDebug("VNET: sent one packet to the bridge\n"); + /* TODO: Notify all other guests to stop send pkts */ + dev = NULL; + list_for_each_entry(dev, &(vnet_state.devs), node) { + if (dev->dev_id != dev_id) + dev->dev_ops.stop_tx(dev->private_data); } - return 0; } int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { struct route_list * matched_routes = NULL; - uint32_t flags = 0; - int i = 0; - + unsigned long flags; + int i; + #ifdef CONFIG_DEBUG_VNET - { + { struct eth_hdr * hdr = (struct eth_hdr *)(pkt->header); char dest_str[100]; char src_str[100]; - int cpu = V3_Get_CPU(); - + mac_to_string(hdr->src_mac, src_str); mac_to_string(hdr->dst_mac, dest_str); + int cpu = V3_Get_CPU(); PrintDebug("Vnet: on cpu %d, HandleDataOverLink. SRC(%s), DEST(%s), pkt size: %d\n", cpu, src_str, dest_str, pkt->size); } #endif @@ -545,7 +525,7 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { look_into_cache(pkt, &matched_routes); if (matched_routes == NULL) { - PrintDebug("Vnet: send pkt Looking into routing table\n"); + PrintError("Vnet: send pkt Looking into routing table\n"); matched_routes = match_route(pkt); @@ -554,7 +534,7 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { } else { PrintDebug("Could not find route for packet... discards packet\n"); v3_unlock_irqrestore(vnet_state.lock, flags); - return -1; + return 0; /* do we return -1 here?*/ } } @@ -572,100 +552,29 @@ int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) { if (send_to_bridge(pkt) == -1) { PrintDebug("VNET: Packet not sent properly to bridge\n"); continue; - } - + } } else if (route->route_def.dst_type == LINK_INTERFACE) { - if (route->dst_dev->input(route->dst_dev->vm, pkt, route->dst_dev->private_data) == -1) { - PrintDebug("VNET: Packet not sent properly\n"); - continue; + if (!route->dst_dev->rx_disabled){ + if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) == -1) { + PrintDebug("VNET: Packet not sent properly\n"); + continue; + } } } else { - PrintDebug("Vnet: Wrong Edge type\n"); - continue; + PrintError("VNET: Wrong Edge type\n"); } - PrintDebug("Vnet: v3_vnet_send_pkt: Forward packet according to Route %d\n", i); + PrintDebug("VNET: Forward one packet according to Route %d\n", i); } return 0; } -void v3_vnet_send_pkt_xcall(void * data) { - struct v3_vnet_pkt * pkt = (struct v3_vnet_pkt *)data; - v3_vnet_send_pkt(pkt, NULL); -} - - -void v3_vnet_polling() { - uint32_t flags = 0; - int num = 0; - int start = 0; - struct v3_vnet_pkt * buf = NULL; - - PrintDebug("In vnet pollling: cpu %d\n", V3_Get_CPU()); - - flags = v3_lock_irqsave(vnet_state.in_buf.lock); - - num = vnet_state.in_buf.num; - start = vnet_state.in_buf.start; - - PrintDebug("VNET: polling pkts %d\n", num); - - while (num > 0) { - buf = &(vnet_state.in_buf.pkts[vnet_state.in_buf.start]); - - v3_vnet_send_pkt(buf, NULL); - - vnet_state.in_buf.num--; - vnet_state.in_buf.start++; - vnet_state.in_buf.start %= BRIDGE_BUF_SIZE; - num--; - } - - v3_unlock_irqrestore(vnet_state.in_buf.lock, flags); - - return; -} - - -int v3_vnet_rx(uint8_t * buf, uint16_t size, uint16_t src_id, uint8_t src_type) { - uint32_t flags = 0; - int end = 0; - struct v3_vnet_pkt * pkt = NULL; - - flags = v3_lock_irqsave(vnet_state.in_buf.lock); - - end = vnet_state.in_buf.end; - pkt = &(vnet_state.in_buf.pkts[end]); - - if (vnet_state.in_buf.num > BRIDGE_BUF_SIZE){ - PrintDebug("VNET: bridge rx: buffer full\n"); - v3_unlock_irqrestore(vnet_state.in_buf.lock, flags); - return 0; - } - - vnet_state.in_buf.num++; - vnet_state.in_buf.end++; - vnet_state.in_buf.end %= BRIDGE_BUF_SIZE; - - pkt->size = size; - pkt->src_id = src_id; - pkt->src_type = src_type; - memcpy(pkt->header, buf, ETHERNET_HEADER_LEN); - memcpy(pkt->data, buf, size); - - - v3_unlock_irqrestore(vnet_state.in_buf.lock, flags); - - return 0; -} - - -int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t mac[6], - int (*netif_input)(struct v3_vm_info * vm, struct v3_vnet_pkt * pkt, void * private_data), +int v3_vnet_add_dev(struct v3_vm_info *vm, uint8_t mac[6], + struct v3_vnet_dev_ops *ops, void * priv_data){ struct vnet_dev * new_dev = NULL; - uint32_t flags = 0; + unsigned long flags; new_dev = (struct vnet_dev *)V3_Malloc(sizeof(struct vnet_dev)); @@ -675,7 +584,8 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t mac[6], } memcpy(new_dev->mac_addr, mac, 6); - new_dev->input = netif_input; + new_dev->dev_ops.input = ops->input; + new_dev->dev_ops.poll = ops->poll; new_dev->private_data = priv_data; new_dev->vm = vm; new_dev->dev_id = 0; @@ -689,49 +599,51 @@ int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t mac[6], v3_unlock_irqrestore(vnet_state.lock, flags); - // if the device was found previosly the id should still be 0 + /* if the device was found previosly the id should still be 0 */ if (new_dev->dev_id == 0) { PrintError("Device Alrady exists\n"); return -1; } - PrintDebug("Vnet: Add Device: dev_id %d, input : %p, private_data %p\n", - new_dev->dev_id, new_dev->input, new_dev->private_data); + PrintDebug("Vnet: Add Device: dev_id %d\n", new_dev->dev_id); return new_dev->dev_id; } -void v3_vnet_heartbeat(struct guest_info *core){ - //static long last_time, cur_time; +/* TODO: Still need to figure out how to handle this multicore part --Lei + */ +void v3_vnet_poll(struct v3_vm_info *vm){ + struct vnet_dev * dev = NULL; - if (vnet_state.bridge == NULL) { - return; - } -/* - if(vnet_state.bridge->max_delayed_pkts > 1){ - if(V3_Get_CPU() != vnet_state.bridge->vm->cores[0].cpu_id){ - rdtscll(cur_time); - } - - if ((cur_time - last_time) >= vnet_state.bridge->max_latency) { - last_time = cur_time; - flush_bridge_pkts(vnet_state.bridge); - } + switch (vnet_state.sidecores) { + case 0: + list_for_each_entry(dev, &(vnet_state.devs), node) { + if(dev->vm == vm){ + dev->dev_ops.poll(vm, dev->private_data); + } + } + break; + case 1: + break; + case 2: + list_for_each_entry(dev, &(vnet_state.devs), node) { + int cpu_id = vm->cores[0].cpu_id + 2; /* temporary here, should use vnet_state.cores_map */ + struct v3_vnet_dev_xcall_args dev_args; /* could cause problem here -LX */ + dev_args.vm = vm; + dev_args.private_data = dev->private_data; + V3_Call_On_CPU(cpu_id, dev->dev_ops.poll_xcall, (void *)&dev_args); + } + break; + default: + break; } -*/ - vnet_state.bridge->polling_pkt(vnet_state.bridge->vm, vnet_state.bridge->private_data); } int v3_vnet_add_bridge(struct v3_vm_info * vm, - int (*input)(struct v3_vm_info * vm, struct v3_vnet_pkt pkt[], uint16_t pkt_num, void * private_data), - void (*xcall_input)(void * data), - int (*poll_pkt)(struct v3_vm_info * vm, void * private_data), - uint16_t max_delayed_pkts, - long max_latency, + struct v3_vnet_bridge_ops *ops, void * priv_data) { - - uint32_t flags = 0; + unsigned long flags; int bridge_free = 0; struct vnet_brg_dev * tmp_bridge = NULL; @@ -758,31 +670,13 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, } tmp_bridge->vm = vm; - tmp_bridge->input = input; - tmp_bridge->xcall_input = xcall_input; - tmp_bridge->polling_pkt = poll_pkt; + tmp_bridge->brg_ops.input = ops->input; + tmp_bridge->brg_ops.xcall_input = ops->xcall_input; + tmp_bridge->brg_ops.polling_pkt = ops->polling_pkt; tmp_bridge->private_data = priv_data; tmp_bridge->disabled = 0; - -/* - //initial receving buffer - tmp_bridge->recv_buf.start = 0; - tmp_bridge->recv_buf.end = 0; - tmp_bridge->recv_buf.num = 0; - if(v3_lock_init(&(tmp_bridge->recv_buf.lock)) == -1){ - PrintError("VNET: add bridge, error to initiate recv buf lock\n"); - } - int i; - for(i = 0; irecv_buf.pkts[i].data = &(tmp_bridge->recv_buf.datas[i*ETHERNET_PACKET_LEN]); - } - -*/ - - tmp_bridge->max_delayed_pkts = (max_delayed_pkts < BRIDGE_BUF_SIZE) ? max_delayed_pkts : BRIDGE_BUF_SIZE; - tmp_bridge->max_latency = max_latency; - // make this atomic to avoid possible race conditions + /* make this atomic to avoid possible race conditions */ flags = v3_lock_irqsave(vnet_state.lock); vnet_state.bridge = tmp_bridge; v3_unlock_irqrestore(vnet_state.lock, flags); @@ -791,8 +685,9 @@ int v3_vnet_add_bridge(struct v3_vm_info * vm, } +#if 0 int v3_vnet_disable_bridge() { - uint32_t flags = 0; + unsigned long flags; flags = v3_lock_irqsave(vnet_state.lock); @@ -807,7 +702,7 @@ int v3_vnet_disable_bridge() { int v3_vnet_enable_bridge() { - uint32_t flags = 0; + unsigned long flags; flags = v3_lock_irqsave(vnet_state.lock); @@ -819,12 +714,9 @@ int v3_vnet_enable_bridge() { return 0; } +#endif - - -int V3_init_vnet() { - int i = 0; - +int v3_init_vnet() { memset(&vnet_state, 0, sizeof(vnet_state)); INIT_LIST_HEAD(&(vnet_state.routes)); @@ -838,23 +730,7 @@ int V3_init_vnet() { if (v3_lock_init(&(vnet_state.lock)) == -1){ PrintError("VNET: Failure to init lock for routes table\n"); } - PrintDebug("VNET: Locks initiated\n"); - - //initial incoming pkt buffer - vnet_state.in_buf.start = 0; - vnet_state.in_buf.end = 0; - vnet_state.in_buf.num = 0; - - if (v3_lock_init(&(vnet_state.in_buf.lock)) == -1){ - PrintError("VNET: add bridge, error to initiate send buf lock\n"); - } - - for (i = 0; i < BRIDGE_BUF_SIZE; i++){ - vnet_state.in_buf.pkts[i].data = &(vnet_state.in_buf.datas[i * ETHERNET_PACKET_LEN]); - } - - PrintDebug("VNET: Receiving buffer initiated\n"); vnet_state.route_cache = v3_create_htable(0, &hash_fn, &hash_eq); @@ -863,6 +739,9 @@ int V3_init_vnet() { return -1; } + vnet_state.sidecores = 0; + vnet_state.cores_map = 0; + PrintDebug("VNET: initiated\n"); return 0;