2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at http://www.v3vee.org
10 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11 * Copyright (c) 2008, Lei Xia <lxia@northwestern.edu>
12 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
13 * All rights reserved.
15 * Author: Jack Lange <jarusl@cs.northwestern.edu>
16 * Lei Xia <lxia@northwestern.edu>
19 * This is free software. You are permitted to use,
20 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_dev_mgr.h>
25 #include <devices/lnx_virtio_pci.h>
26 #include <devices/lnx_virtio_nic.h>
27 #include <palacios/vm_guest_mem.h>
29 #include <devices/pci.h>
/*
 * PrintDebug is defined to expand to nothing when the config symbol below is
 * not defined.
 * NOTE(review): the symbol is CONFIG_DEBUG_VIRTIO_BLK although this file is the
 * NIC device -- looks inherited from the block driver; confirm the intended
 * symbol. The directives that close this conditional are not visible in this
 * view.
 */
32 #ifndef CONFIG_DEBUG_VIRTIO_BLK
34 #define PrintDebug(fmt, args...)
/*
 * Result codes stored through the status pointer by handle_pkt_write() and
 * handle_pkt_read(), and used as the initial per-request status in
 * handle_pkt_tx().
 */
37 #define NIC_STATUS_OK 0
38 #define NIC_STATUS_ERR 1
39 #define NIC_STATUS_NOT_SUPPORTED 2
42 /* The feature bitmap for virtio net */
/*
 * NOTE(review): the values below look like bit positions rather than masks --
 * confirm before OR-ing them into host_features (virtio_init currently
 * advertises 0, i.e. none of them).
 */
43 #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
44 #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
45 #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
46 #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
47 #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
48 #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
49 #define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */
50 #define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */
51 #define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
52 #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
53 #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
54 #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
55 #define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */
56 #define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */
57 #define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */
58 #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
59 #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
60 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
/* Written into virtio_cfg.status by virtio_reset(). */
61 #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
63 /* Maximum packet size we can receive from tap device: header + 64k */
/* Expands to sizeof(struct virtio_net_hdr) plus 65536 (64 << 10) bytes. */
64 #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10))
/*
 * Per-packet virtio-net header (packed). handle_pkt_tx() copies one of these
 * out of guest memory before walking the buffer descriptors.
 * The nested #defines name values for the header; presumably they belong to
 * flag and GSO-type members.
 * NOTE(review): those member declarations fall on lines that are not visible
 * in this view -- only the four 16-bit fields below are shown.
 */
67 struct virtio_net_hdr {
68 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
71 #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
72 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
73 #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
74 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
75 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
78 uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */
79 uint16_t gso_size; /* Bytes to append to hdr_len per frame */
80 uint16_t csum_start; /* Position to start checksumming from */
81 uint16_t csum_offset; /* Offset after that to place checksum */
82 }__attribute__((packed));
/*
 * Ring sizes. QUEUE_SIZE is the modulus for avail/used ring indices in
 * handle_pkt_tx() and sizes the ring layout computed in virtio_setup_queue();
 * virtio_init() assigns it to rx_vq/tx_vq.queue_size and assigns
 * CTRL_QUEUE_SIZE to ctrl_vq.queue_size.
 */
87 #define QUEUE_SIZE 256
88 #define CTRL_QUEUE_SIZE 64
/*
 * Backend callbacks. The file refers to the enclosing table as
 * struct v3_net_ops; its opening and closing lines are not visible in this
 * view. write_op() invokes send and read_op() invokes receive, each passing
 * the device state's private_data as the last argument.
 */
92 int (*send)(uint8_t * buf, uint32_t count, void * private_data);
93 int (*receive)(uint8_t * buf, uint32_t count, void * private_data);
/*
 * Device-specific configuration block (packed). Only the MAC array is visible
 * here; the comment inside refers to a status member.
 * NOTE(review): that member's declaration is on a line not shown in this view.
 */
99 struct virtio_net_config
101 uint8_t mac[ETH_ALEN];
102 // See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above
104 } __attribute__((packed));
/*
 * Per-device state, allocated and zeroed in virtio_init() and reached through
 * vm_device::private_data everywhere else in this file.
 * NOTE(review): other code in this file also uses io_range_size and
 * private_data members of this struct; their declarations and the closing
 * brace are on lines not visible in this view.
 */
106 struct virtio_net_state {
107 struct virtio_net_config net_cfg;
108 struct virtio_config virtio_cfg;
/* PCI bus device and this NIC's PCI function; used to raise/lower the IRQ. */
110 struct vm_device * pci_bus;
111 struct pci_device * pci_dev;
113 struct virtio_queue rx_vq; //index 0, rvq in Linux virtio driver, handle packet to guest
114 struct virtio_queue tx_vq; //index 1, svq in Linux virtio driver, handle packet from guest
115 struct virtio_queue ctrl_vq; //index 2, control info from guest
/* Backend callbacks; NULL after virtio_init() until v3_virtio_register_nic(). */
117 struct v3_net_ops * net_ops;
/*
 * Device free hook for this NIC. Only the signature is visible in this view;
 * the body falls on lines that are not shown, so nothing is documented about
 * what it releases or returns.
 */
125 static int virtio_free(struct vm_device * dev)
/*
 * Reset hook (installed as dev_ops.reset): clears the guest-visible ring
 * addresses, page frame number and consumed-avail index of all three queues,
 * then resets the status and ISR registers.
 *
 * NOTE(review): the desc/avail/used host pointers of each queue are not
 * cleared in the visible lines -- confirm whether stale pointers can be used
 * after a reset. The return statement is on a line not visible in this view.
 */
131 static int virtio_reset(struct vm_device * dev)
133 struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
/* RX queue (index 0) */
135 virtio->rx_vq.ring_desc_addr = 0;
136 virtio->rx_vq.ring_avail_addr = 0;
137 virtio->rx_vq.ring_used_addr = 0;
138 virtio->rx_vq.pfn = 0;
139 virtio->rx_vq.cur_avail_idx = 0;
/* TX queue (index 1) */
141 virtio->tx_vq.ring_desc_addr = 0;
142 virtio->tx_vq.ring_avail_addr = 0;
143 virtio->tx_vq.ring_used_addr = 0;
144 virtio->tx_vq.pfn = 0;
145 virtio->tx_vq.cur_avail_idx = 0;
/* Control queue (index 2) */
147 virtio->ctrl_vq.ring_desc_addr = 0;
148 virtio->ctrl_vq.ring_avail_addr = 0;
149 virtio->ctrl_vq.ring_used_addr = 0;
150 virtio->ctrl_vq.pfn = 0;
151 virtio->ctrl_vq.cur_avail_idx = 0;
/*
 * NOTE(review): virtio_cfg.status is the register served on
 * VIRTIO_STATUS_PORT, yet it is loaded with a VIRTIO_NET_S_* link-status
 * value here -- confirm this is intended.
 */
153 virtio->virtio_cfg.status = VIRTIO_NET_S_LINK_UP;
154 virtio->virtio_cfg.pci_isr = 0;
/*
 * NOTE(review): this drops the pointer that read_op()/write_op() hand to the
 * backend callbacks -- confirm the backend tolerates NULL after a reset.
 */
155 virtio->private_data = NULL;
160 static int read_op(struct vm_device * dev, uint8_t * buf, uint32_t len)
162 struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
165 PrintDebug("Receving pkt from guest\n");
167 ret = virtio->net_ops->receive(buf, len, virtio->private_data);
172 static int write_op(struct vm_device *dev, uint8_t *buf, uint32_t len)
174 struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
177 PrintDebug("Receving pkt from guest\n");
179 ret = virtio->net_ops->send(buf, len, virtio->private_data);
185 //sending guest's packet to network sink
/*
 * Translates the guest-physical address in buf_desc to a host-virtual pointer
 * and passes buf_desc->length bytes to write_op(). *status is set to
 * NIC_STATUS_ERR when write_op() returns -1 and to NIC_STATUS_OK otherwise.
 * hdr is not used in the visible lines.
 * The return statements are on lines not visible in this view; the caller
 * (handle_pkt_tx) treats -1 as failure.
 */
186 static int handle_pkt_write(struct vm_device *dev, struct virtio_net_hdr *hdr,
187 struct vring_desc *buf_desc, uint8_t *status)
189 //struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
190 uint8_t * buf = NULL;
192 PrintDebug("Handling Virtio Net write\n");
/* Guest-physical -> host-virtual; buf is only valid if this succeeds. */
194 if (guest_pa_to_host_va(dev->vm, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
195 PrintError("Could not translate buffer address\n");
199 PrintDebug("Length=%d\n", buf_desc->length);
/* NOTE(review): length comes from the guest descriptor and is not bounded here. */
201 if (write_op(dev, buf, buf_desc->length) == -1) {
202 *status = NIC_STATUS_ERR;
205 *status = NIC_STATUS_OK;
208 PrintDebug("Returning Status: %d\n", *status);
215 //get packet from network, and send to guest
/*
 * Translates the guest-physical address in buf_desc to a host-virtual pointer
 * and asks read_op() to fill up to buf_desc->length bytes. *status is set to
 * NIC_STATUS_ERR when read_op() returns -1 and to NIC_STATUS_OK otherwise.
 * hdr is not used in the visible lines.
 *
 * buf_desc and status are dereferenced unconditionally, so both must be
 * non-NULL. NOTE(review): handle_pkt_rx() currently passes NULL/0 for them.
 * The return statements are on lines not visible in this view.
 */
216 static int handle_pkt_read(struct vm_device *dev, struct virtio_net_hdr *hdr,
217 struct vring_desc *buf_desc, uint8_t *status)
219 //struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
220 uint8_t * buf = NULL;
222 PrintDebug("Handling Virtio Net read\n");
/* Guest-physical -> host-virtual; buf is only valid if this succeeds. */
224 if (guest_pa_to_host_va(dev->vm, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
225 PrintError("Could not translate buffer address\n");
229 PrintDebug("Length=%d\n", buf_desc->length);
231 if (read_op(dev, buf, buf_desc->length) == -1) {
232 *status = NIC_STATUS_ERR;
235 *status = NIC_STATUS_OK;
238 PrintDebug("Returning Status: %d\n", *status);
243 static int get_desc_count(struct virtio_queue * q, int index)
245 struct vring_desc * tmp_desc = &(q->desc[index]);
248 while (tmp_desc->flags & VIRTIO_NEXT_FLAG) {
249 tmp_desc = &(q->desc[tmp_desc->next]);
/*
 * Handler for a guest notification on the control queue (queue index 2 in
 * virtio_io_write). Only the opening line is visible in this view; the body
 * is on lines that are not shown. The caller treats -1 as failure.
 */
257 static int handle_ctrl(struct vm_device * dev) {
263 // TODO: handle receiving, not done yet
264 //send packet to guest
265 static int handle_pkt_rx(struct vm_device * dev)
268 if (handle_pkt_read(dev, NULL, 0, NULL) == -1) {
269 PrintError("Error handling nic operation\n");
276 //get packet from guest
277 static int handle_pkt_tx(struct vm_device * dev)
279 struct virtio_net_state *virtio = (struct virtio_net_state *)dev->private_data;
280 struct virtio_queue *q = &(virtio->rx_vq);
282 PrintDebug("VIRTIO NIC KICK: cur_index=%d (mod=%d), avail_index=%d\n",
283 q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index);
285 while (q->cur_avail_idx < q->avail->index) {
286 struct vring_desc * hdr_desc = NULL;
287 struct vring_desc * buf_desc = NULL;
288 struct vring_desc * status_desc = NULL;
289 struct virtio_net_hdr hdr;
291 uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
292 int desc_cnt = get_desc_count(q, desc_idx);
294 uint8_t * status_ptr = NULL;
295 uint8_t status = NIC_STATUS_OK;
296 uint32_t req_len = 0;
298 PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE);
300 hdr_desc = &(q->desc[desc_idx]);
302 PrintDebug("Header Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", hdr_desc,
303 (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next);
305 if (guest_pa_to_host_va(dev->vm, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
306 PrintError("Could not translate block header address\n");
310 // We copy the block op header out because we are going to modify its contents
311 memcpy(&hdr, (void *)hdr_addr, sizeof(struct virtio_net_hdr));
313 PrintDebug("NIC Op Hdr (ptr=%p) type=%d, sector=%p\n", (void *)hdr_addr, hdr.hdr_len, (void *)hdr.csum_start);
315 desc_idx = hdr_desc->next;
317 for (i = 0; i < desc_cnt - 2; i++) {
318 uint8_t tmp_status = NIC_STATUS_OK;
320 buf_desc = &(q->desc[desc_idx]);
322 PrintDebug("Buffer Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", buf_desc,
323 (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next);
325 if (handle_pkt_write(dev, &hdr, buf_desc, &tmp_status) == -1) {
326 PrintError("Error handling nic operation\n");
330 if (tmp_status != NIC_STATUS_OK) {
334 req_len += buf_desc->length;
335 desc_idx = buf_desc->next;
338 status_desc = &(q->desc[desc_idx]);
340 PrintDebug("Status Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", status_desc,
341 (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next);
343 if (guest_pa_to_host_va(dev->vm, status_desc->addr_gpa, (addr_t *)&(status_ptr)) == -1) {
344 PrintError("Could not translate status address\n");
348 req_len += status_desc->length;
349 *status_ptr = status;
351 q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
352 q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // What do we set this to????
358 if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
359 PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line);
360 v3_pci_raise_irq(virtio->pci_bus, 0, virtio->pci_dev);
361 virtio->virtio_cfg.pci_isr = 0x1;
/*
 * Computes the guest-physical addresses of a queue's three rings from the
 * page address the guest wrote, then translates each one to a host-virtual
 * pointer stored in queue->desc / queue->avail / queue->used.
 *
 * Layout computed below: descriptor table at page_addr; avail ring directly
 * after QUEUE_SIZE descriptors; used ring after the avail header plus
 * QUEUE_SIZE 16-bit entries, rounded up to the next 0x1000 boundary.
 *
 * NOTE(review): the layout always uses QUEUE_SIZE, although virtio_init()
 * sets ctrl_vq.queue_size to CTRL_QUEUE_SIZE -- confirm for the control
 * queue. The pfn parameter is not used in the visible lines, and the return
 * statements are on lines not visible in this view.
 */
368 static int virtio_setup_queue(struct vm_device * dev, struct virtio_queue *queue, addr_t pfn, addr_t page_addr)
372 queue->ring_desc_addr = page_addr ;
373 queue->ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc));
374 queue->ring_used_addr = (queue->ring_avail_addr + \
375 sizeof(struct vring_avail) + \
376 (QUEUE_SIZE * sizeof(uint16_t)));
378 // round up to next page boundary.
379 queue->ring_used_addr = (queue->ring_used_addr + 0xfff) & ~0xfff;
/* Translate each ring's guest-physical address to a host-virtual pointer. */
381 if (guest_pa_to_host_va(dev->vm, queue->ring_desc_addr, (addr_t *)&(queue->desc)) == -1) {
382 PrintError("Could not translate ring descriptor address\n");
387 if (guest_pa_to_host_va(dev->vm, queue->ring_avail_addr, (addr_t *)&(queue->avail)) == -1) {
388 PrintError("Could not translate ring available address\n");
393 if (guest_pa_to_host_va(dev->vm, queue->ring_used_addr, (addr_t *)&(queue->used)) == -1) {
394 PrintError("Could not translate ring used address\n");
398 PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n",
399 (void *)(queue->ring_desc_addr),
400 (void *)(queue->ring_avail_addr),
401 (void *)(queue->ring_used_addr));
403 PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n",
404 queue->desc, queue->avail, queue->used);
/*
 * I/O write hook for the device's port range (installed as bars[0].io_write
 * in virtio_init). The register is selected by port modulo io_range_size.
 *
 * port:         I/O port written by the guest
 * src:          value written; read as 8, 16 or 32 bits depending on register
 * length:       access width in bytes
 * private_data: the struct vm_device of this NIC
 *
 * NOTE(review): the switch statement, most length checks, break/return lines
 * and the inner queue dispatch labels are on lines not visible in this view,
 * so the return value is not documented here.
 */
411 static int virtio_io_write(uint16_t port, void * src, uint_t length, void * private_data)
413 struct vm_device * dev = (struct vm_device *)private_data;
414 struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
415 int port_idx = port % virtio->io_range_size;
/* NOTE(review): with debugging enabled this reads 32 bits from src whatever
 * the access width is. */
418 PrintDebug("VIRTIO NIC Write for port %d (index=%d) len=%d, value=%x\n",
419 port, port_idx, length, *(uint32_t *)src);
/* Guest feature bits: stored as a 32-bit value. */
423 case GUEST_FEATURES_PORT:
425 PrintError("Illegal write length for guest features\n");
429 virtio->virtio_cfg.guest_features = *(uint32_t *)src;
430 PrintDebug("Setting Guest Features to %x\n", virtio->virtio_cfg.guest_features);
/* Ring page frame number: maps the rings of the currently selected queue. */
433 case VRING_PG_NUM_PORT:
435 addr_t pfn = *(uint32_t *)src;
436 addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT);
438 uint16_t queue_idx = virtio->virtio_cfg.vring_queue_selector;
/* NOTE(review): the result of virtio_setup_queue() is discarded, so a failed
 * address translation is not reported to the caller. */
441 virtio_setup_queue(dev, &virtio->rx_vq, pfn, page_addr);
444 virtio_setup_queue(dev, &virtio->tx_vq, pfn, page_addr);
447 virtio_setup_queue(dev, &virtio->ctrl_vq, pfn, page_addr);
454 PrintError("Illegal write length for page frame number\n");
/* Queue selector: only indices 0..2 are accepted. */
458 case VRING_Q_SEL_PORT:
459 virtio->virtio_cfg.vring_queue_selector = *(uint16_t *)src;
461 if (virtio->virtio_cfg.vring_queue_selector > 2) {
462 PrintError("Virtio NIC device only uses 3 queue, selected %d\n",
463 virtio->virtio_cfg.vring_queue_selector);
/* Queue notify ("kick"): the written value is the queue index; index 1 goes
 * to handle_pkt_tx() and index 2 to handle_ctrl(). The first branch (its
 * condition is not visible here) calls handle_pkt_rx(). */
468 case VRING_Q_NOTIFY_PORT:
469 PrintDebug("Handling Kick\n");
470 uint16_t queue_idx = *(uint16_t *)src;
472 if (handle_pkt_rx(dev) == -1) {
473 PrintError("Could not handle NIC Notification\n");
476 }else if (queue_idx == 1){
477 if (handle_pkt_tx(dev) == -1) {
478 PrintError("Could not handle NIC Notification\n");
481 }else if (queue_idx == 2){
482 if (handle_ctrl(dev) == -1) {
483 PrintError("Could not handle NIC Notification\n");
487 PrintError("Virtio NIC device only uses 3 queue, selected %d\n",
/* Device status: a write of 0 takes the "Resetting device" path. */
492 case VIRTIO_STATUS_PORT:
493 virtio->virtio_cfg.status = *(uint8_t *)src;
495 if (virtio->virtio_cfg.status == 0) {
496 PrintDebug("Resetting device\n");
/* ISR register: stored as written. */
502 case VIRTIO_ISR_PORT:
503 virtio->virtio_cfg.pci_isr = *(uint8_t *)src;
514 static int virtio_io_read(uint16_t port, void * dst, uint_t length, void * private_data)
516 struct vm_device * dev = (struct vm_device *)private_data;
517 struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
518 int port_idx = port % virtio->io_range_size;
519 uint16_t queue_idx = virtio->virtio_cfg.vring_queue_selector;
522 PrintDebug("VIRTIO NIC Read for port %d (index =%d), length=%d\n",
523 port, port_idx, length);
526 case HOST_FEATURES_PORT:
528 PrintError("Illegal read length for host features\n");
532 *(uint32_t *)dst = virtio->virtio_cfg.host_features;
535 case VRING_PG_NUM_PORT:
538 PrintError("Illegal read length for page frame number\n");
544 *(uint32_t *)dst = virtio->rx_vq.pfn;
547 *(uint32_t *)dst = virtio->tx_vq.pfn;
550 *(uint32_t *)dst = virtio->ctrl_vq.pfn;
557 case VRING_SIZE_PORT:
559 PrintError("Illegal read length for vring size\n");
565 *(uint16_t *)dst = virtio->rx_vq.queue_size;
568 *(uint32_t *)dst = virtio->tx_vq.queue_size;
571 *(uint32_t *)dst = virtio->ctrl_vq.queue_size;
578 case VIRTIO_STATUS_PORT:
580 PrintError("Illegal read length for status\n");
584 *(uint8_t *)dst = virtio->virtio_cfg.status;
587 case VIRTIO_ISR_PORT:
588 *(uint8_t *)dst = virtio->virtio_cfg.pci_isr;
589 virtio->virtio_cfg.pci_isr = 0;
590 v3_pci_lower_irq(virtio->pci_bus, 0, virtio->pci_dev);
594 PrintError("Read of Unhandled Virtio Read\n");
/*
 * Device operations table passed to v3_allocate_device() in virtio_init().
 * Only the reset hook is visible in this view; any other initializers and
 * the closing brace are on lines that are not shown.
 */
602 static struct v3_device_ops dev_ops = {
604 .reset = virtio_reset,
/*
 * Attaches a network backend to the NIC by storing ops in the device state;
 * read_op()/write_op() call through it afterwards.
 *
 * dev:          the virtio NIC device
 * ops:          backend send/receive callbacks
 * private_data: NOTE(review): its use and the return statement are on lines
 *               not visible in this view.
 */
610 int v3_virtio_register_nic(struct vm_device *dev, struct v3_net_ops *ops, void *private_data) {
611 struct virtio_net_state * virtio = (struct virtio_net_state *)dev->private_data;
613 virtio->net_ops = ops;
/*
 * Device constructor registered under the name "LNX_VIRTIO_NIC".
 *
 * vm:       guest the device is attached to
 * cfg_data: passed to v3_find_dev() as the name of the PCI bus device
 *
 * Allocates and zeroes the device state, attaches the device to the guest,
 * registers a PCI function with one I/O BAR served by virtio_io_read() and
 * virtio_io_write(), fills in the PCI config header and sets the queue sizes.
 *
 * NOTE(review): the error-path returns, some local declarations, and the
 * lines after the queue-size setup are not visible in this view.
 */
619 static int virtio_init(struct guest_info * vm, void *cfg_data) {
620 struct vm_device * pci_bus = v3_find_dev(vm, (char *)cfg_data);
621 struct virtio_net_state * virtio_state = NULL;
622 struct pci_device * pci_dev = NULL;
624 PrintDebug("Initializing VIRTIO Network device\n");
626 if (pci_bus == NULL) {
627 PrintError("VirtIO network devices require a PCI Bus");
/* NOTE(review): the V3_Malloc result is passed to memset without a NULL
 * check in the visible lines. */
631 virtio_state = (struct virtio_net_state *)V3_Malloc(sizeof(struct virtio_net_state));
632 memset(virtio_state, 0, sizeof(struct virtio_net_state));
634 struct vm_device * dev = v3_allocate_device("LNX_VIRTIO_NIC", &dev_ops, virtio_state);
635 if (v3_attach_device(vm, dev) == -1) {
636 PrintError("Could not attach device %s\n", "LNX_VIRTIO_NIC");
641 // PCI initialization
643 struct v3_pci_bar bars[6];
/* The port range must cover struct virtio_config. */
644 int num_ports = sizeof(struct virtio_config);
645 int tmp_ports = num_ports;
648 // This gets the number of ports, rounded up to a power of 2
649 virtio_state->io_range_size = 1; // must be a power of 2
/* One left shift per loop pass; the statement that shrinks tmp_ports is on a
 * line not visible in this view. */
651 while (tmp_ports > 0) {
653 virtio_state->io_range_size <<= 1;
656 // this is to account for any low order bits being set in num_ports
657 // if there are none, then num_ports was already a power of 2 so we shift right to reset it
658 if ((num_ports & ((virtio_state->io_range_size >> 1) - 1)) == 0) {
659 virtio_state->io_range_size >>= 1;
/* Start with every BAR unused, then describe BAR 0 below. */
662 for (i = 0; i < 6; i++) {
663 bars[i].type = PCI_BAR_NONE;
666 PrintDebug("Virtio-NIC io_range_size = %d\n", virtio_state->io_range_size);
668 bars[0].type = PCI_BAR_IO;
669 bars[0].default_base_port = -1;
670 bars[0].num_ports = virtio_state->io_range_size;
672 bars[0].io_read = virtio_io_read;
673 bars[0].io_write = virtio_io_write;
/* The I/O hooks receive the vm_device as their private_data argument. */
674 bars[0].private_data = dev;
676 pci_dev = v3_pci_register_device(pci_bus, PCI_STD_DEVICE,
677 0, PCI_AUTO_DEV_NUM, 0,
678 "LNX_VIRTIO_NIC", bars,
679 NULL, NULL, NULL, dev, NULL);
682 PrintError("Could not register PCI Device\n");
/* PCI identity presented to the guest. */
686 pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID;
687 pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID;
690 pci_dev->config_header.device_id = VIRTIO_NET_DEV_ID;
691 pci_dev->config_header.class = PCI_CLASS_NETWORK;
692 pci_dev->config_header.subclass = PCI_NET_SUBCLASS_OTHER;
694 // TODO:how to define new one for virtio net device
/* NOTE(review): a block-device subsystem ID is used for this network device. */
695 pci_dev->config_header.subsystem_id = VIRTIO_BLOCK_SUBDEVICE_ID;
698 pci_dev->config_header.intr_pin = 1;
700 pci_dev->config_header.max_latency = 1; // ?? (qemu does it...)
703 virtio_state->pci_dev = pci_dev;
704 virtio_state->pci_bus = pci_bus;
707 virtio_state->virtio_cfg.host_features = 0; //no features support now
709 virtio_state->rx_vq.queue_size = QUEUE_SIZE;
710 virtio_state->tx_vq.queue_size = QUEUE_SIZE;
711 virtio_state->ctrl_vq.queue_size = CTRL_QUEUE_SIZE;
/* No backend yet; v3_virtio_register_nic() installs one later. */
717 virtio_state->net_ops = NULL;
/* Associates the device name "LNX_VIRTIO_NIC" with virtio_init as its
 * initialization function. */
723 device_register("LNX_VIRTIO_NIC", virtio_init)