/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2010, Lei Xia <lxia@northwestern.edu>
 * Copyright (c) 2009, Yuan Tang <ytang@northwestern.edu>
 * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org>
 *
 * Author: Lei Xia <lxia@northwestern.edu>
 *         Yuan Tang <ytang@northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
22 #include <vnet/vnet.h>
23 #include <vnet/vnet_hashtable.h>
24 #include <vnet/vnet_host.h>
25 #include <vnet/vnet_vmm.h>
27 #include <palacios/vmm_queue.h>
/* When VNET debugging is not configured, PrintDebug() expands to nothing.
 * NOTE(review): the matching #endif is not visible in this extract. */
29 #ifndef V3_CONFIG_DEBUG_VNET
31 #define PrintDebug(fmt, args...)
/* Interval handed to V3_Yield_Timed() by the polling thread when it has no
 * more work (microseconds, going by the name -- confirm). */
34 #define VNET_YIELD_USEC 1000
/* Ethernet header layout; match_route() overlays a struct eth_hdr on
 * pkt->data to read the MAC addresses. NOTE(review): the opening line of
 * this struct is not visible in this extract -- presumably struct eth_hdr. */
39 uint8_t dst_mac[ETH_ALEN];
40 uint8_t src_mac[ETH_ALEN];
41 uint16_t type; /* indicates layer 3 protocol type */
42 } __attribute__((packed));
/* Per-device record (used as struct vnet_dev by the functions below).
 * NOTE(review): the opening line and several members used elsewhere in this
 * file (dev_id, poll, quote, private_data) are not visible in this extract. */
47 uint8_t mac_addr[ETH_ALEN]; /* compared by dev_by_mac() */
48 struct v3_vm_info * vm; /* passed as first argument to the dev_ops callbacks */
49 struct v3_vnet_dev_ops dev_ops; /* input/poll callbacks copied in v3_vnet_add_dev() */
/* Upper bound applied to a device's quote in v3_vnet_add_dev(). */
53 #define VNET_MAX_QUOTE 64
58 struct list_head node; /* linkage on vnet_state.devs */
59 } __attribute__((packed));
/* Bridge record (used as struct vnet_brg_dev below).
 * NOTE(review): the opening line and the type / private_data members that
 * other functions access are not visible in this extract. */
63 struct v3_vm_info * vm; /* passed as first argument to brg_ops.input() */
64 struct v3_vnet_bridge_ops brg_ops; /* input/poll callbacks copied in v3_vnet_add_bridge() */
69 } __attribute__((packed));
/* One installed route: the caller-supplied definition plus the device
 * pointers resolved from it in v3_vnet_add_route().
 * NOTE(review): the idx member used elsewhere and the closing line of the
 * struct are not visible in this extract. */
73 struct vnet_route_info {
74 struct v3_vnet_route route_def;
76 struct vnet_dev * dst_dev; /* set when route_def.dst_type == LINK_INTERFACE */
77 struct vnet_dev * src_dev; /* set when route_def.src_type == LINK_INTERFACE */
81 struct list_head node; /* linkage on vnet_state.routes */
82 struct list_head match_node; // used for route matching
/* Result of route matching for one packet; stored in the route cache.
 * NOTE(review): the opening line and the num_routes member used elsewhere
 * are not visible in this extract. */
87 uint8_t hash_buf[VNET_HASH_SIZE]; /* copy of the packet's hash_buf; its address is the cache key */
90 struct vnet_route_info * routes[0]; /* matched routes, allocated together with the struct */
91 } __attribute__((packed));
/* NOTE(review): lone visible member of a struct whose name, other members
 * and closing line are not visible in this extract; nothing else in the
 * visible code refers to it. */
96 struct v3_vnet_pkt pkt;
/* Members of the file-wide vnet_state object.
 * NOTE(review): the opening/closing lines and the lock and counter members
 * (lock, num_routes, route_idx, num_devs, dev_idx) used by the functions
 * below are not visible in this extract. */
103 struct list_head routes; /* all vnet_route_info entries */
104 struct list_head devs; /* all registered vnet_dev entries */
113 struct vnet_brg_dev * bridge; /* the single bridge, or NULL */
116 struct vnet_stat stats; /* rx/tx byte and packet counters */
118 /* device queue that are waiting to be polled */
119 struct v3_queue * poll_devs;
121 struct vnet_thread * pkt_flush_thread; /* runs vnet_tx_flush() */
123 struct hashtable * route_cache; /* packet hash_buf -> struct route_list */
127 #ifdef V3_CONFIG_DEBUG_VNET
/*
 * Format a 6-byte MAC address as colon-separated, zero-padded lowercase hex
 * ("00:1a:2b:3c:4d:0f").
 *
 * mac: pointer to at least 6 readable bytes.
 * buf: destination; must have room for 18 bytes (17 characters + NUL).
 */
static inline void mac2str(uint8_t * mac, char * buf) {
    /* 18 == strlen("xx:xx:xx:xx:xx:xx") + 1, the exact space the text needs */
    snprintf(buf, 18, "%02x:%02x:%02x:%02x:%02x:%02x",
             mac[0], mac[1], mac[2],
             mac[3], mac[4], mac[5]);
}
134 static void print_route(struct v3_vnet_route * route){
137 mac2str(route->src_mac, str);
138 PrintDebug("Src Mac (%s), src_qual (%d)\n",
139 str, route->src_mac_qual);
140 mac2str(route->dst_mac, str);
141 PrintDebug("Dst Mac (%s), dst_qual (%d)\n",
142 str, route->dst_mac_qual);
143 PrintDebug("Src dev id (%d), src type (%d)",
146 PrintDebug("Dst dev id (%d), dst type (%d)\n",
151 static void dump_routes(){
152 struct vnet_route_info *route;
154 PrintDebug("\n========Dump routes starts ============\n");
155 list_for_each_entry(route, &(vnet_state.routes), node) {
156 PrintDebug("\nroute %d:\n", route->idx);
158 print_route(&(route->route_def));
159 if (route->route_def.dst_type == LINK_INTERFACE) {
160 PrintDebug("dst_dev (%p), dst_dev_id (%d), dst_dev_ops(%p), dst_dev_data (%p)\n",
162 route->dst_dev->dev_id,
163 (void *)&(route->dst_dev->dev_ops),
164 route->dst_dev->private_data);
168 PrintDebug("\n========Dump routes end ============\n");
175 * A VNET packet is a packed struct with the hashed fields grouped together.
176 * This means we can generate the hash from an offset into the pkt struct
178 static inline uint_t hash_fn(addr_t hdr_ptr) {
179 uint8_t * hdr_buf = (uint8_t *)hdr_ptr;
181 return vnet_hash_buffer(hdr_buf, VNET_HASH_SIZE);
184 static inline int hash_eq(addr_t key1, addr_t key2) {
185 return (memcmp((uint8_t *)key1, (uint8_t *)key2, VNET_HASH_SIZE) == 0);
188 static int add_route_to_cache(const struct v3_vnet_pkt * pkt, struct route_list * routes) {
189 memcpy(routes->hash_buf, pkt->hash_buf, VNET_HASH_SIZE);
191 if (vnet_htable_insert(vnet_state.route_cache, (addr_t)routes->hash_buf, (addr_t)routes) == 0) {
192 PrintError("VNET/P Core: Failed to insert new route entry to the cache\n");
199 static int clear_hash_cache() {
200 vnet_free_htable(vnet_state.route_cache, 1, 1);
201 vnet_state.route_cache = vnet_create_htable(0, &hash_fn, &hash_eq);
206 static int look_into_cache(const struct v3_vnet_pkt * pkt,
207 struct route_list ** routes) {
208 *routes = (struct route_list *)vnet_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf));
214 static struct vnet_dev * dev_by_id(int idx) {
215 struct vnet_dev * dev = NULL;
217 list_for_each_entry(dev, &(vnet_state.devs), node) {
218 if (dev->dev_id == idx) {
226 static struct vnet_dev * dev_by_mac(uint8_t * mac) {
227 struct vnet_dev * dev = NULL;
229 list_for_each_entry(dev, &(vnet_state.devs), node) {
230 if (!compare_ethaddr(dev->mac_addr, mac)){
/*
 * Look up a registered device by MAC address via dev_by_mac().
 * The device list is walked here without taking vnet_state.lock.
 * NOTE(review): the statements that turn the lookup result into the int
 * return value are not visible in this extract -- confirm the return
 * convention against the full file.
 */
239 int v3_vnet_find_dev(uint8_t * mac) {
240 struct vnet_dev * dev = NULL;
242 dev = dev_by_mac(mac);
252 int v3_vnet_add_route(struct v3_vnet_route route) {
253 struct vnet_route_info * new_route = NULL;
256 new_route = (struct vnet_route_info *)Vnet_Malloc(sizeof(struct vnet_route_info));
259 PrintError("Cannot allocate new route\n");
263 memset(new_route, 0, sizeof(struct vnet_route_info));
265 #ifdef V3_CONFIG_DEBUG_VNET
266 PrintDebug("VNET/P Core: add_route_entry:\n");
270 memcpy(new_route->route_def.src_mac, route.src_mac, ETH_ALEN);
271 memcpy(new_route->route_def.dst_mac, route.dst_mac, ETH_ALEN);
272 new_route->route_def.src_mac_qual = route.src_mac_qual;
273 new_route->route_def.dst_mac_qual = route.dst_mac_qual;
274 new_route->route_def.dst_type = route.dst_type;
275 new_route->route_def.src_type = route.src_type;
276 new_route->route_def.src_id = route.src_id;
277 new_route->route_def.dst_id = route.dst_id;
279 if (new_route->route_def.dst_type == LINK_INTERFACE) {
280 new_route->dst_dev = dev_by_id(new_route->route_def.dst_id);
283 if (new_route->route_def.src_type == LINK_INTERFACE) {
284 new_route->src_dev = dev_by_id(new_route->route_def.src_id);
288 flags = vnet_lock_irqsave(vnet_state.lock);
290 list_add(&(new_route->node), &(vnet_state.routes));
291 new_route->idx = ++ vnet_state.route_idx;
292 vnet_state.num_routes ++;
294 vnet_unlock_irqrestore(vnet_state.lock, flags);
298 #ifdef V3_CONFIG_DEBUG_VNET
302 return new_route->idx;
306 void v3_vnet_del_route(uint32_t route_idx){
307 struct vnet_route_info * route = NULL;
310 flags = vnet_lock_irqsave(vnet_state.lock);
312 list_for_each_entry(route, &(vnet_state.routes), node) {
313 Vnet_Print(0, "v3_vnet_del_route, route idx: %d\n", route->idx);
314 if(route->idx == route_idx){
315 list_del(&(route->node));
321 vnet_unlock_irqrestore(vnet_state.lock, flags);
324 #ifdef V3_CONFIG_DEBUG_VNET
330 /* delete all route entries with specfied src or dst device id */
331 static void inline del_routes_by_dev(int dev_id){
332 struct vnet_route_info * route, *tmp_route;
335 flags = vnet_lock_irqsave(vnet_state.lock);
337 list_for_each_entry_safe(route, tmp_route, &(vnet_state.routes), node) {
338 if((route->route_def.dst_type == LINK_INTERFACE &&
339 route->route_def.dst_id == dev_id) ||
340 (route->route_def.src_type == LINK_INTERFACE &&
341 route->route_def.src_id == dev_id)){
343 list_del(&(route->node));
344 list_del(&(route->match_node));
349 vnet_unlock_irqrestore(vnet_state.lock, flags);
/*
 * Find the routes on vnet_state.routes that match the Ethernet source and
 * destination MAC of the packet, and return them as a newly allocated
 * route_list holding num_matches route pointers. The caller
 * (v3_vnet_send_pkt) inserts the result into the route cache.
 *
 * Each route is tested against several MAC-qualifier combinations
 * (MAC_ANY, MAC_NOT, MAC_NONE, exact match); UPDATE_MATCHES keeps only the
 * routes of the highest rank seen so far.
 *
 * NOTE(review): the rank passed to each UPDATE_MATCHES() use, the local
 * declarations, and the early exits after the `num_matches <= 0` and
 * allocation-failure checks are not visible in this extract;
 * v3_vnet_send_pkt() treats a NULL result as "no route".
 */
352 /* At the end allocate a route_list
353 * This list will be inserted into the cache so we don't need to free it
355 static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
356 struct vnet_route_info * route = NULL;
357 struct route_list * matches = NULL;
360 struct list_head match_list;
361 struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
362 // uint8_t src_type = pkt->src_type;
363 // uint32_t src_link = pkt->src_id;
365 #ifdef V3_CONFIG_DEBUG_VNET
370 mac2str(hdr->src_mac, src_str);
371 mac2str(hdr->dst_mac, dst_str);
372 PrintDebug("VNET/P Core: match_route. pkt: SRC(%s), DEST(%s)\n", src_str, dst_str);
// match_list is local to this call; routes are linked onto it through match_node
376 INIT_LIST_HEAD(&match_list);
378 #define UPDATE_MATCHES(rank) do { \
379 if (max_rank < (rank)) { \
381 INIT_LIST_HEAD(&match_list); \
383 list_add(&(route->match_node), &match_list); \
385 } else if (max_rank == (rank)) { \
386 list_add(&(route->match_node), &match_list); \
392 list_for_each_entry(route, &(vnet_state.routes), node) {
393 struct v3_vnet_route * route_def = &(route->route_def);
396 // CHECK SOURCE TYPE HERE
// NOTE(review): src_type / src_link are only declared in the commented-out
// lines above -- presumably this check is compiled out; confirm.
397 if ( (route_def->src_type != LINK_ANY) &&
398 ( (route_def->src_type != src_type) ||
399 ( (route_def->src_id != src_link) &&
400 (route_def->src_id != -1)))) {
// both qualifiers are wildcards
405 if ((route_def->dst_mac_qual == MAC_ANY) &&
406 (route_def->src_mac_qual == MAC_ANY)) {
// source MAC equals the route's source MAC
410 if (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0) {
411 if (route_def->src_mac_qual != MAC_NOT) {
412 if (route_def->dst_mac_qual == MAC_ANY) {
414 } else if (route_def->dst_mac_qual != MAC_NOT &&
415 memcmp(route_def->dst_mac, hdr->dst_mac, 6) == 0) {
// destination MAC equals the route's destination MAC
421 if (memcmp(route_def->dst_mac, hdr->dst_mac, 6) == 0) {
422 if (route_def->dst_mac_qual != MAC_NOT) {
423 if (route_def->src_mac_qual == MAC_ANY) {
425 } else if ((route_def->src_mac_qual != MAC_NOT) &&
426 (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0)) {
// negated destination: the packet's destination differs from the route's
432 if ((route_def->dst_mac_qual == MAC_NOT) &&
433 (memcmp(route_def->dst_mac, hdr->dst_mac, 6) != 0)) {
434 if (route_def->src_mac_qual == MAC_ANY) {
436 } else if ((route_def->src_mac_qual != MAC_NOT) &&
437 (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0)) {
// negated source: the packet's source differs from the route's
442 if ((route_def->src_mac_qual == MAC_NOT) &&
443 (memcmp(route_def->src_mac, hdr->src_mac, 6) != 0)) {
444 if (route_def->dst_mac_qual == MAC_ANY) {
446 } else if ((route_def->dst_mac_qual != MAC_NOT) &&
447 (memcmp(route_def->dst_mac, hdr->dst_mac, 6) == 0)) {
// source MAC matches and the destination qualifier is MAC_NONE
453 if ( (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0) &&
454 (route_def->dst_mac_qual == MAC_NONE)) {
459 PrintDebug("VNET/P Core: match_route: Matches=%d\n", num_matches);
461 if (num_matches <= 0) {
// one allocation: the route_list header followed by num_matches route pointers
465 matches = (struct route_list *)Vnet_Malloc(sizeof(struct route_list) +
466 (sizeof(struct vnet_route_info *) * num_matches));
470 PrintError("VNET/P Core: Unable to allocate matches\n");
474 matches->num_routes = num_matches;
// copy the surviving matches out of the temporary list into the result array
478 list_for_each_entry(route, &match_list, match_node) {
479 matches->routes[i++] = route;
/*
 * Route one packet.
 *
 * Under vnet_state.lock the packet is added to the rx counters and its
 * routes are determined: the route cache is consulted first, and on a miss
 * match_route() is run and its result cached. If no route matches, the
 * packet is discarded and 0 is returned.
 *
 * After the lock is released the packet is offered to each matched route's
 * destination: the bridge's input() for LINK_EDGE routes (pkt->dst_type and
 * pkt->dst_id are rewritten first), or the destination device's input() for
 * LINK_INTERFACE routes. Any other destination type is logged.
 *
 * NOTE(review): the condition guarding the hexdump, the statements that
 * follow each error print, and the final return are not visible in this
 * extract.
 * NOTE(review): vnet_state.bridge and the matched route_list are used after
 * the lock has been dropped; v3_vnet_add_bridge() temporarily stores
 * (void *)1 in vnet_state.bridge -- confirm this cannot be observed here.
 */
487 int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
488 struct route_list * matched_routes = NULL;
492 int cpu = V3_Get_CPU();
494 Vnet_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
495 cpu, pkt->size, pkt->src_id,
496 pkt->src_type, pkt->dst_id, pkt->dst_type);
499 v3_hexdump(pkt->data, pkt->size, NULL, 0);
502 flags = vnet_lock_irqsave(vnet_state.lock);
// every packet handed to us counts as received, whether or not it is routable
504 vnet_state.stats.rx_bytes += pkt->size;
505 vnet_state.stats.rx_pkts++;
507 look_into_cache(pkt, &matched_routes);
// cache miss: compute the matches and remember them for the next packet
509 if (matched_routes == NULL) {
510 PrintDebug("VNET/P Core: sending pkt - matching route\n");
512 matched_routes = match_route(pkt);
514 if (matched_routes) {
515 add_route_to_cache(pkt, matched_routes);
517 PrintDebug("VNET/P Core: Could not find route for packet... discarding packet\n");
518 vnet_unlock_irqrestore(vnet_state.lock, flags);
519 return 0; /* do we return -1 here?*/
523 vnet_unlock_irqrestore(vnet_state.lock, flags);
525 PrintDebug("VNET/P Core: send pkt route matches %d\n", matched_routes->num_routes);
527 for (i = 0; i < matched_routes->num_routes; i++) {
528 struct vnet_route_info * route = matched_routes->routes[i];
// LINK_EDGE: forward through the bridge
530 if (route->route_def.dst_type == LINK_EDGE) {
531 struct vnet_brg_dev * bridge = vnet_state.bridge;
532 pkt->dst_type = LINK_EDGE;
533 pkt->dst_id = route->route_def.dst_id;
535 if (bridge == NULL) {
536 Vnet_Print(2, "VNET/P Core: No active bridge to sent data to\n");
540 if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){
541 Vnet_Print(2, "VNET/P Core: Packet not sent properly to bridge\n");
544 vnet_state.stats.tx_bytes += pkt->size;
545 vnet_state.stats.tx_pkts ++;
// LINK_INTERFACE: deliver to the route's destination device
546 } else if (route->route_def.dst_type == LINK_INTERFACE) {
547 if (route->dst_dev == NULL){
548 Vnet_Print(2, "VNET/P Core: No active device to sent data to\n");
552 if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) {
553 Vnet_Print(2, "VNET/P Core: Packet not sent properly\n");
556 vnet_state.stats.tx_bytes += pkt->size;
557 vnet_state.stats.tx_pkts ++;
559 Vnet_Print(0, "VNET/P Core: Wrong dst type\n");
/*
 * Register a device with VNET.
 *
 * A vnet_dev is allocated and filled from the arguments: the 6-byte MAC,
 * the input/poll callbacks from ops, priv_data, the poll state, and the
 * quote clamped to at most VNET_MAX_QUOTE. Under vnet_state.lock, if no
 * device with the same MAC is registered yet, the device is linked onto
 * vnet_state.devs, given the next device id (++dev_idx), counted, and put
 * on the poll_devs queue.
 *
 * Returns the new device id on success.
 *
 * NOTE(review): the failure returns, the initialisation of dev_id and vm,
 * any condition guarding the enqueue, and the third line of the parameter
 * list are not visible in this extract. new_dev is allocated before the
 * duplicate-MAC check -- confirm the duplicate path releases it.
 */
567 int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
568 struct v3_vnet_dev_ops * ops, int quote, int poll_state,
570 struct vnet_dev * new_dev = NULL;
573 new_dev = (struct vnet_dev *)Vnet_Malloc(sizeof(struct vnet_dev));
575 if (new_dev == NULL) {
576 Vnet_Print(0, "VNET/P Core: Unable to allocate a new device\n");
580 memcpy(new_dev->mac_addr, mac, 6);
581 new_dev->dev_ops.input = ops->input;
582 new_dev->dev_ops.poll = ops->poll;
583 new_dev->private_data = priv_data;
// clamp the per-poll quote to VNET_MAX_QUOTE
586 new_dev->quote = quote<VNET_MAX_QUOTE ? quote : VNET_MAX_QUOTE;
587 new_dev->poll = poll_state;
589 flags = vnet_lock_irqsave(vnet_state.lock);
// MAC addresses must be unique among registered devices
591 if (dev_by_mac(mac) == NULL) {
592 list_add(&(new_dev->node), &(vnet_state.devs));
593 new_dev->dev_id = ++ vnet_state.dev_idx;
594 vnet_state.num_devs ++;
597 v3_enqueue(vnet_state.poll_devs, (addr_t)new_dev);
600 PrintError("VNET/P: Device with the same MAC has already been added\n");
603 vnet_unlock_irqrestore(vnet_state.lock, flags);
605 /* if the device was found previously the id should still be 0 */
606 if (new_dev->dev_id == 0) {
607 Vnet_Print(0, "VNET/P Core: Device Already exists\n");
611 PrintDebug("VNET/P Core: Add Device: dev_id %d\n", new_dev->dev_id);
613 return new_dev->dev_id;
617 int v3_vnet_del_dev(int dev_id){
618 struct vnet_dev * dev = NULL;
621 flags = vnet_lock_irqsave(vnet_state.lock);
623 dev = dev_by_id(dev_id);
625 list_del(&(dev->node));
626 //del_routes_by_dev(dev_id);
627 vnet_state.num_devs --;
630 vnet_unlock_irqrestore(vnet_state.lock, flags);
634 PrintDebug("VNET/P Core: Removed Device: dev_id %d\n", dev_id);
640 int v3_vnet_stat(struct vnet_stat * stats){
641 stats->rx_bytes = vnet_state.stats.rx_bytes;
642 stats->rx_pkts = vnet_state.stats.rx_pkts;
643 stats->tx_bytes = vnet_state.stats.tx_bytes;
644 stats->tx_pkts = vnet_state.stats.tx_pkts;
649 static void deinit_devices_list(){
650 struct vnet_dev * dev, * tmp;
652 list_for_each_entry_safe(dev, tmp, &(vnet_state.devs), node) {
653 list_del(&(dev->node));
658 static void deinit_routes_list(){
659 struct vnet_route_info * route, * tmp;
661 list_for_each_entry_safe(route, tmp, &(vnet_state.routes), node) {
662 list_del(&(route->node));
663 list_del(&(route->match_node));
668 int v3_vnet_add_bridge(struct v3_vm_info * vm,
669 struct v3_vnet_bridge_ops * ops,
674 struct vnet_brg_dev * tmp_bridge = NULL;
676 flags = vnet_lock_irqsave(vnet_state.lock);
677 if (vnet_state.bridge == NULL) {
679 vnet_state.bridge = (void *)1;
681 vnet_unlock_irqrestore(vnet_state.lock, flags);
683 if (bridge_free == 0) {
684 PrintError("VNET/P Core: Bridge already set\n");
688 tmp_bridge = (struct vnet_brg_dev *)Vnet_Malloc(sizeof(struct vnet_brg_dev));
690 if (tmp_bridge == NULL) {
691 PrintError("VNET/P Core: Unable to allocate new bridge\n");
692 vnet_state.bridge = NULL;
697 tmp_bridge->brg_ops.input = ops->input;
698 tmp_bridge->brg_ops.poll = ops->poll;
699 tmp_bridge->private_data = priv_data;
700 tmp_bridge->type = type;
702 /* make this atomic to avoid possible race conditions */
703 flags = vnet_lock_irqsave(vnet_state.lock);
704 vnet_state.bridge = tmp_bridge;
705 vnet_unlock_irqrestore(vnet_state.lock, flags);
711 void v3_vnet_del_bridge(uint8_t type) {
713 struct vnet_brg_dev * tmp_bridge = NULL;
715 flags = vnet_lock_irqsave(vnet_state.lock);
717 if (vnet_state.bridge != NULL && vnet_state.bridge->type == type) {
718 tmp_bridge = vnet_state.bridge;
719 vnet_state.bridge = NULL;
722 vnet_unlock_irqrestore(vnet_state.lock, flags);
725 Vnet_Free(tmp_bridge);
/*
 * Body of the VNET polling thread.
 *
 * Until vnet_thread_should_stop() reports true, each pass takes devices off
 * the shared vnet_state.poll_devs queue one at a time, parks each on a
 * thread-private queue, and -- for devices with polling enabled and a poll
 * callback -- calls the callback with the device's quote. When the shared
 * queue is drained, the parked devices are put back on it.
 *
 * NOTE(review): the declarations, the handling of the poll return value
 * (rc / more), the choice between yield variants, the allocation-failure
 * exit and the final cleanup/return are not visible in this extract.
 */
730 /* can be instantiated as multiple threads
731 * that runs on multiple cores
732 * or it could be running on a dedicated side core
734 static int vnet_tx_flush(void * args){
735 struct vnet_dev * dev = NULL;
739 Vnet_Print(0, "VNET/P Polling Thread Starting ....\n");
741 // since there are multiple instances of this thread, and only
742 // one queue of pollable devices, our model here will be to synchronize
743 // on that queue, removing devices as we go, and keeping them
744 // then putting them back on the queue when we are done
745 // in this way, multiple instances of this function will never
746 // be polling the same device at the same time
748 struct v3_queue * tq = v3_create_queue();
751 PrintError("VNET/P polling thread cannot allocate queue\n");
756 while (!vnet_thread_should_stop()) {
758 more=0; // will indicate if any device has more work for us to do
760 while ((dev = (struct vnet_dev *)v3_dequeue(vnet_state.poll_devs))) {
761 // we are handling this device
762 v3_enqueue(tq,(addr_t)dev);
764 if (dev->poll && dev->dev_ops.poll) {
765 // The device's poll function MUST NOT BLOCK
766 rc = dev->dev_ops.poll(dev->vm, dev->quote, dev->private_data);
769 Vnet_Print(0, "VNET/P: poll from device %p error (ignoring) !\n", dev);
// hand every device we held back to the shared queue
776 while ((dev = (struct vnet_dev *)v3_dequeue(tq))) {
777 // now someone else can handle it
778 v3_enqueue(vnet_state.poll_devs, (addr_t)dev);
781 // Yield regardless of whether we handled any devices - need
782 // to allow other threads to run
784 // we have more to do, so we want to get back asap
787 // put ourselves briefly to sleep if we don't have more
788 V3_Yield_Timed(VNET_YIELD_USEC);
795 Vnet_Print(0, "VNET/P Polling Thread Done.\n");
/*
 * VNET initialisation body: zero vnet_state, initialise the route and
 * device lists and counters, set up the lock, create the route cache and
 * the poll_devs queue, and start the polling thread ("vnetd-1", running
 * vnet_tx_flush).
 *
 * NOTE(review): the function's signature line (presumably v3_init_vnet, the
 * counterpart of v3_deinit_vnet) and its return statements are not visible
 * in this extract. The results of v3_create_queue() and vnet_start_thread()
 * are not checked in the visible lines.
 */
801 memset(&vnet_state, 0, sizeof(vnet_state));
803 INIT_LIST_HEAD(&(vnet_state.routes));
804 INIT_LIST_HEAD(&(vnet_state.devs));
806 vnet_state.num_devs = 0;
807 vnet_state.num_routes = 0;
809 if (vnet_lock_init(&(vnet_state.lock)) == -1){
810 PrintError("VNET/P: Fails to initiate lock\n");
// the route cache maps a packet's hash_buf (hash_fn / hash_eq) to its route_list
813 vnet_state.route_cache = vnet_create_htable(0, &hash_fn, &hash_eq);
814 if (vnet_state.route_cache == NULL) {
815 PrintError("VNET/P: Fails to initiate route cache\n");
819 vnet_state.poll_devs = v3_create_queue();
821 vnet_state.pkt_flush_thread = vnet_start_thread(vnet_tx_flush, NULL, "vnetd-1");
823 PrintDebug("VNET/P is initiated\n");
829 void v3_deinit_vnet(){
831 PrintDebug("Stopping flush thread\n");
832 // This will pause until the flush thread is gone
833 vnet_thread_stop(vnet_state.pkt_flush_thread);
834 // At this point there should be no lock-holder
836 Vnet_Free(vnet_state.poll_devs);
839 PrintDebug("Deiniting Device List\n");
840 // close any devices we have open
841 deinit_devices_list();
843 PrintDebug("Deiniting Route List\n");
844 // remove any routes we have
845 deinit_routes_list();
847 PrintDebug("Freeing hash table\n");
848 // remove the hash table
849 vnet_free_htable(vnet_state.route_cache, 1, 1);
852 PrintDebug("Removing Bridge\n");
853 // remove bridge if it was added
854 if (vnet_state.bridge) {
855 Vnet_Free(vnet_state.bridge);
858 PrintDebug("Deleting lock\n");
859 // eliminate the lock
860 vnet_lock_deinit(&(vnet_state.lock));