/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2010, Lei Xia <lxia@northwestern.edu>
 * Copyright (c) 2009, Yuan Tang <ytang@northwestern.edu>
 * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Lei Xia <lxia@northwestern.edu>
 *         Yuan Tang <ytang@northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <vnet/vnet.h>
#include <vnet/vnet_hashtable.h>
#include <vnet/vnet_host.h>
#include <vnet/vnet_vmm.h>
/* compile debug output away when VNET debugging is disabled */
#ifndef V3_CONFIG_DEBUG_VNET
#undef Vnet_Debug
#define Vnet_Debug(fmt, args...)
#endif
struct eth_hdr {
    uint8_t dst_mac[ETH_ALEN];
    uint8_t src_mac[ETH_ALEN];
    uint16_t type; /* indicates layer 3 protocol type */
} __attribute__((packed));
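
/* A VNET device endpoint: one guest (or host) NIC attached to the overlay.
 * Each device carries its MAC address, the VM it belongs to, and the input
 * callback used to hand packets to it. Registered via v3_vnet_add_dev(). */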
struct vnet_dev {
    int dev_id;
    uint8_t mac_addr[ETH_ALEN];
    struct v3_vm_info * vm;
    struct v3_vnet_dev_ops dev_ops;
    void * private_data;

    struct list_head node;
} __attribute__((packed));
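
/* The bridge is the single edge link out of this VNET instance; at most one
 * bridge may be registered at a time (see v3_vnet_add_bridge() below). */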
struct vnet_brg_dev {
    struct v3_vm_info * vm;
    struct v3_vnet_bridge_ops brg_ops;

    uint8_t type;

    void * private_data;
} __attribute__((packed));
struct vnet_route_info {
    struct v3_vnet_route route_def;

    struct vnet_dev * dst_dev;
    struct vnet_dev * src_dev;

    uint32_t idx;

    struct list_head node;
    struct list_head match_node; // used for route matching
};
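
/* A route lookup result: the set of equally-ranked routes matched for one
 * packet header. These lists are inserted into the route cache, keyed by
 * the packet's hash buffer, so they are only freed when the cache is
 * flushed. */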
struct route_list {
    uint8_t hash_buf[VNET_HASH_SIZE];

    uint32_t num_routes;
    struct vnet_route_info * routes[0];
} __attribute__((packed));
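
/* Deferred-transmit queue: a fixed-size ring of packet copies consumed by
 * the packet flush thread. Each entry's `use` flag provides a simple
 * producer/consumer handshake: the enqueuer sets it once the entry is
 * filled, and the flush thread clears it after transmission. */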
struct queue_entry {
    uint8_t use;
    struct v3_vnet_pkt pkt;
    uint8_t * data;
    uint32_t size_alloc;
};

#define VNET_QUEUE_SIZE 1024
struct vnet_queue {
    struct queue_entry buf[VNET_QUEUE_SIZE];
    int head, tail;
    int count;
    vnet_lock_t lock;
};
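
/* Global VNET state. The single `lock` guards the route/device lists and
 * the route cache; the packet queue carries its own lock so enqueueing
 * does not contend with route table updates. */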
static struct {
    struct list_head routes;
    struct list_head devs;

    int num_routes;
    int route_idx;
    int num_devs;
    int dev_idx;

    struct vnet_brg_dev * bridge;

    vnet_lock_t lock;
    struct vnet_stat stats;

    /* device-independent packet flushing thread */
    struct vnet_thread * pkt_flush_thread;

    struct vnet_queue pkt_q;

    struct hashtable * route_cache;
} vnet_state;
#ifdef V3_CONFIG_DEBUG_VNET
/* formats a MAC address as "xx:xx:xx:xx:xx:xx"; buf must hold at least 18 bytes */
static inline void mac2str(uint8_t * mac, char * buf) {
    snprintf(buf, 18, "%02x:%02x:%02x:%02x:%02x:%02x",
             mac[0], mac[1], mac[2],
             mac[3], mac[4], mac[5]);
}
static void print_route(struct v3_vnet_route * route){
    char str[50];

    mac2str(route->src_mac, str);
    Vnet_Debug("Src Mac (%s), src_qual (%d)\n",
               str, route->src_mac_qual);
    mac2str(route->dst_mac, str);
    Vnet_Debug("Dst Mac (%s), dst_qual (%d)\n",
               str, route->dst_mac_qual);
    Vnet_Debug("Src dev id (%d), src type (%d)\n",
               route->src_id,
               route->src_type);
    Vnet_Debug("Dst dev id (%d), dst type (%d)\n",
               route->dst_id,
               route->dst_type);
}
static void dump_routes(){
    struct vnet_route_info * route;

    Vnet_Debug("\n======== dump routes: start ========\n");
    list_for_each_entry(route, &(vnet_state.routes), node) {
        Vnet_Debug("\nroute %d:\n", route->idx);

        print_route(&(route->route_def));
        if (route->route_def.dst_type == LINK_INTERFACE) {
            Vnet_Debug("dst_dev (%p), dst_dev_id (%d), dst_dev_ops (%p), dst_dev_data (%p)\n",
                       route->dst_dev,
                       route->dst_dev->dev_id,
                       (void *)&(route->dst_dev->dev_ops),
                       route->dst_dev->private_data);
        }
    }

    Vnet_Debug("\n======== dump routes: end ========\n");
}
#endif
/*
 * A VNET packet is a packed struct with the hashed fields grouped together.
 * This means we can generate the hash from an offset into the pkt struct.
 */
static inline uint_t hash_fn(addr_t hdr_ptr) {
    uint8_t * hdr_buf = (uint8_t *)hdr_ptr;

    return vnet_hash_buffer(hdr_buf, VNET_HASH_SIZE);
}
static inline int hash_eq(addr_t key1, addr_t key2) {
    return (memcmp((uint8_t *)key1, (uint8_t *)key2, VNET_HASH_SIZE) == 0);
}
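
/* The route cache maps the VNET_HASH_SIZE-byte header hash buffer of a
 * packet to the route_list computed for it, so repeat lookups can skip the
 * full routing table scan done by match_route(). */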
static int add_route_to_cache(const struct v3_vnet_pkt * pkt, struct route_list * routes) {
    memcpy(routes->hash_buf, pkt->hash_buf, VNET_HASH_SIZE);

    if (vnet_htable_insert(vnet_state.route_cache, (addr_t)routes->hash_buf, (addr_t)routes) == 0) {
        PrintError("VNET/P Core: Failed to insert new route entry into the cache\n");
        return -1;
    }

    return 0;
}
static int clear_hash_cache() {
    vnet_free_htable(vnet_state.route_cache, 1, 1);
    vnet_state.route_cache = vnet_create_htable(0, &hash_fn, &hash_eq);

    return 0;
}
static int look_into_cache(const struct v3_vnet_pkt * pkt,
                           struct route_list ** routes) {
    *routes = (struct route_list *)vnet_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf));

    return 0;
}
static struct vnet_dev * dev_by_id(int idx) {
    struct vnet_dev * dev = NULL;

    list_for_each_entry(dev, &(vnet_state.devs), node) {
        int dev_id = dev->dev_id;

        if (dev_id == idx) {
            return dev;
        }
    }

    return NULL;
}
static struct vnet_dev * dev_by_mac(uint8_t * mac) {
    struct vnet_dev * dev = NULL;

    list_for_each_entry(dev, &(vnet_state.devs), node) {
        if (!compare_ethaddr(dev->mac_addr, mac)){
            return dev;
        }
    }

    return NULL;
}
int v3_vnet_find_dev(uint8_t * mac) {
    struct vnet_dev * dev = NULL;

    dev = dev_by_mac(mac);

    if (dev != NULL) {
        return dev->dev_id;
    }

    return -1;
}
int v3_vnet_add_route(struct v3_vnet_route route) {
    struct vnet_route_info * new_route = NULL;
    unsigned long flags;

    new_route = (struct vnet_route_info *)Vnet_Malloc(sizeof(struct vnet_route_info));
    if (new_route == NULL) {
        PrintError("VNET/P Core: Failed to allocate route entry\n");
        return -1;
    }
    memset(new_route, 0, sizeof(struct vnet_route_info));

#ifdef V3_CONFIG_DEBUG_VNET
    Vnet_Debug("VNET/P Core: add_route_entry:\n");
    print_route(&route);
#endif

    memcpy(new_route->route_def.src_mac, route.src_mac, ETH_ALEN);
    memcpy(new_route->route_def.dst_mac, route.dst_mac, ETH_ALEN);
    new_route->route_def.src_mac_qual = route.src_mac_qual;
    new_route->route_def.dst_mac_qual = route.dst_mac_qual;
    new_route->route_def.dst_type = route.dst_type;
    new_route->route_def.src_type = route.src_type;
    new_route->route_def.src_id = route.src_id;
    new_route->route_def.dst_id = route.dst_id;

    if (new_route->route_def.dst_type == LINK_INTERFACE) {
        new_route->dst_dev = dev_by_id(new_route->route_def.dst_id);
    }

    if (new_route->route_def.src_type == LINK_INTERFACE) {
        new_route->src_dev = dev_by_id(new_route->route_def.src_id);
    }

    flags = vnet_lock_irqsave(vnet_state.lock);

    list_add(&(new_route->node), &(vnet_state.routes));
    new_route->idx = ++ vnet_state.route_idx;
    vnet_state.num_routes ++;

    vnet_unlock_irqrestore(vnet_state.lock, flags);

    /* flush the route cache: the new route may change existing matches */
    clear_hash_cache();

#ifdef V3_CONFIG_DEBUG_VNET
    dump_routes();
#endif

    return new_route->idx;
}
void v3_vnet_del_route(uint32_t route_idx){
    struct vnet_route_info * route = NULL;
    unsigned long flags;

    flags = vnet_lock_irqsave(vnet_state.lock);

    list_for_each_entry(route, &(vnet_state.routes), node) {
        if (route->idx == route_idx) {
            V3_Print("v3_vnet_del_route, route idx: %d\n", route->idx);
            list_del(&(route->node));
            Vnet_Free(route);
            break;
        }
    }

    vnet_unlock_irqrestore(vnet_state.lock, flags);

    /* flush the route cache: cached lists may reference the dead route */
    clear_hash_cache();

#ifdef V3_CONFIG_DEBUG_VNET
    dump_routes();
#endif
}
/* delete all route entries with the specified src or dst device id */
static inline void del_routes_by_dev(int dev_id){
    struct vnet_route_info * route = NULL;
    struct vnet_route_info * tmp = NULL;
    unsigned long flags;

    flags = vnet_lock_irqsave(vnet_state.lock);

    /* use the _safe iterator since entries are freed while walking the list */
    list_for_each_entry_safe(route, tmp, &(vnet_state.routes), node) {
        if ((route->route_def.dst_type == LINK_INTERFACE &&
             route->route_def.dst_id == dev_id) ||
            (route->route_def.src_type == LINK_INTERFACE &&
             route->route_def.src_id == dev_id)) {

            list_del(&(route->node));
            list_del(&(route->match_node));
            Vnet_Free(route);
        }
    }

    vnet_unlock_irqrestore(vnet_state.lock, flags);
}
/* At the end, allocate a route_list to hold the matches.
 * This list will be inserted into the cache, so we don't need to free it.
 */
static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
    struct vnet_route_info * route = NULL;
    struct route_list * matches = NULL;
    int num_matches = 0;
    int max_rank = 0;
    struct list_head match_list;
    struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
    //    uint8_t src_type = pkt->src_type;
    //    uint32_t src_link = pkt->src_id;

#ifdef V3_CONFIG_DEBUG_VNET
    {
        char dst_str[100];
        char src_str[100];

        mac2str(hdr->src_mac, src_str);
        mac2str(hdr->dst_mac, dst_str);
        Vnet_Debug("VNET/P Core: match_route. pkt: SRC(%s), DEST(%s)\n", src_str, dst_str);
    }
#endif

    INIT_LIST_HEAD(&match_list);
#define UPDATE_MATCHES(rank) do {                               \
        if (max_rank < (rank)) {                                \
            max_rank = (rank);                                  \
            INIT_LIST_HEAD(&match_list);                        \
                                                                \
            list_add(&(route->match_node), &match_list);        \
            num_matches = 1;                                    \
        } else if (max_rank == (rank)) {                        \
            list_add(&(route->match_node), &match_list);        \
            num_matches++;                                      \
        }                                                       \
    } while (0)
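
    /* Match ranking used below (higher is more specific): exact src+dst
     * match = 8, NOT-qualified miss plus an exact match = 7, exact match on
     * one side with ANY on the other = 6, NOT-qualified miss with ANY = 5,
     * default route (MAC_NONE dst) = 4, ANY/ANY wildcard = 3. Only routes
     * at the highest rank seen so far are kept on match_list. */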
    list_for_each_entry(route, &(vnet_state.routes), node) {
        struct v3_vnet_route * route_def = &(route->route_def);

        /* The source link check is disabled in this version; the fields it
         * needs (src_type/src_link above) are commented out as well. */
        /*
        // CHECK SOURCE TYPE HERE
        if ( (route_def->src_type != LINK_ANY) &&
             ( (route_def->src_type != src_type) ||
               ( (route_def->src_id != src_link) &&
                 (route_def->src_id != -1)))) {
            continue;
        }
        */

        if ((route_def->dst_mac_qual == MAC_ANY) &&
            (route_def->src_mac_qual == MAC_ANY)) {
            UPDATE_MATCHES(3);
        }

        if (memcmp(route_def->src_mac, hdr->src_mac, ETH_ALEN) == 0) {
            if (route_def->src_mac_qual != MAC_NOT) {
                if (route_def->dst_mac_qual == MAC_ANY) {
                    UPDATE_MATCHES(6);
                } else if ((route_def->dst_mac_qual != MAC_NOT) &&
                           (memcmp(route_def->dst_mac, hdr->dst_mac, ETH_ALEN) == 0)) {
                    UPDATE_MATCHES(8);
                }
            }
        }

        if (memcmp(route_def->dst_mac, hdr->dst_mac, ETH_ALEN) == 0) {
            if (route_def->dst_mac_qual != MAC_NOT) {
                if (route_def->src_mac_qual == MAC_ANY) {
                    UPDATE_MATCHES(6);
                } else if ((route_def->src_mac_qual != MAC_NOT) &&
                           (memcmp(route_def->src_mac, hdr->src_mac, ETH_ALEN) == 0)) {
                    UPDATE_MATCHES(8);
                }
            }
        }

        if ((route_def->dst_mac_qual == MAC_NOT) &&
            (memcmp(route_def->dst_mac, hdr->dst_mac, ETH_ALEN) != 0)) {
            if (route_def->src_mac_qual == MAC_ANY) {
                UPDATE_MATCHES(5);
            } else if ((route_def->src_mac_qual != MAC_NOT) &&
                       (memcmp(route_def->src_mac, hdr->src_mac, ETH_ALEN) == 0)) {
                UPDATE_MATCHES(7);
            }
        }

        if ((route_def->src_mac_qual == MAC_NOT) &&
            (memcmp(route_def->src_mac, hdr->src_mac, ETH_ALEN) != 0)) {
            if (route_def->dst_mac_qual == MAC_ANY) {
                UPDATE_MATCHES(5);
            } else if ((route_def->dst_mac_qual != MAC_NOT) &&
                       (memcmp(route_def->dst_mac, hdr->dst_mac, ETH_ALEN) == 0)) {
                UPDATE_MATCHES(7);
            }
        }

        /* default route */
        if ((memcmp(route_def->src_mac, hdr->src_mac, ETH_ALEN) == 0) &&
            (route_def->dst_mac_qual == MAC_NONE)) {
            UPDATE_MATCHES(4);
        }
    }

    Vnet_Debug("VNET/P Core: match_route: Matches=%d\n", num_matches);

    if (num_matches == 0) {
        return NULL;
    }

    matches = (struct route_list *)Vnet_Malloc(sizeof(struct route_list) +
                                               (sizeof(struct vnet_route_info *) * num_matches));
    if (matches == NULL) {
        PrintError("VNET/P Core: Failed to allocate matched route list\n");
        return NULL;
    }

    matches->num_routes = num_matches;

    {
        int i = 0;
        list_for_each_entry(route, &match_list, match_node) {
            matches->routes[i++] = route;
        }
    }

    return matches;
}
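
/* Synchronous transmit path: find (or compute and cache) the matching
 * route list for the packet, then hand the packet to each destination,
 * either the edge bridge or a local device interface. */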
int vnet_tx_one_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
    struct route_list * matched_routes = NULL;
    unsigned long flags;
    int i;

    int cpu = V3_Get_CPU();
    Vnet_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id: %d, src_type: %d, dst_id: %d, dst_type: %d)\n",
               cpu, pkt->size, pkt->src_id,
               pkt->src_type, pkt->dst_id, pkt->dst_type);

#ifdef V3_CONFIG_DEBUG_VNET
    v3_hexdump(pkt->data, pkt->size, NULL, 0);
#endif

    flags = vnet_lock_irqsave(vnet_state.lock);

    vnet_state.stats.rx_bytes += pkt->size;
    vnet_state.stats.rx_pkts++;

    look_into_cache(pkt, &matched_routes);
    if (matched_routes == NULL) {
        Vnet_Debug("VNET/P Core: send pkt: looking into routing table\n");

        matched_routes = match_route(pkt);

        if (matched_routes) {
            add_route_to_cache(pkt, matched_routes);
        } else {
            Vnet_Debug("VNET/P Core: Could not find route for packet... discarding packet\n");
            vnet_unlock_irqrestore(vnet_state.lock, flags);
            return 0; /* do we return -1 here? */
        }
    }

    vnet_unlock_irqrestore(vnet_state.lock, flags);
    Vnet_Debug("VNET/P Core: send pkt: route matches %d\n", matched_routes->num_routes);

    for (i = 0; i < matched_routes->num_routes; i++) {
        struct vnet_route_info * route = matched_routes->routes[i];

        if (route->route_def.dst_type == LINK_EDGE) {
            struct vnet_brg_dev * bridge = vnet_state.bridge;
            pkt->dst_type = LINK_EDGE;
            pkt->dst_id = route->route_def.dst_id;

            if (bridge == NULL) {
                Vnet_Print(2, "VNET/P Core: No active bridge to send data to\n");
                continue;
            }

            if (bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0) {
                Vnet_Print(2, "VNET/P Core: Packet not sent properly to bridge\n");
                continue;
            }
            vnet_state.stats.tx_bytes += pkt->size;
            vnet_state.stats.tx_pkts ++;
        } else if (route->route_def.dst_type == LINK_INTERFACE) {
            if (route->dst_dev == NULL) {
                Vnet_Print(2, "VNET/P Core: No active device to send data to\n");
                continue;
            }

            if (route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) {
                Vnet_Print(2, "VNET/P Core: Packet not sent properly\n");
                continue;
            }
            vnet_state.stats.tx_bytes += pkt->size;
            vnet_state.stats.tx_pkts ++;
        } else {
            Vnet_Print(0, "VNET/P Core: Wrong dst type\n");
        }
    }

    return 0;
}
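
/* Asynchronous transmit path: copy the packet into a ring entry so the
 * caller's buffer can be reused immediately; the flush thread transmits
 * the copy later. Per-entry buffers grow on demand (page granularity)
 * and are reused across packets. */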
static int vnet_pkt_enqueue(struct v3_vnet_pkt * pkt){
    unsigned long flags;
    struct queue_entry * entry;
    struct vnet_queue * q = &(vnet_state.pkt_q);
    uint16_t num_pages;

    flags = vnet_lock_irqsave(q->lock);

    if (q->count >= VNET_QUEUE_SIZE){
        Vnet_Print(1, "VNET Queue overflow!\n");
        vnet_unlock_irqrestore(q->lock, flags);
        return -1;
    }

    q->count ++;
    entry = &(q->buf[q->tail++]);
    q->tail %= VNET_QUEUE_SIZE;

    vnet_unlock_irqrestore(q->lock, flags);

    /* this is ugly, but the wait should almost never happen */
    while (entry->use);

    if (entry->size_alloc < pkt->size) {
        if (entry->data != NULL) {
            Vnet_FreePages(Vnet_PAddr(entry->data), (entry->size_alloc / PAGE_SIZE));
            entry->data = NULL;
        }

        num_pages = 1 + (pkt->size / PAGE_SIZE);
        entry->data = Vnet_VAddr(Vnet_AllocPages(num_pages));
        if (entry->data == NULL) {
            return -1;
        }
        entry->size_alloc = PAGE_SIZE * num_pages;
    }

    /* copy the packet descriptor first, then point it at the entry's
     * private buffer; copying afterwards would clobber the pointer */
    memcpy(&(entry->pkt), pkt, sizeof(struct v3_vnet_pkt));
    entry->pkt.data = entry->data;
    memcpy(entry->data, pkt->data, pkt->size);

    entry->use = 1;

    return 0;
}
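
/* Public send entry point: `synchronize` selects between transmitting the
 * packet inline (vnet_tx_one_pkt) and deferring it to the packet flush
 * thread through the queue (vnet_pkt_enqueue). */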
int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data, int synchronize) {
    if (synchronize) {
        vnet_tx_one_pkt(pkt, NULL);
    } else {
        vnet_pkt_enqueue(pkt);
        Vnet_Print(2, "VNET/P Core: Put pkt into queue: pkt size %d\n", pkt->size);
    }

    return 0;
}
int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac,
                    struct v3_vnet_dev_ops * ops,
                    void * priv_data){
    struct vnet_dev * new_dev = NULL;
    unsigned long flags;

    new_dev = (struct vnet_dev *)Vnet_Malloc(sizeof(struct vnet_dev));

    if (new_dev == NULL) {
        Vnet_Print(0, "Malloc fails\n");
        return -1;
    }

    memcpy(new_dev->mac_addr, mac, ETH_ALEN);
    new_dev->dev_ops.input = ops->input;
    new_dev->private_data = priv_data;
    new_dev->vm = vm;
    new_dev->dev_id = 0;

    flags = vnet_lock_irqsave(vnet_state.lock);

    if (dev_by_mac(mac) == NULL) {
        list_add(&(new_dev->node), &(vnet_state.devs));
        new_dev->dev_id = ++ vnet_state.dev_idx;
        vnet_state.num_devs ++;
    }

    vnet_unlock_irqrestore(vnet_state.lock, flags);

    /* if the device was found previously, the id should still be 0 */
    if (new_dev->dev_id == 0) {
        Vnet_Print(0, "VNET/P Core: Device already exists\n");
        Vnet_Free(new_dev);
        return -1;
    }

    Vnet_Debug("VNET/P Core: Add Device: dev_id %d\n", new_dev->dev_id);

    return new_dev->dev_id;
}
int v3_vnet_del_dev(int dev_id){
    struct vnet_dev * dev = NULL;
    unsigned long flags;

    flags = vnet_lock_irqsave(vnet_state.lock);

    dev = dev_by_id(dev_id);
    if (dev != NULL) {
        list_del(&(dev->node));
        //del_routes_by_dev(dev_id);
        vnet_state.num_devs --;
    }

    vnet_unlock_irqrestore(vnet_state.lock, flags);

    if (dev != NULL) {
        Vnet_Free(dev);
    }

    Vnet_Debug("VNET/P Core: Remove Device: dev_id %d\n", dev_id);

    return 0;
}
int v3_vnet_stat(struct vnet_stat * stats){
    stats->rx_bytes = vnet_state.stats.rx_bytes;
    stats->rx_pkts = vnet_state.stats.rx_pkts;
    stats->tx_bytes = vnet_state.stats.tx_bytes;
    stats->tx_pkts = vnet_state.stats.tx_pkts;

    return 0;
}
static void deinit_devices_list(){
    struct vnet_dev * dev = NULL;
    struct vnet_dev * tmp = NULL;

    /* use the _safe iterator since entries are freed while walking the list */
    list_for_each_entry_safe(dev, tmp, &(vnet_state.devs), node) {
        list_del(&(dev->node));
        Vnet_Free(dev);
    }
}
static void deinit_routes_list(){
    struct vnet_route_info * route = NULL;
    struct vnet_route_info * tmp = NULL;

    list_for_each_entry_safe(route, tmp, &(vnet_state.routes), node) {
        list_del(&(route->node));
        list_del(&(route->match_node));
        Vnet_Free(route);
    }
}
int v3_vnet_add_bridge(struct v3_vm_info * vm,
                       struct v3_vnet_bridge_ops * ops,
                       uint8_t type,
                       void * priv_data) {
    unsigned long flags;
    int bridge_free = 0;
    struct vnet_brg_dev * tmp_bridge = NULL;

    flags = vnet_lock_irqsave(vnet_state.lock);
    if (vnet_state.bridge == NULL) {
        bridge_free = 1;
        /* reserve the slot with a sentinel until the real bridge is ready */
        vnet_state.bridge = (void *)1;
    }
    vnet_unlock_irqrestore(vnet_state.lock, flags);

    if (bridge_free == 0) {
        PrintError("VNET/P Core: Bridge already set\n");
        return -1;
    }

    tmp_bridge = (struct vnet_brg_dev *)Vnet_Malloc(sizeof(struct vnet_brg_dev));

    if (tmp_bridge == NULL) {
        PrintError("Malloc Fails\n");
        vnet_state.bridge = NULL;
        return -1;
    }

    tmp_bridge->vm = vm;
    tmp_bridge->brg_ops.input = ops->input;
    tmp_bridge->brg_ops.poll = ops->poll;
    tmp_bridge->private_data = priv_data;
    tmp_bridge->type = type;

    /* make this atomic to avoid possible race conditions */
    flags = vnet_lock_irqsave(vnet_state.lock);
    vnet_state.bridge = tmp_bridge;
    vnet_unlock_irqrestore(vnet_state.lock, flags);

    return 0;
}
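
/* Note the two-phase registration above: the lock is held only long enough
 * to reserve the bridge slot with a sentinel value, so the allocation and
 * field setup run without holding the global lock. */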
void v3_vnet_del_bridge(uint8_t type) {
    unsigned long flags;
    struct vnet_brg_dev * tmp_bridge = NULL;

    flags = vnet_lock_irqsave(vnet_state.lock);

    if ((vnet_state.bridge != NULL) && (vnet_state.bridge->type == type)) {
        tmp_bridge = vnet_state.bridge;
        vnet_state.bridge = NULL;
    }

    vnet_unlock_irqrestore(vnet_state.lock, flags);

    if (tmp_bridge) {
        Vnet_Free(tmp_bridge);
    }
}
static int vnet_tx_flush(void * args){
    unsigned long flags;
    struct queue_entry * entry;
    struct vnet_queue * q = &(vnet_state.pkt_q);

    Vnet_Print(0, "VNET/P Packet Handling Thread Starting ....\n");

    /* we need thread sleep/wakeup in Palacios; until then, poll and yield */
    while (!vnet_thread_should_stop()) {
        flags = vnet_lock_irqsave(q->lock);

        if (q->count <= 0) {
            vnet_unlock_irqrestore(q->lock, flags);
            Vnet_Yield();
        } else {
            q->count --;
            entry = &(q->buf[q->head++]);
            q->head %= VNET_QUEUE_SIZE;

            vnet_unlock_irqrestore(q->lock, flags);

            /* this is ugly, but the wait should almost never happen */
            while (!entry->use);

            vnet_tx_one_pkt(&(entry->pkt), NULL);

            /* asynchronously release the entry; its buffer is freed lazily on reuse */
            entry->use = 0;

            Vnet_Print(2, "vnet_tx_flush: pkt (size %d)\n", entry->pkt.size);
        }
    }

    return 0;
}
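
/* The flush thread polls: with no thread sleep/wakeup primitive available
 * in Palacios (see the comment above), it yields the CPU whenever the
 * queue is empty rather than blocking. */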
int v3_init_vnet() {
    memset(&vnet_state, 0, sizeof(vnet_state));

    INIT_LIST_HEAD(&(vnet_state.routes));
    INIT_LIST_HEAD(&(vnet_state.devs));

    vnet_state.num_devs = 0;
    vnet_state.num_routes = 0;

    if (vnet_lock_init(&(vnet_state.lock)) == -1){
        PrintError("VNET/P Core: Failed to initialize lock\n");
        return -1;
    }

    vnet_state.route_cache = vnet_create_htable(0, &hash_fn, &hash_eq);
    if (vnet_state.route_cache == NULL) {
        PrintError("VNET/P Core: Failed to initialize route cache\n");
        return -1;
    }

    vnet_lock_init(&(vnet_state.pkt_q.lock));

    vnet_state.pkt_flush_thread = vnet_start_thread(vnet_tx_flush, NULL, "VNET_Pkts");

    Vnet_Debug("VNET/P Core is initialized\n");

    return 0;
}
void v3_deinit_vnet(){

    vnet_lock_deinit(&(vnet_state.lock));

    deinit_devices_list();
    deinit_routes_list();

    vnet_free_htable(vnet_state.route_cache, 1, 1);
    Vnet_Free(vnet_state.bridge);
}