Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Switch VNET to use adaptive yielding by default, to reduce busy-waiting in the bridge...
[palacios.git] / palacios / src / vnet / vnet_core.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2010, Lei Xia <lxia@northwestern.edu> 
11  * Copyright (c) 2009, Yuan Tang <ytang@northwestern.edu>  
12  * Copyright (c) 2009, The V3VEE Project <http://www.v3vee.org> 
13  * All rights reserved
14  *
15  * Author: Lei Xia <lxia@northwestern.edu>
16  *         Yuan Tang <ytang@northwestern.edu>
17  *
18  * This is free software.  You are permitted to use,
19  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
20  */
21  
22 #include <vnet/vnet.h>
23 #include <vnet/vnet_hashtable.h>
24 #include <vnet/vnet_host.h>
25 #include <vnet/vnet_vmm.h>
26
27 #include <palacios/vmm_queue.h>
28
/* Compile PrintDebug() away entirely unless VNET debugging is configured. */
#ifndef V3_CONFIG_DEBUG_VNET
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif

/* Number of packet flush ("TX kick") threads; sizes vnet_state.pkt_flush_thread[]. */
#define VNET_NUM_TX_KICK_THREADS 1

/*
 * Adaptive yielding for the TX kick thread.
 *   VNET_ADAPTIVE_TX_KICK  set to 1 to try to sleep when there is nothing to do
 *   VNET_NOPROGRESS_LIMIT  ... after this many tries
 *   VNET_YIELD_USEC        ... and go to sleep for this long
 */
#define VNET_ADAPTIVE_TX_KICK 1
#define VNET_NOPROGRESS_LIMIT 1000
#define VNET_YIELD_USEC       1000


/* Verbosity knob; v3_vnet_send_pkt hexdumps every packet when this is >= 4. */
int net_debug = 0;
42
43 struct eth_hdr {
44     uint8_t dst_mac[ETH_ALEN];
45     uint8_t src_mac[ETH_ALEN];
46     uint16_t type; /* indicates layer 3 protocol type */
47 } __attribute__((packed));
48
49
50 struct vnet_dev {
51     int dev_id;
52     uint8_t mac_addr[ETH_ALEN];
53     struct v3_vm_info * vm;
54     struct v3_vnet_dev_ops dev_ops;
55
56     int poll;
57
58 #define VNET_MAX_QUOTE 64
59     int quote;
60         
61     void * private_data;
62
63     struct list_head node;
64 } __attribute__((packed));
65
66
67 struct vnet_brg_dev {
68     struct v3_vm_info * vm;
69     struct v3_vnet_bridge_ops brg_ops;
70
71     uint8_t type;
72
73     void * private_data;
74 } __attribute__((packed));
75
76
77
78 struct vnet_route_info {
79     struct v3_vnet_route route_def;
80
81     struct vnet_dev * dst_dev;
82     struct vnet_dev * src_dev;
83
84     uint32_t idx;
85
86     struct list_head node;
87     struct list_head match_node; // used for route matching
88 };
89
90
91 struct route_list {
92     uint8_t hash_buf[VNET_HASH_SIZE];
93
94     uint32_t num_routes;
95     struct vnet_route_info * routes[0];
96 } __attribute__((packed));
97
98
99 struct queue_entry{
100     uint8_t use;
101     struct v3_vnet_pkt pkt;
102     uint8_t * data;
103     uint32_t size_alloc;
104 };
105
106
107 static struct {
108     struct list_head routes;
109     struct list_head devs;
110
111     uint8_t status; 
112    
113     uint32_t num_routes;
114     uint32_t route_idx;
115     uint32_t num_devs;
116     uint32_t dev_idx;
117
118     struct vnet_brg_dev * bridge;
119
120     vnet_lock_t lock;
121     struct vnet_stat stats;
122
123    /* device queue that are waiting to be polled */
124     struct v3_queue * poll_devs;
125
126     struct vnet_thread * pkt_flush_thread[VNET_NUM_TX_KICK_THREADS];
127
128     struct hashtable * route_cache;
129
130 } vnet_state;
131         
132
133 #ifdef V3_CONFIG_DEBUG_VNET
/*
 * Format a 6-byte MAC address as "xx:xx:xx:xx:xx:xx" (lower-case hex,
 * zero padded).
 *
 * mac: the six address bytes.
 * buf: output buffer; must hold at least 18 bytes (17 characters + NUL).
 */
static inline void mac2str(uint8_t * mac, char * buf) {
    /* 6 two-digit octets + 5 separators + terminating NUL */
    enum { MAC_STR_LEN = 18 };

    /* %02x: a bare %2x would pad single-digit octets with a space, not a 0 */
    snprintf(buf, MAC_STR_LEN, "%02x:%02x:%02x:%02x:%02x:%02x",
             mac[0], mac[1], mac[2],
             mac[3], mac[4], mac[5]);
}
139
140 static void print_route(struct v3_vnet_route * route){
141     char str[50];
142
143     mac2str(route->src_mac, str);
144     PrintDebug(VM_NONE, VCORE_NONE, "Src Mac (%s),  src_qual (%d)\n", 
145                str, route->src_mac_qual);
146     mac2str(route->dst_mac, str);
147     PrintDebug(VM_NONE, VCORE_NONE, "Dst Mac (%s),  dst_qual (%d)\n", 
148                str, route->dst_mac_qual);
149     PrintDebug(VM_NONE, VCORE_NONE, "Src dev id (%d), src type (%d)", 
150                route->src_id, 
151                route->src_type);
152     PrintDebug(VM_NONE, VCORE_NONE, "Dst dev id (%d), dst type (%d)\n", 
153                route->dst_id, 
154                route->dst_type);
155 }
156
157 static void dump_routes(){
158     struct vnet_route_info *route;
159
160     PrintDebug(VM_NONE, VCORE_NONE, "\n========Dump routes starts ============\n");
161     list_for_each_entry(route, &(vnet_state.routes), node) {
162         PrintDebug(VM_NONE, VCORE_NONE, "\nroute %d:\n", route->idx);
163                 
164         print_route(&(route->route_def));
165         if (route->route_def.dst_type == LINK_INTERFACE) {
166             PrintDebug(VM_NONE, VCORE_NONE, "dst_dev (%p), dst_dev_id (%d), dst_dev_ops(%p), dst_dev_data (%p)\n",
167                 route->dst_dev,
168                 route->dst_dev->dev_id,
169                 (void *)&(route->dst_dev->dev_ops),
170                 route->dst_dev->private_data);
171         }
172     }
173
174     PrintDebug(VM_NONE, VCORE_NONE, "\n========Dump routes end ============\n");
175 }
176
177 #endif
178
179
180 /* 
181  * A VNET packet is a packed struct with the hashed fields grouped together.
182  * This means we can generate the hash from an offset into the pkt struct
183  */
184 static inline uint_t hash_fn(addr_t hdr_ptr) {    
185     uint8_t * hdr_buf = (uint8_t *)hdr_ptr;
186
187     return vnet_hash_buffer(hdr_buf, VNET_HASH_SIZE);
188 }
189
190 static inline int hash_eq(addr_t key1, addr_t key2) {   
191     return (memcmp((uint8_t *)key1, (uint8_t *)key2, VNET_HASH_SIZE) == 0);
192 }
193
194 static int add_route_to_cache(const struct v3_vnet_pkt * pkt, struct route_list * routes) {
195     memcpy(routes->hash_buf, pkt->hash_buf, VNET_HASH_SIZE);    
196
197     if (vnet_htable_insert(vnet_state.route_cache, (addr_t)routes->hash_buf, (addr_t)routes) == 0) {
198         PrintError(VM_NONE, VCORE_NONE, "VNET/P Core: Failed to insert new route entry to the cache\n");
199         return -1;
200     }
201     
202     return 0;
203 }
204
205 static int clear_hash_cache() {
206     vnet_free_htable(vnet_state.route_cache, 1, 1);
207     vnet_state.route_cache = vnet_create_htable(0, &hash_fn, &hash_eq);
208
209     return 0;
210 }
211
212 static int look_into_cache(const struct v3_vnet_pkt * pkt, 
213                            struct route_list ** routes) {
214     *routes = (struct route_list *)vnet_htable_search(vnet_state.route_cache, (addr_t)(pkt->hash_buf));
215    
216     return 0;
217 }
218
219
220 static struct vnet_dev * dev_by_id(int idx) {
221     struct vnet_dev * dev = NULL; 
222
223     list_for_each_entry(dev, &(vnet_state.devs), node) {
224         if (dev->dev_id == idx) {
225             return dev;
226         }
227     }
228
229     return NULL;
230 }
231
232 static struct vnet_dev * dev_by_mac(uint8_t * mac) {
233     struct vnet_dev * dev = NULL; 
234     
235     list_for_each_entry(dev, &(vnet_state.devs), node) {
236         if (!compare_ethaddr(dev->mac_addr, mac)){
237             return dev;
238         }
239     }
240
241     return NULL;
242 }
243
244
245 static int start_vnet_kick_threads(void);
246 static int stop_vnet_kick_threads(void);
247
248
249 int v3_vnet_find_dev(uint8_t  * mac) {
250     struct vnet_dev * dev = NULL;
251
252     dev = dev_by_mac(mac);
253
254     if(dev != NULL) {
255         return dev->dev_id;
256     }
257
258     return -1;
259 }
260
261
262 int v3_vnet_add_route(struct v3_vnet_route route) {
263     struct vnet_route_info * new_route = NULL;
264     vnet_intr_flags_t flags; 
265
266     new_route = (struct vnet_route_info *)Vnet_Malloc(sizeof(struct vnet_route_info));
267
268     if (!new_route) {
269         PrintError(VM_NONE, VCORE_NONE, "Cannot allocate new route\n");
270         return -1;
271     }
272
273     memset(new_route, 0, sizeof(struct vnet_route_info));
274
275 #ifdef V3_CONFIG_DEBUG_VNET
276     PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: add_route_entry:\n");
277     print_route(&route);
278 #endif
279     
280     memcpy(new_route->route_def.src_mac, route.src_mac, ETH_ALEN);
281     memcpy(new_route->route_def.dst_mac, route.dst_mac, ETH_ALEN);
282     new_route->route_def.src_mac_qual = route.src_mac_qual;
283     new_route->route_def.dst_mac_qual = route.dst_mac_qual;
284     new_route->route_def.dst_type = route.dst_type;
285     new_route->route_def.src_type = route.src_type;
286     new_route->route_def.src_id = route.src_id;
287     new_route->route_def.dst_id = route.dst_id;
288
289     if (new_route->route_def.dst_type == LINK_INTERFACE) {
290         new_route->dst_dev = dev_by_id(new_route->route_def.dst_id);
291     }
292
293     if (new_route->route_def.src_type == LINK_INTERFACE) {
294         new_route->src_dev = dev_by_id(new_route->route_def.src_id);
295     }
296
297
298     flags = vnet_lock_irqsave(vnet_state.lock);
299
300     list_add(&(new_route->node), &(vnet_state.routes));
301     new_route->idx = ++ vnet_state.route_idx;
302     vnet_state.num_routes ++;
303         
304     vnet_unlock_irqrestore(vnet_state.lock, flags);
305
306     clear_hash_cache();
307
308 #ifdef V3_CONFIG_DEBUG_VNET
309     dump_routes();
310 #endif
311
312     return new_route->idx;
313 }
314
315
316 void v3_vnet_del_route(uint32_t route_idx){
317     struct vnet_route_info * route = NULL;
318     vnet_intr_flags_t flags; 
319
320     flags = vnet_lock_irqsave(vnet_state.lock);
321
322     list_for_each_entry(route, &(vnet_state.routes), node) {
323         Vnet_Print(0, "v3_vnet_del_route, route idx: %d\n", route->idx);
324         if(route->idx == route_idx){
325             list_del(&(route->node));
326             Vnet_Free(route);
327             break;    
328         }
329     }
330
331     vnet_unlock_irqrestore(vnet_state.lock, flags);
332     clear_hash_cache();
333
334 #ifdef V3_CONFIG_DEBUG_VNET
335     dump_routes();
336 #endif  
337 }
338
339
340 /* delete all route entries with specfied src or dst device id */ 
341 static void inline del_routes_by_dev(int dev_id){
342     struct vnet_route_info * route, *tmp_route;
343     vnet_intr_flags_t flags; 
344
345     flags = vnet_lock_irqsave(vnet_state.lock);
346
347     list_for_each_entry_safe(route, tmp_route, &(vnet_state.routes), node) {
348         if((route->route_def.dst_type == LINK_INTERFACE &&
349              route->route_def.dst_id == dev_id) ||
350              (route->route_def.src_type == LINK_INTERFACE &&
351               route->route_def.src_id == dev_id)){
352               
353             list_del(&(route->node));
354             list_del(&(route->match_node));
355             Vnet_Free(route);    
356         }
357     }
358     
359     vnet_unlock_irqrestore(vnet_state.lock, flags);
360 }
361
362
/*
 * Match classes, must be in order.
 * They index both dimensions of priority_map, so the values have to stay
 * dense and start at 0.
 */
#define NUM_MATCH_CLASSES       4
#define NUM_MATCH_CLASSES_BOUND 3   /* highest valid class index */

#define NONE    0
#define NOT     1
#define ANY     2
#define DIRECT  3
370
371
372 static inline uint8_t match_mac(uint8_t test_mac[ETH_ALEN], 
373                                 uint8_t route_mac[ETH_ALEN], 
374                                 uint8_t route_qual)
375 {
376     switch (route_qual) { 
377         case MAC_NOSET:
378             return NONE;
379             break;
380         case MAC_NONE:
381             return NONE;
382             break;
383         case MAC_ANY:
384             return ANY;
385             break;
386         case MAC_NOT:
387             if (memcmp(test_mac,route_mac,ETH_ALEN)) { 
388                 return NOT;
389             } else {
390                 return NONE;
391             }
392             break;
393         case MAC_ADDR:
394             if (memcmp(test_mac,route_mac,ETH_ALEN)) { 
395                 return NONE;
396             } else {
397                 return DIRECT;
398             }
399             break;
400         default:
401             PrintError(VM_NONE, VCORE_NONE, "Unknown qualifier %u\n",route_qual);
402             return NONE;
403             break;
404     }
405
406 }
407
/* Printable name of a route MAC qualifier (for debug output). */
#define QUAL_TO_STR(q)  (               \
    (q)==MAC_NOSET ? "MAC_NOSET" :      \
    (q)==MAC_NONE  ? "MAC_NONE"  :      \
    (q)==MAC_ANY   ? "MAC_ANY"   :      \
    (q)==MAC_NOT   ? "MAC_NOT"   :      \
    (q)==MAC_ADDR  ? "MAC_ADDR"  :      \
    "***UNDEFINED****"                  \
    )

/* Printable name of a match class (for debug output). */
#define MATCH_CLASS_TO_STR(c)  (        \
    (c)==NONE   ? "NONE"   :            \
    (c)==NOT    ? "NOT"    :            \
    (c)==ANY    ? "ANY"    :            \
    (c)==DIRECT ? "DIRECT" :            \
    "***UNDEFINED****"                  \
    )
424
425
426
427 /*
428
429 Original priority behavior... 
430   
431 priority   src  srcqual   dst  dstqual
432 3              ANY            ANY
433 4        X                    NONE
434 5              ANY     X      NOT
435 5        X     NOT            ANY
436 6        X     ~NOT           ANY
437 6              ANY     X      ~NOT
438 7        X     ~NOT    X      NOT
439 7        X     NOT     X      ~NOT
440 8        X     ~NOT    X      ~NOT
441 8        X     ~NOT    X      ~NOT
442
443 */
444
445 /*
446   Current priority order is given in the following table
447 */
448
449 // [src][dst] => priority
450 static int priority_map[NUM_MATCH_CLASSES][NUM_MATCH_CLASSES] = 
451 {
452     [NONE] = { [ 0 ... NUM_MATCH_CLASSES_BOUND ] = -1},   // ignore if it's not a source match
453     [NOT][NONE]                          = -1,            // ignore it if there is no destination match   
454     [NOT][NOT]                           = 3,                                   
455     [NOT][ANY]                           = 5,
456     [NOT][DIRECT]                        = 7,
457     [ANY][NONE]                          = -1,            // ignore if there is no destination match
458     [ANY][NOT]                           = 5,
459     [ANY][ANY]                           = 6,
460     [ANY][DIRECT]                        = 6,
461     [DIRECT][NONE]                       = -1,            // ignore if there is no destination match
462     [DIRECT][NOT]                        = 7,            
463     [DIRECT][ANY]                        = 8,            
464     [DIRECT][DIRECT]                     = 8,            
465 };
466
467
468
469
470 static inline int match_priority(uint8_t src_mac[ETH_ALEN],
471                                  uint8_t dst_mac[ETH_ALEN],
472                                  uint8_t route_src_mac[ETH_ALEN],
473                                  uint8_t route_src_qual,
474                                  uint8_t route_dst_mac[ETH_ALEN],
475                                  uint8_t route_dst_qual)
476
477 {
478
479     return priority_map[match_mac(src_mac,route_src_mac,route_src_qual)][match_mac(dst_mac,route_dst_mac,route_dst_qual)];
480 }
481
482
483 /*
484   Route matching will return the list of the highest priority routes that
485   match.  It's a list because it's possible to have multiple high priority routes
486  */ 
487 static struct route_list * match_route(const struct v3_vnet_pkt * pkt) 
488 {
489     int i;
490     struct vnet_route_info * route = NULL; 
491     struct route_list * matches = NULL;
492     int num_matches = 0;
493     int max_priority = -1;
494     struct list_head match_list;
495     struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
496
497     //
498     //
499     // NOTE: USING THE MATCH_NODE in the route list to record a match list
500     // IS A DISASTER WAITING TO HAPPEN
501     //
502     
503 #ifdef V3_CONFIG_DEBUG_VNET
504     {
505         char dst_str[32], src_str[32];
506         mac2str(hdr->src_mac, src_str);  
507         mac2str(hdr->dst_mac, dst_str);
508         PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: match_route. pkt: SRC(%s), DEST(%s)\n", src_str, dst_str);
509     }
510 #endif
511     
512     INIT_LIST_HEAD(&match_list);                        
513     
514     
515     list_for_each_entry(route, &(vnet_state.routes), node) {
516         
517         struct v3_vnet_route * route_def = &(route->route_def);
518         
519         int priority;
520         
521         priority = match_priority(hdr->src_mac,
522                                   hdr->dst_mac,
523                                   route_def->src_mac,
524                                   route_def->src_mac_qual,
525                                   route_def->dst_mac,
526                                   route_def->dst_mac_qual);
527
528         
529
530 #ifdef V3_CONFIG_DEBUG_VNET
531         {
532             char dst_str[32];
533             char src_str[32];
534             
535             mac2str(route_def->src_mac, src_str);  
536             mac2str(route_def->dst_mac, dst_str);
537             
538             PrintDebug(VM_NONE, VCORE_NONE, "Tested match against SRC(%s) SRC_QUAL(%s), DEST(%s) DST_QUAL(%s): "
539                        "SRC_MATCH=%s  DEST_MATCH=%s PRIORITY=%d\n", 
540                        src_str, QUAL_TO_STR(route_def->src_mac_qual), 
541                        dst_str, QUAL_TO_STR(route_def->dst_mac_qual),
542                        MATCH_CLASS_TO_STR(match_mac(hdr->src_mac,route_def->src_mac,route_def->src_mac_qual)),
543                        MATCH_CLASS_TO_STR(match_mac(hdr->dst_mac,route_def->dst_mac,route_def->dst_mac_qual)),
544                    priority);
545         }
546 #endif
547
548         if (priority<0) { 
549             PrintDebug(VM_NONE, VCORE_NONE, "No match to this rule\n");
550             continue;
551         }
552
553         if (priority > max_priority) { 
554             PrintDebug(VM_NONE, VCORE_NONE, "New highest priority match, reseting list\n");
555             max_priority = priority;
556
557             struct vnet_route_info *my_route, *tmp_route;
558
559             list_for_each_entry_safe(my_route, tmp_route, &match_list,match_node) {
560                 list_del(&(my_route->match_node));
561             }
562
563             list_add(&(route->match_node), &match_list);        
564             num_matches = 1;                                    
565             
566         } else if (priority == max_priority) {                      
567             PrintDebug(VM_NONE, VCORE_NONE, "Equal priority match, adding to list\n");
568             
569             list_add(&(route->match_node), &match_list);        
570             num_matches++;                                      
571         }                                                       
572         
573     }
574
575     PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: match_route: Matches=%d\n", num_matches);
576
577     if (num_matches <= 0) {
578         return NULL;
579     }
580     
581     matches = (struct route_list *)Vnet_Malloc(sizeof(struct route_list) + 
582                                                (sizeof(struct vnet_route_info *) * num_matches));
583
584
585     if (!matches) {
586         PrintError(VM_NONE, VCORE_NONE, "VNET/P Core: Unable to allocate matches\n");
587         return NULL;
588     }
589
590     matches->num_routes = num_matches;
591
592     i=0;
593     list_for_each_entry(route, &match_list, match_node) {
594         if (i==num_matches) { 
595             // the list should never have more than num_matches on it...
596             PrintError(VM_NONE, VCORE_NONE, "Weird list behavior\n");
597             break;
598         } else {
599             matches->routes[i++] = route;
600         }
601        
602     }
603
604     return matches;
605 }
606
607 int v3_vnet_query_header(uint8_t src_mac[ETH_ALEN], 
608                          uint8_t dest_mac[ETH_ALEN],
609                          int     recv,         // 0 = send, 1=recv
610                          struct v3_vnet_header *header)
611 {
612     struct route_list *routes;
613     struct vnet_route_info *r;
614     struct v3_vnet_pkt p;
615     void *flags;
616
617     p.size=14;
618     p.data=p.header;
619     memcpy(p.header,dest_mac,ETH_ALEN);
620     memcpy(p.header+ETH_ALEN,src_mac,ETH_ALEN);
621     memset(p.header+12,0,2);
622
623     p.src_type = LINK_EDGE;
624     p.src_id = 0;
625
626     memcpy(header->src_mac,src_mac,ETH_ALEN);
627     memcpy(header->dst_mac,dest_mac,ETH_ALEN);
628
629
630     flags = vnet_lock_irqsave(vnet_state.lock);
631     
632     look_into_cache(&p,&routes);
633
634     if (!routes) { 
635         routes = match_route(&p);
636         if (!routes) { 
637             vnet_unlock_irqrestore(vnet_state.lock,flags);
638             PrintError(VM_NONE, VCORE_NONE, "Cannot match route\n");
639             header->header_type=VNET_HEADER_NOMATCH;
640             header->header_len=0;
641             return -1;
642         } else {
643             add_route_to_cache(&p,routes);
644         }
645     }
646
647     vnet_unlock_irqrestore(vnet_state.lock,flags);
648     
649     if (routes->num_routes<1) { 
650         PrintError(VM_NONE, VCORE_NONE, "Less than one route\n");
651         header->header_type=VNET_HEADER_NOMATCH;
652         header->header_len=0;
653         return -1;
654     }
655
656     if (routes->num_routes>1) { 
657         PrintError(VM_NONE, VCORE_NONE, "More than one route, building header for the first one only\n");
658     }
659
660     r=routes->routes[0];
661
662     switch (r->route_def.dst_type) {
663         case LINK_EDGE: {
664             // switch based on the link type
665             // for mac-in-udp, we would want to generate a mac, ip, and udp header
666             // direct transmission
667
668             // for now we will say we have no encapsulation
669             //
670             header->header_type=VNET_HEADER_NONE;
671             header->header_len=0;
672             header->src_mac_qual=r->route_def.src_mac_qual;
673             header->dst_mac_qual=r->route_def.dst_mac_qual;
674             
675         }
676             
677             return 0;
678             break;
679             
680
681         case LINK_INTERFACE:
682             // direct transmission
683             // let's guess that it goes to the same interface...
684             header->header_type=VNET_HEADER_NONE;
685             header->header_len=0;
686             header->src_mac_qual=r->route_def.src_mac_qual;
687             header->dst_mac_qual=r->route_def.dst_mac_qual;
688
689             return 0;
690             break;
691
692         default:
693             PrintError(VM_NONE, VCORE_NONE, "Unknown destination type\n");
694             return -1;
695             break;
696
697     }
698     
699 }
700
701
702
703
704 int v3_vnet_send_pkt(struct v3_vnet_pkt * pkt, void * private_data) {
705     struct route_list * matched_routes = NULL;
706     vnet_intr_flags_t flags;
707     int i;
708
709     int cpu = V3_Get_CPU();
710
711     Vnet_Print(2, "VNET/P Core: cpu %d: pkt (size %d, src_id:%d, src_type: %d, dst_id: %d, dst_type: %d)\n",
712                cpu, pkt->size, pkt->src_id, 
713                pkt->src_type, pkt->dst_id, pkt->dst_type);
714
715     if(net_debug >= 4){
716         v3_hexdump(pkt->data, pkt->size, NULL, 0);
717     }
718
719     flags = vnet_lock_irqsave(vnet_state.lock);
720
721     vnet_state.stats.rx_bytes += pkt->size;
722     vnet_state.stats.rx_pkts++;
723
724     look_into_cache(pkt, &matched_routes);
725
726     if (matched_routes == NULL) {  
727         PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: sending pkt - matching route\n");
728         
729         matched_routes = match_route(pkt);
730         
731         if (matched_routes) {
732             add_route_to_cache(pkt, matched_routes);
733         } else {
734             PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: Could not find route for packet... discarding packet\n");
735             vnet_unlock_irqrestore(vnet_state.lock, flags);
736             return 0; /* do we return -1 here?*/
737         }
738     }
739
740     vnet_unlock_irqrestore(vnet_state.lock, flags);
741
742     PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: send pkt route matches %d\n", matched_routes->num_routes);
743
744     for (i = 0; i < matched_routes->num_routes; i++) {
745         struct vnet_route_info * route = matched_routes->routes[i];
746         
747         if (route->route_def.dst_type == LINK_EDGE) {
748             struct vnet_brg_dev * bridge = vnet_state.bridge;
749             pkt->dst_type = LINK_EDGE;
750             pkt->dst_id = route->route_def.dst_id;
751
752             if (bridge == NULL) {
753                 Vnet_Print(2, "VNET/P Core: No active bridge to sent data to\n");
754                 continue;
755             }
756
757             if(bridge->brg_ops.input(bridge->vm, pkt, bridge->private_data) < 0){
758                 Vnet_Print(2, "VNET/P Core: Packet not sent properly to bridge\n");
759                 continue;
760             }         
761             vnet_state.stats.tx_bytes += pkt->size;
762             vnet_state.stats.tx_pkts ++;
763         } else if (route->route_def.dst_type == LINK_INTERFACE) {
764             if (route->dst_dev == NULL){
765                   Vnet_Print(2, "VNET/P Core: No active device to sent data to\n");
766                 continue;
767             }
768
769             if(route->dst_dev->dev_ops.input(route->dst_dev->vm, pkt, route->dst_dev->private_data) < 0) {
770                 Vnet_Print(2, "VNET/P Core: Packet not sent properly\n");
771                 continue;
772             }
773             vnet_state.stats.tx_bytes += pkt->size;
774             vnet_state.stats.tx_pkts ++;
775         } else {
776             Vnet_Print(0, "VNET/P Core: Wrong dst type\n");
777         }
778     }
779     
780     return 0;
781 }
782
783
784 int v3_vnet_add_dev(struct v3_vm_info * vm, uint8_t * mac, 
785                     struct v3_vnet_dev_ops * ops, int quote, int poll_state,
786                     void * priv_data){
787     struct vnet_dev * new_dev = NULL;
788     vnet_intr_flags_t flags;
789
790     new_dev = (struct vnet_dev *)Vnet_Malloc(sizeof(struct vnet_dev)); 
791
792     if (new_dev == NULL) {
793         Vnet_Print(0, "VNET/P Core: Unable to allocate a new device\n");
794         return -1;
795     }
796    
797     memcpy(new_dev->mac_addr, mac, ETH_ALEN);
798     new_dev->dev_ops.input = ops->input;
799     new_dev->dev_ops.poll = ops->poll;
800     new_dev->private_data = priv_data;
801     new_dev->vm = vm;
802     new_dev->dev_id = 0;
803     new_dev->quote = quote<VNET_MAX_QUOTE ? quote : VNET_MAX_QUOTE;
804     new_dev->poll = poll_state;
805
806     stop_vnet_kick_threads();
807
808     flags = vnet_lock_irqsave(vnet_state.lock);
809
810     if (dev_by_mac(mac) == NULL) {
811         list_add(&(new_dev->node), &(vnet_state.devs));
812         new_dev->dev_id = ++ vnet_state.dev_idx;
813         vnet_state.num_devs ++;
814
815         if(new_dev->poll) {
816             v3_enqueue(vnet_state.poll_devs, (addr_t)new_dev);
817         }
818     } else {
819         PrintError(VM_NONE, VCORE_NONE,"VNET/P: Device with the same MAC has already been added\n");
820     }
821
822     vnet_unlock_irqrestore(vnet_state.lock, flags);
823
824     start_vnet_kick_threads();
825
826     /* if the device was found previosly the id should still be 0 */
827     if (new_dev->dev_id == 0) {
828         Vnet_Print(0, "VNET/P Core: Device Already exists\n");
829         return -1;
830     }
831
832     PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: Add Device: dev_id %d\n", new_dev->dev_id);
833
834     return new_dev->dev_id;
835 }
836
837
838 int v3_vnet_del_dev(int dev_id){
839     struct vnet_dev * dev = NULL;
840     vnet_intr_flags_t flags;
841
842     stop_vnet_kick_threads();
843
844     flags = vnet_lock_irqsave(vnet_state.lock);
845         
846     dev = dev_by_id(dev_id);
847     if (dev != NULL){
848         list_del(&(dev->node));
849         //del_routes_by_dev(dev_id);
850         vnet_state.num_devs --;
851     }
852         
853     vnet_unlock_irqrestore(vnet_state.lock, flags);
854
855     start_vnet_kick_threads();
856
857     Vnet_Free(dev);
858
859     PrintDebug(VM_NONE, VCORE_NONE, "VNET/P Core: Removed Device: dev_id %d\n", dev_id);
860
861     return 0;
862 }
863
864
865 int v3_vnet_stat(struct vnet_stat * stats){
866     stats->rx_bytes = vnet_state.stats.rx_bytes;
867     stats->rx_pkts = vnet_state.stats.rx_pkts;
868     stats->tx_bytes = vnet_state.stats.tx_bytes;
869     stats->tx_pkts = vnet_state.stats.tx_pkts;
870
871     return 0;
872 }
873
874 static void deinit_devices_list(){
875     struct vnet_dev * dev, * tmp; 
876
877     list_for_each_entry_safe(dev, tmp, &(vnet_state.devs), node) {
878         list_del(&(dev->node));
879         Vnet_Free(dev);
880     }
881 }
882
883 static void deinit_routes_list(){
884     struct vnet_route_info * route, * tmp; 
885
886     list_for_each_entry_safe(route, tmp, &(vnet_state.routes), node) {
887         list_del(&(route->node));
888         list_del(&(route->match_node));
889         Vnet_Free(route);
890     }
891 }
892
893 int v3_vnet_add_bridge(struct v3_vm_info * vm,
894                        struct v3_vnet_bridge_ops * ops,
895                        uint8_t type,
896                        void * priv_data) {
897     vnet_intr_flags_t flags;
898     int bridge_free = 0;
899     struct vnet_brg_dev * tmp_bridge = NULL;    
900     
901     flags = vnet_lock_irqsave(vnet_state.lock);
902     if (vnet_state.bridge == NULL) {
903         bridge_free = 1;
904         vnet_state.bridge = (void *)1;
905     }
906     vnet_unlock_irqrestore(vnet_state.lock, flags);
907
908     if (bridge_free == 0) {
909         PrintError(VM_NONE, VCORE_NONE, "VNET/P Core: Bridge already set\n");
910         return -1;
911     }
912
913     tmp_bridge = (struct vnet_brg_dev *)Vnet_Malloc(sizeof(struct vnet_brg_dev));
914
915     if (tmp_bridge == NULL) {
916         PrintError(VM_NONE, VCORE_NONE, "VNET/P Core: Unable to allocate new bridge\n");
917         vnet_state.bridge = NULL;
918         return -1;
919     }
920     
921     tmp_bridge->vm = vm;
922     tmp_bridge->brg_ops.input = ops->input;
923     tmp_bridge->brg_ops.poll = ops->poll;
924     tmp_bridge->private_data = priv_data;
925     tmp_bridge->type = type;
926         
927     /* make this atomic to avoid possible race conditions */
928     flags = vnet_lock_irqsave(vnet_state.lock);
929     vnet_state.bridge = tmp_bridge;
930     vnet_unlock_irqrestore(vnet_state.lock, flags);
931
932     return 0;
933 }
934
935
936 void v3_vnet_del_bridge(uint8_t type) {
937     vnet_intr_flags_t flags;
938     struct vnet_brg_dev * tmp_bridge = NULL;    
939     
940     flags = vnet_lock_irqsave(vnet_state.lock);
941         
942     if (vnet_state.bridge != NULL && vnet_state.bridge->type == type) {
943         tmp_bridge = vnet_state.bridge;
944         vnet_state.bridge = NULL;
945     }
946         
947     vnet_unlock_irqrestore(vnet_state.lock, flags);
948
949     if (tmp_bridge) {
950         Vnet_Free(tmp_bridge);
951     }
952 }
953
954
/* Can be instantiated as multiple threads
  * that run on multiple cores,
  * or it could be running on a dedicated side core.
  */
959 static int vnet_tx_flush(void * args){
960     struct vnet_dev * dev = NULL;
961     int more;
962     int rc;
963     uint64_t noprogress_count;
964
965     Vnet_Print(0, "VNET/P Polling Thread Starting ....\n");
966
967     // since there are multiple instances of this thread, and only
968     // one queue of pollable devices, our model here will be to synchronize
969     // on that queue, removing devices as we go, and keeping them
970     // then putting them back on the queue when we are done
971     // in this way, multiple instances of this function will never
972     // be polling the same device at the same time
973
974     struct v3_queue * tq = v3_create_queue();
975
976     if (!tq) { 
977         PrintError(VM_NONE, VCORE_NONE, "VNET/P polling thread cannot allocate queue\n");
978         return -1;
979     }
980
981     noprogress_count = 0;
982     
983     while (!vnet_thread_should_stop()) {
984
985         more=0; // will indicate if any device has more work for us to do
986
987         while ((dev = (struct vnet_dev *)v3_dequeue(vnet_state.poll_devs))) { 
988             // we are handling this device
989             v3_enqueue(tq,(addr_t)dev);
990             
991             if (dev->poll && dev->dev_ops.poll) {
992                 // The device's poll function MUST NOT BLOCK
993                 rc = dev->dev_ops.poll(dev->vm, dev->quote, dev->private_data);
994
995                 if (rc<0) { 
996                     Vnet_Print(0, "VNET/P: poll from device %p error (ignoring) !\n", dev);
997                 } else {
998                     more |= rc;  
999                 }
1000             }
1001         }
1002         
1003         while ((dev = (struct vnet_dev *)v3_dequeue(tq))) { 
1004             // now someone else can handle it
1005             v3_enqueue(vnet_state.poll_devs, (addr_t)dev); 
1006         }
1007
1008
1009         if (more) { 
1010             noprogress_count=0;
1011         } else {
1012             if ( ! ((noprogress_count+1) < noprogress_count)) {
1013                 noprogress_count++;
1014             }
1015         }
1016
1017         // adaptively yield 
1018         if ((!VNET_ADAPTIVE_TX_KICK) || (noprogress_count < VNET_NOPROGRESS_LIMIT)) { 
1019             V3_Yield();
1020         } else {
1021             V3_Sleep(VNET_YIELD_USEC);
1022         }
1023
1024     }
1025
1026     Vnet_Free(tq);
1027     
1028     Vnet_Print(0, "VNET/P Polling Thread Done.\n");
1029
1030     return 0;
1031 }
1032
1033 static int start_vnet_kick_threads()
1034 {
1035     int i;
1036
1037     for (i=0; i<VNET_NUM_TX_KICK_THREADS;i++) { 
1038         char name[32];
1039         snprintf(name,32,"vnetd-%d",i);
1040         vnet_state.pkt_flush_thread[i] = vnet_start_thread(vnet_tx_flush, NULL, name);
1041     }
1042     return 0;
1043 }
1044
1045 static int stop_vnet_kick_threads()
1046 {
1047     int i;
1048     for (i=0; i<VNET_NUM_TX_KICK_THREADS;i++) { 
1049         vnet_thread_stop(vnet_state.pkt_flush_thread[i]);
1050     }
1051     return 0;
1052 }
1053
1054
1055 int v3_init_vnet() 
1056 {
1057     memset(&vnet_state, 0, sizeof(vnet_state));
1058         
1059     INIT_LIST_HEAD(&(vnet_state.routes));
1060     INIT_LIST_HEAD(&(vnet_state.devs));
1061
1062     vnet_state.num_devs = 0;
1063     vnet_state.num_routes = 0;
1064
1065     if (vnet_lock_init(&(vnet_state.lock)) == -1){
1066         PrintError(VM_NONE, VCORE_NONE, "VNET/P: Fails to initiate lock\n");
1067     }
1068
1069     vnet_state.route_cache = vnet_create_htable(0, &hash_fn, &hash_eq);
1070     if (vnet_state.route_cache == NULL) {
1071         PrintError(VM_NONE, VCORE_NONE, "VNET/P: Fails to initiate route cache\n");
1072         return -1;
1073     }
1074
1075     vnet_state.poll_devs = v3_create_queue();
1076
1077     start_vnet_kick_threads();
1078
1079     PrintDebug(VM_NONE, VCORE_NONE, "VNET/P is initiated (%d tx kick threads active)\n",VNET_NUM_TX_KICK_THREADS);
1080
1081     return 0;
1082 }
1083
1084
1085 void v3_deinit_vnet() 
1086 {
1087
1088     PrintDebug(VM_NONE, VCORE_NONE, "Stopping kick threads\n");
1089     stop_vnet_kick_threads();
1090
1091
1092     PrintDebug(VM_NONE, VCORE_NONE, "Deiniting poll devices\n");
1093     v3_deinit_queue(vnet_state.poll_devs);
1094     Vnet_Free(vnet_state.poll_devs);
1095
1096
1097     // At this point there should be no lock-holder
1098
1099     Vnet_Free(vnet_state.poll_devs);
1100
1101
1102     PrintDebug(VM_NONE, VCORE_NONE, "Deiniting Device List\n");
1103     // close any devices we have open
1104     deinit_devices_list();  
1105     
1106     PrintDebug(VM_NONE, VCORE_NONE, "Deiniting Route List\n");
1107     // remove any routes we have
1108     deinit_routes_list();
1109
1110     PrintDebug(VM_NONE, VCORE_NONE, "Freeing hash table\n");
1111     // remove the hash table
1112     vnet_free_htable(vnet_state.route_cache, 1, 1);
1113
1114     
1115     PrintDebug(VM_NONE, VCORE_NONE, "Removing Bridge\n");
1116     // remove bridge if it was added
1117     if (vnet_state.bridge) { 
1118         Vnet_Free(vnet_state.bridge);
1119     }
1120
1121     PrintDebug(VM_NONE, VCORE_NONE, "Deleting lock\n");
1122     // eliminate the lock
1123     vnet_lock_deinit(&(vnet_state.lock));
1124
1125 }
1126
1127