Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Enhancements to VNET and to halting:
Peter Dinda [Sun, 5 Aug 2012 23:14:14 +0000 (18:14 -0500)]
- VNET sends UDP packets without checksumming
- VNET threads (bridge and transmit kick thread)
  adaptively choose yielding strategy
- halt handler adaptively chooses yielding strategy

linux_module/palacios-vnet-brg.c
palacios/src/devices/lnx_virtio_nic.c
palacios/src/devices/vnet_nic.c
palacios/src/palacios/vmm_halt.c
palacios/src/vnet/vnet_core.c

index 3764a52..309171d 100644 (file)
 #include <linux/sched.h>
 #include <asm/msr.h>
 
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <net/sock.h>
+
 #include <vnet/vnet.h>
 #include <vnet/vnet_hashtable.h>
 #include "palacios-vnet.h"
 #include "palacios.h"
 
 
+
 #define VNET_SERVER_PORT 9000
 
-#define VNET_YIELD_TIME_USEC 1000
+#define VNET_NOPROGRESS_LIMIT 1000
+
+#define VNET_YIELD_TIME_USEC  1000
 
 struct vnet_link {
     uint32_t dst_ip;
@@ -152,17 +159,26 @@ static uint32_t _create_link(struct vnet_link * link) {
        return -1;
     }
 
+    if (link->sock_proto == UDP) { 
+       // no UDP checksumming
+       lock_sock(link->sock->sk);
+       link->sock->sk->sk_no_check = 1;
+       release_sock(link->sock->sk);
+    }
+
     memset(&link->sock_addr, 0, sizeof(struct sockaddr));
 
     link->sock_addr.sin_family = AF_INET;
     link->sock_addr.sin_addr.s_addr = link->dst_ip;
     link->sock_addr.sin_port = htons(link->dst_port);
 
+
     if ((err = link->sock->ops->connect(link->sock, (struct sockaddr *)&(link->sock_addr), sizeof(struct sockaddr), 0) < 0)) {
        WARNING("Could not connect to remote VNET Server, error %d\n", err);
        return -1;
     }
 
+
     spin_lock_irqsave(&(vnet_brg_s.lock), flags);
     list_add(&(link->node), &(vnet_brg_s.link_list));
     vnet_brg_s.num_links ++;
@@ -237,7 +253,7 @@ _udp_send(struct socket * sock,
     iov.iov_base = buf;
     iov.iov_len = len;
 
-    msg.msg_flags = 0;
+    msg.msg_flags = MSG_NOSIGNAL;
     msg.msg_name = addr;
     msg.msg_namelen = sizeof(struct sockaddr_in);
     msg.msg_control = NULL;
@@ -384,6 +400,13 @@ static int init_vnet_serv(void) {
        return -1;
     }
 
+    if (vnet_brg_s.serv_proto == UDP) { 
+       // No UDP checksumming is done
+       lock_sock(vnet_brg_s.serv_sock->sk);
+       vnet_brg_s.serv_sock->sk->sk_no_check = 1;
+       release_sock(vnet_brg_s.serv_sock->sk);
+    }
+
     memset(&vnet_brg_s.serv_addr, 0, sizeof(struct sockaddr));
 
     vnet_brg_s.serv_addr.sin_family = AF_INET;
@@ -404,6 +427,8 @@ static int init_vnet_serv(void) {
        }
     }
 
+
+
     return 0;
 }
 
@@ -412,6 +437,7 @@ static int _udp_server(void * arg) {
     struct sockaddr_in pkt_addr;
     struct vnet_link * link = NULL;
     int len;
+    uint64_t noprogress_count;
 
     INFO("Palacios VNET Bridge: UDP receiving server ..... \n");
 
@@ -422,6 +448,8 @@ static int _udp_server(void * arg) {
        return -1;
     }
 
+    
+    noprogress_count=0;
 
     while (!kthread_should_stop()) {
 
@@ -436,10 +464,32 @@ static int _udp_server(void * arg) {
        // If it would have blocked, we have no packet, and so
        // we will give other threads on this core a chance
        if (len==-EAGAIN || len==-EWOULDBLOCK || len==-EINTR) { 
-           palacios_yield_cpu_timed(VNET_YIELD_TIME_USEC);
+
+           // avoid rollover in the counter out of paranoia
+           if (! ((noprogress_count + 1) < noprogress_count)) { 
+               noprogress_count++;
+           }
+           
+           // adaptively select yielding strategy depending on
+           // whether we are making progress
+           if (noprogress_count < VNET_NOPROGRESS_LIMIT) { 
+               // Likely making progress, do fast yield so we 
+               // come back immediately if there is no other action
+               palacios_yield_cpu();
+           } else {
+               // Likely not making progress, do potentially slow
+               // yield - we won't come back until VNET_YIELD_TIME_USEC has passed
+               palacios_yield_cpu_timed(VNET_YIELD_TIME_USEC);
+           }
+
            continue;
        }
        
+
+       // Something interesting has happened, therefore progress!
+       noprogress_count=0;
+           
+
        if(len < 0) {
            WARNING("Receive error: Could not get packet, error %d\n", len);
            continue;
index 92dee7e..5e7d4f2 100644 (file)
@@ -940,6 +940,13 @@ static int connect_fn(struct v3_vm_info * info,
     ops->config.poll = 1;
     ops->config.quote = 64;
     ops->config.fnt_mac = V3_Malloc(ETH_ALEN);  
+
+    if (!ops->config.fnt_mac) { 
+       PrintError("Cannot allocate in connect\n");
+       // should unregister here
+       return -1;
+    }
+
     memcpy(ops->config.fnt_mac, virtio->mac, ETH_ALEN);
 
     return 0;
index 29c1412..375a974 100644 (file)
@@ -111,6 +111,11 @@ static int vnet_nic_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
 
     v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend");
 
+    if (!frontend_cfg || !(v3_cfg_val(frontend_cfg, "tag"))) { 
+       PrintError("No frontend config specified, or frontend has no tag\n");
+       return -1;
+    }
+
     vnetnic = (struct vnet_nic_state *)V3_Malloc(sizeof(struct vnet_nic_state));
 
     if (!vnetnic) {
index e7862c8..604d06b 100644 (file)
 #endif
 
 
-#define YIELD_TIME_USEC 1000
+#define NO_PROGRESS_CYCLE_LIMIT  4000000ULL   // 4 million cycles, about 1ms on a 4 GHz machine
+
+#define YIELD_TIME_USEC    1000
 
 
 //
 // This should trigger a #GP if cpl != 0, otherwise, yield to host
 //
 
-int v3_handle_halt(struct guest_info * info) {
-
+int v3_handle_halt(struct guest_info * info) 
+{
+    
     if (info->cpl != 0) { 
        v3_raise_exception(info, GPF_EXCEPTION);
     } else {
+       uint64_t total_cycles;
+       
+
        PrintDebug("CPU Yield\n");
 
+       total_cycles = 0;
+
        while (!v3_intr_pending(info) && (info->vm_info->run_state == VM_RUNNING)) {
             uint64_t t, cycles;
            /* Yield, allowing time to pass while yielded */
            t = v3_get_host_time(&info->time_state);
-           v3_yield(info,YIELD_TIME_USEC);
+
+           // adaptively select the best yield option
+           if (total_cycles > NO_PROGRESS_CYCLE_LIMIT) { 
+               // Slow yield - will take at least YIELD_TIME_USEC to come back
+               v3_yield(info,YIELD_TIME_USEC);
+           } else {
+               // Fast yield - may come back immediately
+               v3_yield(info,-1);
+           }
+
            cycles = v3_get_host_time(&info->time_state) - t;
+
+           if ((total_cycles + cycles) > total_cycles) { 
+               total_cycles += cycles;
+           }
+
            v3_advance_time(info, &cycles);
 
            v3_update_timers(info);
index c8ce677..0819adf 100644 (file)
@@ -31,7 +31,8 @@
 #define PrintDebug(fmt, args...)
 #endif
 
-#define VNET_YIELD_USEC 1000
+#define VNET_NOPROGRESS_LIMIT 1000
+#define VNET_YIELD_USEC       1000
 
 int net_debug = 0;
 
@@ -345,118 +346,221 @@ static void inline del_routes_by_dev(int dev_id){
            Vnet_Free(route);    
        }
     }
-
+    
     vnet_unlock_irqrestore(vnet_state.lock, flags);
 }
 
 
+// Match classes, must be in order
+#define NUM_MATCH_CLASSES 4
+#define NUM_MATCH_CLASSES_BOUND 3
+#define NONE    0
+#define NOT     1
+#define ANY     2
+#define DIRECT  3
 
 
-/* At the end allocate a route_list
- * This list will be inserted into the cache so we don't need to free it
- */
-static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
+static inline uint8_t match_mac(uint8_t test_mac[ETH_ALEN], 
+                               uint8_t route_mac[ETH_ALEN], 
+                               uint8_t route_qual)
+{
+    switch (route_qual) { 
+       case MAC_NOSET:
+           return NONE;
+           break;
+       case MAC_NONE:
+           return NONE;
+           break;
+       case MAC_ANY:
+           return ANY;
+           break;
+       case MAC_NOT:
+           if (memcmp(test_mac,route_mac,ETH_ALEN)) { 
+               return NOT;
+           } else {
+               return NONE;
+           }
+           break;
+       case MAC_ADDR:
+           if (memcmp(test_mac,route_mac,ETH_ALEN)) { 
+               return NONE;
+           } else {
+               return DIRECT;
+           }
+           break;
+       default:
+           PrintError("Unknown qualifier %u\n",route_qual);
+           return NONE;
+           break;
+    }
+
+}
+
+#define QUAL_TO_STR(q)  (       \
+(q)==MAC_NOSET ? "MAC_NOSET" :  \
+(q)==MAC_NONE ? "MAC_NONE" :    \
+(q)==MAC_ANY ? "MAC_ANY" :      \
+(q)==MAC_NOT ? "MAC_NOT" :      \
+(q)==MAC_ADDR ? "MAC_ADDR" :    \
+"***UNDEFINED****"              \
+    )                           \
+
+#define MATCH_CLASS_TO_STR(c)  (       \
+(c)==NONE ? "NONE" :  \
+(c)==NOT ? "NOT" :    \
+(c)==ANY ? "ANY" :      \
+(c)==DIRECT ? "DIRECT" :      \
+"***UNDEFINED****"              \
+    )                           \
+
+
+
+/*
+
+Original priority behavior... 
+  
+priority   src  srcqual   dst  dstqual
+3              ANY            ANY
+4        X                    NONE
+5              ANY     X      NOT
+5        X     NOT            ANY
+6        X     ~NOT           ANY
+6              ANY     X      ~NOT
+7        X     ~NOT    X      NOT
+7        X     NOT     X      ~NOT
+8        X     ~NOT    X      ~NOT
+8        X     ~NOT    X      ~NOT
+
+*/
+
+/*
+  Current priority order is given in the following table
+*/
+
+// [src][dst] => priority
+static int priority_map[NUM_MATCH_CLASSES][NUM_MATCH_CLASSES] = 
+{
+    [NONE] = { [ 0 ... NUM_MATCH_CLASSES_BOUND ] = -1},   // ignore if it's not a source match
+    [NOT][NONE]                          = -1,            // ignore it if there is no destination match   
+    [NOT][NOT]                           = 3,                                   
+    [NOT][ANY]                           = 5,
+    [NOT][DIRECT]                        = 7,
+    [ANY][NONE]                          = -1,            // ignore if there is no destination match
+    [ANY][NOT]                           = 5,
+    [ANY][ANY]                           = 6,
+    [ANY][DIRECT]                        = 6,
+    [DIRECT][NONE]                       = -1,            // ignore if there is no destination match
+    [DIRECT][NOT]                        = 7,            
+    [DIRECT][ANY]                        = 8,            
+    [DIRECT][DIRECT]                     = 8,            
+};
+
+
+
+
+static inline int match_priority(uint8_t src_mac[ETH_ALEN],
+                                uint8_t dst_mac[ETH_ALEN],
+                                uint8_t route_src_mac[ETH_ALEN],
+                                uint8_t route_src_qual,
+                                uint8_t route_dst_mac[ETH_ALEN],
+                                uint8_t route_dst_qual)
+
+{
+
+    return priority_map[match_mac(src_mac,route_src_mac,route_src_qual)][match_mac(dst_mac,route_dst_mac,route_dst_qual)];
+}
+
+
+/*
+  Route matching will return the list of the highest priority routes that
+  match.  It's a list because it's possible to have multiple high priority routes
+ */ 
+static struct route_list * match_route(const struct v3_vnet_pkt * pkt) 
+{
+    int i;
     struct vnet_route_info * route = NULL; 
     struct route_list * matches = NULL;
     int num_matches = 0;
-    int max_rank = 0;
+    int max_priority = -1;
     struct list_head match_list;
     struct eth_hdr * hdr = (struct eth_hdr *)(pkt->data);
-    //  uint8_t src_type = pkt->src_type;
-    //  uint32_t src_link = pkt->src_id;
 
+    //
+    //
+    // NOTE: USING THE MATCH_NODE in the route list to record a match list
+    // IS A DISASTER WAITING TO HAPPEN
+    //
+    
 #ifdef V3_CONFIG_DEBUG_VNET
     {
-       char dst_str[100];
-       char src_str[100];
-
+       char dst_str[32], src_str[32];
        mac2str(hdr->src_mac, src_str);  
        mac2str(hdr->dst_mac, dst_str);
        PrintDebug("VNET/P Core: match_route. pkt: SRC(%s), DEST(%s)\n", src_str, dst_str);
     }
 #endif
-
-    INIT_LIST_HEAD(&match_list);
     
-#define UPDATE_MATCHES(rank) do {                              \
-       if (max_rank < (rank)) {                                \
-           max_rank = (rank);                                  \
-           INIT_LIST_HEAD(&match_list);                        \
-                                                               \
-           list_add(&(route->match_node), &match_list);        \
-           num_matches = 1;                                    \
-       } else if (max_rank == (rank)) {                        \
-           list_add(&(route->match_node), &match_list);        \
-           num_matches++;                                      \
-       }                                                       \
-    } while (0)
+    INIT_LIST_HEAD(&match_list);                       
+    
     
-
     list_for_each_entry(route, &(vnet_state.routes), node) {
+       
        struct v3_vnet_route * route_def = &(route->route_def);
+       
+       int priority;
+       
+       priority = match_priority(hdr->src_mac,
+                                 hdr->dst_mac,
+                                 route_def->src_mac,
+                                 route_def->src_mac_qual,
+                                 route_def->dst_mac,
+                                 route_def->dst_mac_qual);
 
-/*
-       // CHECK SOURCE TYPE HERE
-       if ( (route_def->src_type != LINK_ANY) && 
-            ( (route_def->src_type != src_type) || 
-              ( (route_def->src_id != src_link) &&
-                (route_def->src_id != -1)))) {
-           continue;
+       
+
+#ifdef V3_CONFIG_DEBUG_VNET
+       {
+           char dst_str[32];
+           char src_str[32];
+           
+           mac2str(route_def->src_mac, src_str);  
+           mac2str(route_def->dst_mac, dst_str);
+           
+           PrintDebug("Tested match against SRC(%s) SRC_QUAL(%s), DEST(%s) DST_QUAL(%s): "
+                      "SRC_MATCH=%s  DEST_MATCH=%s PRIORITY=%d\n", 
+                      src_str, QUAL_TO_STR(route_def->src_mac_qual), 
+                      dst_str, QUAL_TO_STR(route_def->dst_mac_qual),
+                      MATCH_CLASS_TO_STR(match_mac(hdr->src_mac,route_def->src_mac,route_def->src_mac_qual)),
+                      MATCH_CLASS_TO_STR(match_mac(hdr->dst_mac,route_def->dst_mac,route_def->dst_mac_qual)),
+                  priority);
        }
-*/
+#endif
 
-       if ((route_def->dst_mac_qual == MAC_ANY) &&
-           (route_def->src_mac_qual == MAC_ANY)) {      
-           UPDATE_MATCHES(3);
+       if (priority<0) { 
+           PrintDebug("No match to this rule\n");
+           continue;
        }
-       
-       if (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0) {
-           if (route_def->src_mac_qual != MAC_NOT) {
-               if (route_def->dst_mac_qual == MAC_ANY) {
-                   UPDATE_MATCHES(6);
-               } else if (route_def->dst_mac_qual != MAC_NOT &&
-                          memcmp(route_def->dst_mac, hdr->dst_mac, 6) == 0) {
-                   UPDATE_MATCHES(8);
-               }
+
+       if (priority > max_priority) { 
+            PrintDebug("New highest priority match, reseting list\n");
+           max_priority = priority;
+
+           struct vnet_route_info *my_route, *tmp_route;
+
+           list_for_each_entry_safe(my_route, tmp_route, &match_list,match_node) {
+               list_del(&(my_route->match_node));
            }
-       }
+
+           list_add(&(route->match_node), &match_list);        
+           num_matches = 1;                                    
            
-       if (memcmp(route_def->dst_mac, hdr->dst_mac, 6) == 0) {
-           if (route_def->dst_mac_qual != MAC_NOT) {
-               if (route_def->src_mac_qual == MAC_ANY) {
-                   UPDATE_MATCHES(6);
-               } else if ((route_def->src_mac_qual != MAC_NOT) && 
-                          (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0)) {
-                   UPDATE_MATCHES(8);
-               }
-           }
-       }
+       } else if (priority == max_priority) {                      
+            PrintDebug("Equal priority match, adding to list\n");
            
-       if ((route_def->dst_mac_qual == MAC_NOT) &&
-           (memcmp(route_def->dst_mac, hdr->dst_mac, 6) != 0)) {
-           if (route_def->src_mac_qual == MAC_ANY) {
-               UPDATE_MATCHES(5);
-           } else if ((route_def->src_mac_qual != MAC_NOT) && 
-                      (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0)) {     
-               UPDATE_MATCHES(7);
-           }
-       }
+           list_add(&(route->match_node), &match_list);        
+           num_matches++;                                      
+       }                                                       
        
-       if ((route_def->src_mac_qual == MAC_NOT) &&
-           (memcmp(route_def->src_mac, hdr->src_mac, 6) != 0)) {
-           if (route_def->dst_mac_qual == MAC_ANY) {
-               UPDATE_MATCHES(5);
-           } else if ((route_def->dst_mac_qual != MAC_NOT) &&
-                      (memcmp(route_def->dst_mac, hdr->dst_mac, 6) == 0)) {
-               UPDATE_MATCHES(7);
-           }
-       }
-       
-       // Default route
-       if ( (memcmp(route_def->src_mac, hdr->src_mac, 6) == 0) &&
-            (route_def->dst_mac_qual == MAC_NONE)) {
-           UPDATE_MATCHES(4);
-       }
     }
 
     PrintDebug("VNET/P Core: match_route: Matches=%d\n", num_matches);
@@ -464,7 +568,7 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
     if (num_matches <= 0) {
        return NULL;
     }
-
+    
     matches = (struct route_list *)Vnet_Malloc(sizeof(struct route_list) + 
                                               (sizeof(struct vnet_route_info *) * num_matches));
 
@@ -476,24 +580,30 @@ static struct route_list * match_route(const struct v3_vnet_pkt * pkt) {
 
     matches->num_routes = num_matches;
 
-    {
-       int i = 0;
-       list_for_each_entry(route, &match_list, match_node) {
+    i=0;
+    list_for_each_entry(route, &match_list, match_node) {
+       if (i==num_matches) { 
+           // the list should never have more than num_matches on it...
+           PrintError("Weird list behavior\n");
+           break;
+       } else {
            matches->routes[i++] = route;
        }
+       
     }
 
     return matches;
 }
 
-int v3_vnet_query_header(uint8_t src_mac[6], 
-                        uint8_t dest_mac[6],
+int v3_vnet_query_header(uint8_t src_mac[ETH_ALEN], 
+                        uint8_t dest_mac[ETH_ALEN],
                         int     recv,         // 0 = send, 1=recv
                         struct v3_vnet_header *header)
 {
     struct route_list *routes;
     struct vnet_route_info *r;
     struct v3_vnet_pkt p;
+    void *flags;
 
     p.size=14;
     p.data=p.header;
@@ -507,12 +617,15 @@ int v3_vnet_query_header(uint8_t src_mac[6],
     memcpy(header->src_mac,src_mac,6);
     memcpy(header->dst_mac,dest_mac,6);
 
+
+    flags = vnet_lock_irqsave(vnet_state.lock);
     
     look_into_cache(&p,&routes);
 
     if (!routes) { 
        routes = match_route(&p);
        if (!routes) { 
+           vnet_unlock_irqrestore(vnet_state.lock,flags);
            PrintError("Cannot match route\n");
            header->header_type=VNET_HEADER_NOMATCH;
            header->header_len=0;
@@ -521,6 +634,8 @@ int v3_vnet_query_header(uint8_t src_mac[6],
            add_route_to_cache(&p,routes);
        }
     }
+
+    vnet_unlock_irqrestore(vnet_state.lock,flags);
     
     if (routes->num_routes<1) { 
        PrintError("Less than one route\n");
@@ -828,6 +943,7 @@ static int vnet_tx_flush(void * args){
     struct vnet_dev * dev = NULL;
     int more;
     int rc;
+    uint64_t noprogress_count;
 
     Vnet_Print(0, "VNET/P Polling Thread Starting ....\n");
 
@@ -845,6 +961,7 @@ static int vnet_tx_flush(void * args){
        return -1;
     }
 
+    noprogress_count=0;
 
     while (!vnet_thread_should_stop()) {
 
@@ -871,13 +988,19 @@ static int vnet_tx_flush(void * args){
            v3_enqueue(vnet_state.poll_devs, (addr_t)dev); 
        }
 
-       // Yield regardless of whether we handled any devices - need
-       // to allow other threads to run
+
        if (more) { 
-           // we have more to do, so we want to get back asap
+           noprogress_count=0;
+       } else {
+           if ( ! ((noprogress_count+1) < noprogress_count)) {
+               noprogress_count++;
+           }
+       }
+
+       // adaptively yield 
+       if (noprogress_count < VNET_NOPROGRESS_LIMIT) { 
            V3_Yield();
        } else {
-           // put ourselves briefly to sleep if we we don't have more
            V3_Yield_Timed(VNET_YIELD_USEC);
        }