2 * Palacios VNET Host Bridge
6 #include <linux/spinlock.h>
7 #include <linux/seq_file.h>
8 #include <linux/proc_fs.h>
9 #include <asm/uaccess.h>
10 #include <linux/inet.h>
11 #include <linux/kthread.h>
13 #include <linux/netdevice.h>
16 #include <linux/net.h>
17 #include <linux/string.h>
18 #include <linux/preempt.h>
19 #include <linux/sched.h>
22 #include <linux/net.h>
23 #include <linux/socket.h>
26 #include <vnet/vnet.h>
27 #include <vnet/vnet_hashtable.h>
28 #include "palacios-vnet.h"
/* Port the VNET server socket is bound to in init_vnet_serv(). */
33 #define VNET_SERVER_PORT 9000
/* Back-off tuning for the receive loop in _udp_server() when no packet is available. */
35 #define VNET_ADAPTIVE_BRIDGE 1 // set this to one to have the bridge go to sleep if there is nothing to do...
36 #define VNET_NOPROGRESS_LIMIT 1000 // ... after this many iterations
37 #define VNET_YIELD_TIME_USEC 1000 // ... and go to sleep for this long
/* Remote endpoint of the link; filled in by _create_link() and handed to _udp_send(). */
44 struct sockaddr_in sock_addr;
/* Transport selected for this link; the code in this file switches on UDP / TCP. */
45 vnet_brg_proto_t sock_proto;
/* Per-link traffic counters: tx_* are bumped in bridge_send_pkt(), rx_* in _udp_server(). */
47 struct nic_statistics stats;
/* Linkage into vnet_brg_s.link_list. */
51 struct list_head node;
/* State of the host bridge: the configured links plus the receiving server. */
55 struct vnet_brg_state {
/* All links, chained through vnet_link.node. */
60 struct list_head link_list;
/* Hashtable from a pointer to a link's IPv4 address to the link (see hash_fn()/hash_eq()). */
61 struct hashtable *ip2link;
/* Server socket, address and protocol; set up in vnet_bridge_init()/init_vnet_serv(). */
65 struct socket * serv_sock;
66 struct sockaddr_in serv_addr;
67 vnet_brg_proto_t serv_proto;
/* Kernel thread running _rx_server(); started in vnet_bridge_init(), stopped in vnet_bridge_deinit(). */
69 struct task_struct * serv_thread;
71 void * brg_data; /* private data from vnet_core */
/* Bridge-wide packet counters; copied out by vnet_brg_stats(). */
73 struct vnet_brg_stats stats;
/* File-scope bridge instance shared by the link, send and receive code below. */
77 static struct vnet_brg_state vnet_brg_s;
/*
 * Copy the bridge-wide packet counters into *stats.
 * stats: caller-provided destination; it is written with memcpy and no
 *        NULL check is visible here.
 */
80 int vnet_brg_stats(struct vnet_brg_stats * stats){
81 memcpy(stats, &(vnet_brg_s.stats), sizeof(*stats));
/*
 * Look up the link registered for a remote IPv4 address.
 * ip: address in the same representation stored in link->dst_ip
 *     (the receive path passes sin_addr.s_addr directly).
 * Returns whatever vnet_htable_search() yields, cast to a link pointer.
 */
86 static inline struct vnet_link * _link_by_ip(uint32_t ip) {
// The key is the address of the local copy of ip; hash_fn()/hash_eq() read sizeof(uint32_t) bytes through it.
87 return (struct vnet_link *)vnet_htable_search(vnet_brg_s.ip2link, (addr_t)&ip);
/*
 * Find a link by its index by walking vnet_brg_s.link_list.
 * idx: index assigned to the link in _create_link().
 * NOTE(review): no locking is visible around this traversal, while
 * _create_link()/_delete_link() modify the list under vnet_brg_s.lock --
 * confirm that callers are serialized against link creation/deletion.
 */
90 static inline struct vnet_link * _link_by_idx(int idx) {
91 struct vnet_link * link = NULL;
93 list_for_each_entry(link, &(vnet_brg_s.link_list), node) {
95 if (link->idx == idx) {
/*
 * Tear down one link: release its socket, then unlink it from the link
 * list and the IP hashtable under vnet_brg_s.lock.
 * link: link to remove; dereferenced without a NULL check here.
 * NOTE(review): the socket is released before the link is unlinked, so a
 * concurrent lookup could still find the link with a released socket --
 * consider unlinking first; verify against the callers' locking.
 */
103 static void _delete_link(struct vnet_link * link){
104 unsigned long flags = 0;
106 link->sock->ops->release(link->sock);
// List, hashtable and link count are updated together with interrupts saved/disabled.
108 palacios_spinlock_lock_irqsave(&(vnet_brg_s.lock), flags);
109 list_del(&(link->node));
// Same key form used at insertion time: the address of link->dst_ip.
110 vnet_htable_remove(vnet_brg_s.ip2link, (addr_t)&(link->dst_ip), 0);
111 vnet_brg_s.num_links --;
112 palacios_spinlock_unlock_irqrestore(&(vnet_brg_s.lock), flags);
114 INFO("VNET Bridge: Link deleted, ip 0x%x, port: %d, idx: %d\n",
/*
 * Public entry point for deleting a link by index.
 * idx: link index as returned when the link was created; it is passed to
 *      _link_by_idx(), which takes an int.
 */
123 void vnet_brg_delete_link(uint32_t idx){
124 struct vnet_link * link = _link_by_idx(idx);
/*
 * Walk every link on vnet_brg_s.link_list using the removal-safe iterator
 * (tmp_link holds the next entry, so the current one may be unlinked).
 * Presumably each link is deleted in the loop body -- verify.
 */
131 static void deinit_links_list(void){
132 struct vnet_link * link = NULL, * tmp_link = NULL;
134 list_for_each_entry_safe(link, tmp_link, &(vnet_brg_s.link_list), node) {
/*
 * Bring up a link: create its socket, connect it to the remote VNET
 * server, and register the link in the list and the IP hashtable.
 * link: caller-allocated link with dst_ip, dst_port and sock_proto set.
 * On success the link receives a fresh idx (pre-incremented link_idx).
 * NOTE(review): the return type is uint32_t, so any negative failure code
 * reaches callers as a large unsigned value -- confirm how callers test it.
 */
139 static uint32_t _create_link(struct vnet_link * link) {
// Map the link's protocol enum to the IP protocol number for sock_create().
144 switch(link->sock_proto){
146 protocol = IPPROTO_UDP;
149 protocol = IPPROTO_TCP;
153 WARNING("Unsupported VNET Server Protocol\n");
// NOTE(review): the socket type is SOCK_DGRAM regardless of protocol; with
// IPPROTO_TCP this combination presumably fails -- TCP links look unsupported.
157 if ((err = sock_create(AF_INET, SOCK_DGRAM, protocol, &link->sock)) < 0) {
158 WARNING("Could not create socket for VNET Link, error %d\n", err);
162 if (link->sock_proto == UDP) {
163 // no UDP checksumming
164 lock_sock(link->sock->sk);
// The checksum-disable field(s) differ by kernel version.
165 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
166 link->sock->sk->sk_no_check = 1;
168 link->sock->sk->sk_no_check_tx = 1;
169 link->sock->sk->sk_no_check_rx = 1;
171 release_sock(link->sock->sk);
// Clears sizeof(struct sockaddr) bytes of a sockaddr_in; presumably the two sizes match -- verify.
174 memset(&link->sock_addr, 0, sizeof(struct sockaddr));
176 link->sock_addr.sin_family = AF_INET;
// dst_ip is stored as-is (no byte swap), dst_port is converted to network order.
177 link->sock_addr.sin_addr.s_addr = link->dst_ip;
178 link->sock_addr.sin_port = htons(link->dst_port);
181 if ((err = link->sock->ops->connect(link->sock, (struct sockaddr *)&(link->sock_addr), sizeof(struct sockaddr), 0)) < 0) {
182 WARNING("Could not connect to remote VNET Server, error %d\n", err);
// Publish the link: list, count, index and hashtable are updated under the bridge lock.
187 palacios_spinlock_lock_irqsave(&(vnet_brg_s.lock), flags);
188 list_add(&(link->node), &(vnet_brg_s.link_list));
189 vnet_brg_s.num_links ++;
190 link->idx = ++ vnet_brg_s.link_idx;
// The hashtable key is the address of link->dst_ip, so it stays valid only as long as the link does.
191 vnet_htable_insert(vnet_brg_s.ip2link, (addr_t)&(link->dst_ip), (addr_t)link);
192 palacios_spinlock_unlock_irqrestore(&(vnet_brg_s.lock), flags);
194 INFO("VNET Bridge: Link created, ip 0x%x, port: %d, idx: %d, link: %p, protocol: %s\n",
199 ((link->sock_proto==UDP)?"UDP":"TCP"));
/*
 * Allocate a link for the given remote endpoint and bring it up.
 * ip:    remote IPv4 address, stored unchanged in dst_ip (used directly as
 *        s_addr by _create_link()).
 * port:  remote port in host order (_create_link() applies htons()).
 * proto: transport protocol for the link.
 * The new link is freed again if creation fails.
 */
205 uint32_t vnet_brg_add_link(uint32_t ip, uint16_t port, vnet_brg_proto_t proto){
206 struct vnet_link * new_link = NULL;
209 new_link = palacios_alloc(sizeof(struct vnet_link));
// Zero the link so fields not set below start out cleared.
213 memset(new_link, 0, sizeof(struct vnet_link));
215 new_link->dst_ip = ip;
216 new_link->dst_port = port;
217 new_link->sock_proto = proto;
// NOTE(review): _create_link() returns uint32_t; make sure the failure test
// that guards the warning below does not rely on a signed comparison.
219 idx = _create_link(new_link);
221 WARNING("Could not create link\n");
222 palacios_free(new_link);
/*
 * Copy the traffic counters of one link into *stats.
 * link_idx: index of the link, resolved through _link_by_idx().
 * stats:    caller-provided destination buffer.
 */
230 int vnet_brg_link_stats(uint32_t link_idx, struct nic_statistics * stats){
231 struct vnet_link * link;
233 link = _link_by_idx(link_idx);
238 memcpy(stats, &(link->stats), sizeof(*stats));
/*
 * Send one buffer on a socket using sock_sendmsg().
 * sock: socket to send on; sock->sk is checked for NULL first.
 * addr: destination address (the message name length is set to
 *       sizeof(struct sockaddr_in)).
 * buf/len: payload and its length in bytes.
 * The result of sock_sendmsg() is stored in size.
 */
245 _udp_send(struct socket * sock,
246 struct sockaddr_in * addr,
247 unsigned char * buf, int len) {
254 if (sock->sk == NULL) {
261 msg.msg_flags = MSG_NOSIGNAL;
263 msg.msg_namelen = sizeof(struct sockaddr_in);
// No ancillary (control) data is sent.
264 msg.msg_control = NULL;
265 msg.msg_controllen = 0;
// The way the iovec is attached to the msghdr depends on the kernel version.
266 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0)
270 iov_iter_init(&(msg.msg_iter),WRITE,&iov,1,0);
// msg_control was already cleared above; this second assignment looks redundant.
272 msg.msg_control = NULL;
276 size = sock_sendmsg(sock, &msg, len);
/*
 * Receive one message from a socket using sock_recvmsg().
 * sock: socket to receive on; sock->sk is checked for NULL first.
 * addr: buffer for the sender's address (name length is
 *       sizeof(struct sockaddr_in)).
 * buf/len: destination buffer and its capacity in bytes.
 * nonblocking: when non-zero, MSG_DONTWAIT is added to the receive flags.
 * The result of sock_recvmsg() is stored in size; _udp_server() treats
 * negative values as error codes.
 */
285 _udp_recv(struct socket * sock,
286 struct sockaddr_in * addr,
287 unsigned char * buf, int len, int nonblocking) {
293 if (sock->sk == NULL) {
300 msg.msg_flags = MSG_NOSIGNAL | (nonblocking ? MSG_DONTWAIT : 0);
302 msg.msg_namelen = sizeof(struct sockaddr_in);
// No ancillary (control) data is requested.
303 msg.msg_control = NULL;
304 msg.msg_controllen = 0;
// The way the iovec is attached to the msghdr depends on the kernel version.
305 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0)
309 iov_iter_init(&(msg.msg_iter),READ,&iov,1,0);
// msg_control was already cleared above; this second assignment looks redundant.
311 msg.msg_control = NULL;
315 size = sock_recvmsg(sock, &msg, len, msg.msg_flags);
322 /* send packets to VNET core */
/*
 * Wrap a received buffer in a v3_vnet_pkt and hand it to the VNET core.
 * buf: raw frame; its first ETHERNET_HEADER_LEN bytes are copied into the
 *      packet header.
 * The packet is tagged as coming from an edge link (src_type LINK_EDGE,
 * src_id = link_id) with no destination type set yet.
 * Returns the result of v3_vnet_send_pkt().
 */
324 send_to_palacios(unsigned char * buf,
327 struct v3_vnet_pkt pkt;
328 memset(&pkt,0,sizeof(struct v3_vnet_pkt));
330 pkt.dst_type = LINK_NOSET;
331 pkt.src_type = LINK_EDGE;
332 pkt.src_id = link_id;
333 memcpy(pkt.header, buf, ETHERNET_HEADER_LEN);
337 DEBUG("VNET Lnx Bridge: send pkt to VNET core (size: %d, src_id: %d, src_type: %d)\n",
338 pkt.size, pkt.src_id, pkt.src_type);
// NOTE(review): the first argument of print_hex_dump() is the log-level string; NULL is passed here -- verify.
340 print_hex_dump(NULL, "pkt_data: ", 0, 20, 20, pkt.data, pkt.size, 0);
// Counted before the send, so the counter also includes packets the core rejects.
344 vnet_brg_s.stats.pkt_to_vmm ++;
346 return v3_vnet_send_pkt(&pkt, NULL);
350 /* send packet to external network */
/*
 * Bridge input callback registered with the VNET core in vnet_bridge_init().
 * vm:           originating VM (not used in the lines shown here).
 * pkt:          packet from the core; pkt->dst_id selects the outgoing link.
 * private_data: callback cookie (NULL is registered in vnet_bridge_init()).
 * Packets whose dst_id matches no link are dropped and counted in
 * pkt_drop_vmm.
 */
352 bridge_send_pkt(struct v3_vm_info * vm,
353 struct v3_vnet_pkt * pkt,
354 void * private_data) {
355 struct vnet_link * link = NULL;
358 DEBUG("VNET Lnx Host Bridge: packet received from VNET Core ... pkt size: %d, link: %d\n",
// NOTE(review): the first argument of print_hex_dump() is the log-level string; NULL is passed here -- verify.
362 print_hex_dump(NULL, "pkt_data: ", 0, 20, 20, pkt->data, pkt->size, 0);
366 vnet_brg_s.stats.pkt_from_vmm ++;
368 link = _link_by_idx(pkt->dst_id);
370 switch(link->sock_proto){
// NOTE(review): the result of _udp_send() is ignored; pkt_to_phy is bumped even if the send failed.
372 _udp_send(link->sock, &(link->sock_addr), pkt->data, pkt->size);
373 vnet_brg_s.stats.pkt_to_phy ++;
// Second protocol case (presumably TCP): only the counter is updated, no send call is visible.
376 vnet_brg_s.stats.pkt_to_phy ++;
380 WARNING("VNET Server: Invalid Link Protocol\n");
381 vnet_brg_s.stats.pkt_drop_vmm ++;
// Per-link transmit accounting.
383 link->stats.tx_bytes += pkt->size;
384 link->stats.tx_pkts ++;
386 INFO("VNET Bridge Linux Host: wrong dst link, idx: %d, discarding the packet\n", pkt->dst_id);
387 vnet_brg_s.stats.pkt_drop_vmm ++;
/*
 * Create the VNET server socket and bind it to VNET_SERVER_PORT on any
 * local address; for TCP it is also put into listening state.
 * vnet_bridge_init() treats a negative return value as failure.
 */
394 static int init_vnet_serv(void) {
// Map the configured server protocol to the IP protocol number.
398 switch(vnet_brg_s.serv_proto){
400 protocol = IPPROTO_UDP;
403 protocol = IPPROTO_TCP;
407 WARNING("Unsupported VNET Server Protocol\n");
// NOTE(review): the socket type is SOCK_DGRAM regardless of protocol; with
// IPPROTO_TCP this combination presumably fails -- verify.
411 if ((err = sock_create(AF_INET, SOCK_DGRAM, protocol, &vnet_brg_s.serv_sock)) < 0) {
412 WARNING("Could not create VNET server socket, error: %d\n", err);
416 if (vnet_brg_s.serv_proto == UDP) {
417 // No UDP checksumming is done
418 lock_sock(vnet_brg_s.serv_sock->sk);
// The checksum-disable field(s) differ by kernel version.
419 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
420 vnet_brg_s.serv_sock->sk->sk_no_check = 1;
422 vnet_brg_s.serv_sock->sk->sk_no_check_tx = 1;
423 vnet_brg_s.serv_sock->sk->sk_no_check_rx = 1;
425 release_sock(vnet_brg_s.serv_sock->sk);
// Clears sizeof(struct sockaddr) bytes of a sockaddr_in; presumably the two sizes match -- verify.
428 memset(&vnet_brg_s.serv_addr, 0, sizeof(struct sockaddr));
430 vnet_brg_s.serv_addr.sin_family = AF_INET;
431 vnet_brg_s.serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
432 vnet_brg_s.serv_addr.sin_port = htons(VNET_SERVER_PORT);
434 if ((err = vnet_brg_s.serv_sock->ops->bind(vnet_brg_s.serv_sock, (struct sockaddr *)&(vnet_brg_s.serv_addr), sizeof(struct sockaddr))) < 0) {
435 WARNING("Could not bind VNET server socket to port %d, error: %d\n", VNET_SERVER_PORT, err);
439 INFO("VNET server bind to port: %d\n", VNET_SERVER_PORT);
// Only a TCP server socket is put into listening state (backlog 32).
441 if(vnet_brg_s.serv_proto == TCP){
442 if((err = vnet_brg_s.serv_sock->ops->listen(vnet_brg_s.serv_sock, 32)) < 0){
443 WARNING("VNET Server error listening on port %d, error %d\n", VNET_SERVER_PORT, err);
/*
 * Receive loop of the bridge: polls the server socket without blocking,
 * maps each packet's source IP to a link and forwards it to the VNET
 * core. Runs until kthread_should_stop() reports a stop request.
 * arg: thread argument (not used in the lines shown here).
 */
453 static int _udp_server(void * arg) {
455 struct sockaddr_in pkt_addr;
456 struct vnet_link * link = NULL;
// NOTE(review): declared without an initializer; confirm it is set before the loop reads it.
458 uint64_t noprogress_count;
460 INFO("Palacios VNET Bridge: UDP receiving server ..... \n");
// One receive buffer of MAX_PACKET_LEN bytes is reused for every packet.
462 pkt = palacios_alloc(MAX_PACKET_LEN);
465 ERROR("Unable to allocate packet in VNET UDP Server\n");
472 while (!kthread_should_stop()) {
474 // This is a NONBLOCKING receive
475 // If we block here, we will never detect that this thread
476 // is being signaled to stop, plus we might go uninterrupted on this core
477 // blocking out access to other threads - leave this NONBLOCKING
478 // unless you know what you are doing
479 len = _udp_recv(vnet_brg_s.serv_sock, &pkt_addr, pkt, MAX_PACKET_LEN, 1);
482 // If it would have blocked, we have no packet, and so
483 // we will give other threads on this core a chance
484 if (len==-EAGAIN || len==-EWOULDBLOCK || len==-EINTR) {
486 // avoid rollover in the counter out of paranoia
487 if (! ((noprogress_count + 1) < noprogress_count)) {
491 // adaptively select yielding strategy depending on
492 // whether we are making progress
493 if ((!VNET_ADAPTIVE_BRIDGE) || (noprogress_count < VNET_NOPROGRESS_LIMIT)) {
494 // Likely making progress, do fast yield so we
495 // come back immediately if there is no other action
496 palacios_yield_cpu();
498 // Likely not making progress, do potentially slow
499 // yield - we won't come back until VNET_YIELD_TIME_USEC has passed
500 palacios_sleep_cpu(VNET_YIELD_TIME_USEC);
507 // Something interesting has happened, therefore progress!
512 WARNING("Receive error: Could not get packet, error %d\n", len);
// The sender's IPv4 address identifies the link the packet belongs to.
516 link = _link_by_ip(pkt_addr.sin_addr.s_addr);
519 WARNING("VNET Server: No VNET Link matches the src IP\n");
520 vnet_brg_s.stats.pkt_drop_phy ++;
// Bridge-wide and per-link receive accounting.
524 vnet_brg_s.stats.pkt_from_phy ++;
525 link->stats.rx_bytes += len;
526 link->stats.rx_pkts ++;
528 send_to_palacios(pkt, len, link->idx);
531 INFO("VNET Server: UDP thread exiting\n");
/*
 * Thread function passed to kthread_run() in vnet_bridge_init().
 * Dispatches on the configured server protocol; the TCP branch only
 * warns that TCP is unsupported, and any other protocol is rejected
 * with a warning. The UDP branch presumably runs _udp_server() -- verify.
 * arg: thread argument (NULL is passed by vnet_bridge_init()).
 */
539 static int _rx_server(void * arg) {
541 if(vnet_brg_s.serv_proto == UDP){
543 }else if(vnet_brg_s.serv_proto == TCP) {
544 //accept new connection
545 //use select to receive pkt from physical network
546 //or create new kthread to handle each connection?
547 WARNING("VNET Server: TCP is not currently supported\n");
550 WARNING ("VNET Server: Unsupported Protocol\n");
/*
 * Hash callback for the ip2link table.
 * hdr_ptr: key, interpreted as a pointer to sizeof(uint32_t) bytes
 *          (callers pass the address of an IPv4 address).
 */
557 static inline unsigned int hash_fn(addr_t hdr_ptr) {
558 return vnet_hash_buffer((uint8_t *)hdr_ptr, sizeof(uint32_t));
/*
 * Key-equality callback for the ip2link table.
 * key1/key2: pointers to sizeof(uint32_t)-byte keys.
 * Returns non-zero when the two keys hold identical bytes, 0 otherwise.
 */
561 static inline int hash_eq(addr_t key1, addr_t key2) {
562 return (memcmp((uint8_t *)key1, (uint8_t *)key2, sizeof(uint32_t)) == 0);
/*
 * Initialize the host bridge: reset the global state, create the IP
 * hashtable, set up the UDP server socket, start the receive thread and
 * register the bridge with the VNET core.
 */
566 int vnet_bridge_init(void) {
567 struct v3_vnet_bridge_ops bridge_ops;
// Guard against initializing twice.
569 if(vnet_brg_s.status != 0) {
// NOTE(review): status is set to 1 here, but the memset just below zeroes the
// whole structure including status, so the guard above can never trigger on a
// second call. The assignment should presumably follow the memset.
572 vnet_brg_s.status = 1;
574 memset(&vnet_brg_s, 0, sizeof(struct vnet_brg_state));
576 INIT_LIST_HEAD(&(vnet_brg_s.link_list));
577 palacios_spinlock_init(&(vnet_brg_s.lock));
// The server protocol is fixed to UDP.
579 vnet_brg_s.serv_proto = UDP;
// Hashtable keyed by pointer-to-IPv4-address; 10 is the size argument passed to vnet_create_htable().
581 vnet_brg_s.ip2link = vnet_create_htable(10, hash_fn, hash_eq);
582 if(vnet_brg_s.ip2link == NULL){
583 WARNING("Failure to initiate VNET link hashtable\n");
587 if(init_vnet_serv() < 0){
588 WARNING("Failure to initiate VNET server\n");
// NOTE(review): no check of the kthread_run() result is visible; confirm a failed thread start is handled.
592 vnet_brg_s.serv_thread = kthread_run(_rx_server, NULL, "vnet_brgd");
// Only the input callback is provided; no poll callback is registered.
594 bridge_ops.input = bridge_send_pkt;
595 bridge_ops.poll = NULL;
597 if( v3_vnet_add_bridge(NULL, &bridge_ops, HOST_LNX_BRIDGE, NULL) < 0){
598 WARNING("VNET LNX Bridge: Fails to register bridge to VNET core");
601 INFO("VNET Linux Bridge initiated\n");
/*
 * Shut the host bridge down. In order: unregister from the VNET core,
 * stop the receive thread, release the server socket, free the IP
 * hashtable, clear the status flag and deinitialize the lock.
 * No check that the bridge was actually initialized is visible here.
 */
607 void vnet_bridge_deinit(void){
609 INFO("VNET LNX Bridge Deinit Started\n");
// Unregister first so the core stops delivering packets to bridge_send_pkt().
611 v3_vnet_del_bridge(HOST_LNX_BRIDGE);
613 //DEBUG("Stopping bridge service thread\n");
615 kthread_stop(vnet_brg_s.serv_thread);
617 //DEBUG("Releasing bridge service socket\n");
619 vnet_brg_s.serv_sock->ops->release(vnet_brg_s.serv_sock);
621 //DEBUG("Deiniting bridge links\n");
625 //DEBUG("Freeing bridge hash tables\n");
627 vnet_free_htable(vnet_brg_s.ip2link, 0, 0);
629 vnet_brg_s.status = 0;
631 palacios_spinlock_deinit(&(vnet_brg_s.lock));
633 INFO("VNET LNX Bridge Deinit Finished\n");