2 * Palacios VNET Host Bridge
6 #include <linux/spinlock.h>
7 #include <linux/seq_file.h>
8 #include <linux/proc_fs.h>
9 #include <asm/uaccess.h>
10 #include <linux/inet.h>
11 #include <linux/kthread.h>
13 #include <linux/netdevice.h>
16 #include <linux/net.h>
17 #include <linux/string.h>
18 #include <linux/preempt.h>
19 #include <linux/sched.h>
22 #include <linux/net.h>
23 #include <linux/socket.h>
26 #include <vnet/vnet.h>
27 #include <vnet/vnet_hashtable.h>
28 #include "palacios-vnet.h"
/* Port the bridge's server socket is bound to in init_vnet_serv. */
33 #define VNET_SERVER_PORT 9000
/* Tuning knobs for the receive loop in _udp_server. */
35 #define VNET_ADAPTIVE_BRIDGE 1 // set this to one to have the bridge go to sleep if there is nothing to do...
36 #define VNET_NOPROGRESS_LIMIT 1000 // ... after this many iterations
37 #define VNET_YIELD_TIME_USEC 1000 // ... and go to sleep for this long
/*
 * Members of the per-link record (used elsewhere in this file through
 * struct vnet_link pointers). Only some members are visible in this listing.
 */
44 struct sockaddr_in sock_addr; /* remote endpoint; filled in by _create_link, used for connect and _udp_send */
45 vnet_brg_proto_t sock_proto; /* link protocol; compared against UDP / TCP */
47 struct nic_statistics stats; /* per-link counters: tx_* bumped in bridge_send_pkt, rx_* in _udp_server */
51 struct list_head node; /* linkage into vnet_brg_s.link_list */
/*
 * Global state of the host bridge. Only some members are visible in this
 * listing (status, num_links, link_idx and lock are used elsewhere in the
 * file but their declarations are not shown here).
 */
55 struct vnet_brg_state {
60 struct list_head link_list; /* all links, chained through vnet_link.node */
61 struct hashtable *ip2link; /* maps a link's dst_ip to its struct vnet_link */
65 struct socket * serv_sock; /* server socket created and bound in init_vnet_serv */
66 struct sockaddr_in serv_addr; /* local address the server socket is bound to */
67 vnet_brg_proto_t serv_proto; /* server protocol; vnet_bridge_init sets it to UDP */
69 struct task_struct * serv_thread; /* receive thread started by kthread_run in vnet_bridge_init */
71 void * brg_data; /* private data from vnet_core */
73 struct vnet_brg_stats stats; /* bridge-wide packet counters, exported by vnet_brg_stats() */
/* File-scope bridge instance; used throughout this file. */
77 static struct vnet_brg_state vnet_brg_s;
/*
 * Copy the bridge-wide counters (vnet_brg_s.stats) into the caller's buffer.
 * `stats` is written through directly; no NULL check is made before the copy.
 * The return statement is not visible in this listing.
 */
80 int vnet_brg_stats(struct vnet_brg_stats * stats){
81 memcpy(stats, &(vnet_brg_s.stats), sizeof(*stats));
/*
 * Look up a link by IP address in the ip2link hash table.
 * The key passed to the table is the address of the local `ip` argument;
 * hash_fn / hash_eq read sizeof(uint32_t) bytes through that pointer.
 * Returns whatever vnet_htable_search yields, cast to a link pointer.
 */
86 static inline struct vnet_link * _link_by_ip(uint32_t ip) {
87 return (struct vnet_link *)vnet_htable_search(vnet_brg_s.ip2link, (addr_t)&ip);
/*
 * Walk vnet_brg_s.link_list looking for the link whose idx equals `idx`.
 * NOTE(review): no locking is visible around this traversal, while
 * _create_link and _delete_link modify the list under vnet_brg_s.lock --
 * confirm callers cannot race with link creation/deletion.
 * The return statements are not visible in this listing.
 */
90 static inline struct vnet_link * _link_by_idx(int idx) {
91 struct vnet_link * link = NULL;
93 list_for_each_entry(link, &(vnet_brg_s.link_list), node) {
95 if (link->idx == idx) {
/*
 * Tear down one link: release its socket, then remove it from link_list and
 * from the ip2link table and decrement num_links, all under the bridge lock.
 * The remainder of the function (after the log call) is not visible here.
 */
103 static void _delete_link(struct vnet_link * link){
104 unsigned long flags = 0;
/* The socket is released before the lock is taken. */
106 link->sock->ops->release(link->sock);
108 palacios_spinlock_lock_irqsave(&(vnet_brg_s.lock), flags);
109 list_del(&(link->node));
/* Same key form as at insert time: the address of link->dst_ip. */
110 vnet_htable_remove(vnet_brg_s.ip2link, (addr_t)&(link->dst_ip), 0);
111 vnet_brg_s.num_links --;
112 palacios_spinlock_unlock_irqrestore(&(vnet_brg_s.lock), flags);
114 INFO("VNET Bridge: Link deleted, ip 0x%x, port: %d, idx: %d\n",
/*
 * Public entry point for removing a link: resolves `idx` to a link via
 * _link_by_idx. What happens with the result is not visible in this listing.
 * NOTE(review): `idx` is uint32_t here but _link_by_idx takes an int.
 */
123 void vnet_brg_delete_link(uint32_t idx){
124 struct vnet_link * link = _link_by_idx(idx);
/*
 * Iterate over every entry of vnet_brg_s.link_list. The _safe iterator keeps
 * a lookahead pointer (tmp_link) so the current entry may be removed during
 * the walk. The loop body is not visible in this listing.
 */
131 static void deinit_links_list(void){
132 struct vnet_link * link = NULL, * tmp_link = NULL;
134 list_for_each_entry_safe(link, tmp_link, &(vnet_brg_s.link_list), node) {
/*
 * Set up a new outgoing link: create its socket, connect it to
 * link->dst_ip : link->dst_port, then publish the link in link_list and
 * ip2link while holding the bridge lock.
 * NOTE(review): only part of this function is visible; local declarations,
 * the error-path returns and the final return value are not shown. The
 * return type is unsigned, so callers cannot test the result with `< 0`.
 */
139 static uint32_t _create_link(struct vnet_link * link) {
/* Translate the link protocol into an IPPROTO_* value. */
144 switch(link->sock_proto){
146 protocol = IPPROTO_UDP;
149 protocol = IPPROTO_TCP;
153 WARNING("Unsupported VNET Server Protocol\n");
/* NOTE(review): the socket type is SOCK_DGRAM for every protocol, including
 * IPPROTO_TCP -- confirm this is intended. */
157 if ((err = sock_create(AF_INET, SOCK_DGRAM, protocol, &link->sock)) < 0) {
158 WARNING("Could not create socket for VNET Link, error %d\n", err);
162 if (link->sock_proto == UDP) {
163 // no UDP checksumming
164 lock_sock(link->sock->sk);
165 link->sock->sk->sk_no_check = 1;
166 release_sock(link->sock->sk);
/* NOTE(review): clears sizeof(struct sockaddr) bytes of a sockaddr_in;
 * presumably the two have the same size -- sizeof(link->sock_addr) would
 * make that explicit. */
169 memset(&link->sock_addr, 0, sizeof(struct sockaddr));
171 link->sock_addr.sin_family = AF_INET;
/* dst_ip is stored as-is (no byte-order conversion); dst_port goes through htons. */
172 link->sock_addr.sin_addr.s_addr = link->dst_ip;
173 link->sock_addr.sin_port = htons(link->dst_port);
176 if ((err = link->sock->ops->connect(link->sock, (struct sockaddr *)&(link->sock_addr), sizeof(struct sockaddr), 0)) < 0) {
177 WARNING("Could not connect to remote VNET Server, error %d\n", err);
/* Publish the link; list, counters and hash table are updated under the lock. */
182 palacios_spinlock_lock_irqsave(&(vnet_brg_s.lock), flags);
183 list_add(&(link->node), &(vnet_brg_s.link_list));
184 vnet_brg_s.num_links ++;
/* Pre-increment: link_idx is bumped first, then stored as this link's idx. */
185 link->idx = ++ vnet_brg_s.link_idx;
/* Key is the address of link->dst_ip, so it stays valid as long as the link does. */
186 vnet_htable_insert(vnet_brg_s.ip2link, (addr_t)&(link->dst_ip), (addr_t)link);
187 palacios_spinlock_unlock_irqrestore(&(vnet_brg_s.lock), flags);
189 INFO("VNET Bridge: Link created, ip 0x%x, port: %d, idx: %d, link: %p, protocol: %s\n",
194 ((link->sock_proto==UDP)?"UDP":"TCP"));
/*
 * Public entry point for adding a link: allocate and zero a struct vnet_link,
 * record the destination ip/port and protocol, and hand it to _create_link.
 * On the failure path the link is freed again after logging a warning.
 * NOTE(review): _create_link returns uint32_t; the condition guarding the
 * failure path is not visible here -- if it is a `< 0` test on an unsigned
 * value it can never be true. Confirm.
 */
200 uint32_t vnet_brg_add_link(uint32_t ip, uint16_t port, vnet_brg_proto_t proto){
201 struct vnet_link * new_link = NULL;
204 new_link = palacios_alloc(sizeof(struct vnet_link));
208 memset(new_link, 0, sizeof(struct vnet_link));
210 new_link->dst_ip = ip;
211 new_link->dst_port = port;
212 new_link->sock_proto = proto;
214 idx = _create_link(new_link);
216 WARNING("Could not create link\n");
217 palacios_free(new_link);
/*
 * Copy the per-link counters of the link with index `link_idx` into `stats`.
 * The handling of an unknown index and the return values are not visible in
 * this listing.
 */
225 int vnet_brg_link_stats(uint32_t link_idx, struct nic_statistics * stats){
226 struct vnet_link * link;
228 link = _link_by_idx(link_idx);
233 memcpy(stats, &(link->stats), sizeof(*stats));
/*
 * Send `len` bytes from `buf` on `sock` using sock_sendmsg with a msghdr
 * built locally (MSG_NOSIGNAL, no control data). The return type, the
 * remaining msghdr setup and the return statement are not visible here.
 */
240 _udp_send(struct socket * sock,
241 struct sockaddr_in * addr,
242 unsigned char * buf, int len) {
/* Guard against a socket without an underlying sk. */
249 if (sock->sk == NULL) {
256 msg.msg_flags = MSG_NOSIGNAL;
258 msg.msg_namelen = sizeof(struct sockaddr_in);
259 msg.msg_control = NULL;
260 msg.msg_controllen = 0;
/* NOTE(review): msg_control was already set to NULL above; this second
 * assignment appears redundant. */
263 msg.msg_control = NULL;
267 size = sock_sendmsg(sock, &msg, len);
/*
 * Receive up to `len` bytes into `buf` from `sock` using sock_recvmsg.
 * When `nonblocking` is non-zero, MSG_DONTWAIT is added to the flags.
 * The return type, the remaining msghdr setup and the return statement are
 * not visible here.
 */
276 _udp_recv(struct socket * sock,
277 struct sockaddr_in * addr,
278 unsigned char * buf, int len, int nonblocking) {
/* Guard against a socket without an underlying sk. */
284 if (sock->sk == NULL) {
291 msg.msg_flags = MSG_NOSIGNAL | (nonblocking ? MSG_DONTWAIT : 0);
293 msg.msg_namelen = sizeof(struct sockaddr_in);
294 msg.msg_control = NULL;
295 msg.msg_controllen = 0;
/* NOTE(review): msg_control was already set to NULL above; this second
 * assignment appears redundant. */
298 msg.msg_control = NULL;
302 size = sock_recvmsg(sock, &msg, len, msg.msg_flags);
309 /* send packets to VNET core */
/*
 * Wrap a received buffer in a zeroed struct v3_vnet_pkt (source type
 * LINK_EDGE, source id = link_id, destination type LINK_NOSET), copy the
 * Ethernet header out of `buf`, count the packet in pkt_to_vmm and pass it to
 * v3_vnet_send_pkt, whose result is returned.
 * Part of the parameter list and the size/data setup are not visible here.
 */
311 send_to_palacios(unsigned char * buf,
314 struct v3_vnet_pkt pkt;
315 memset(&pkt,0,sizeof(struct v3_vnet_pkt));
317 pkt.dst_type = LINK_NOSET;
318 pkt.src_type = LINK_EDGE;
319 pkt.src_id = link_id;
320 memcpy(pkt.header, buf, ETHERNET_HEADER_LEN);
324 DEBUG("VNET Lnx Bridge: send pkt to VNET core (size: %d, src_id: %d, src_type: %d)\n",
325 pkt.size, pkt.src_id, pkt.src_type);
327 print_hex_dump(NULL, "pkt_data: ", 0, 20, 20, pkt.data, pkt.size, 0);
331 vnet_brg_s.stats.pkt_to_vmm ++;
333 return v3_vnet_send_pkt(&pkt, NULL);
337 /* send packet to extern network */
/*
 * Bridge input callback (installed as bridge_ops.input in vnet_bridge_init).
 * Counts the packet in pkt_from_vmm, resolves pkt->dst_id to a link and, for
 * a UDP link, sends the packet data to the link's remote address. Per-link tx
 * counters are updated; an unknown link index or protocol is counted in
 * pkt_drop_vmm.
 * The return type, the branch conditions and case labels are not visible in
 * this listing.
 */
339 bridge_send_pkt(struct v3_vm_info * vm,
340 struct v3_vnet_pkt * pkt,
341 void * private_data) {
342 struct vnet_link * link = NULL;
345 DEBUG("VNET Lnx Host Bridge: packet received from VNET Core ... pkt size: %d, link: %d\n",
349 print_hex_dump(NULL, "pkt_data: ", 0, 20, 20, pkt->data, pkt->size, 0);
353 vnet_brg_s.stats.pkt_from_vmm ++;
355 link = _link_by_idx(pkt->dst_id);
/* NOTE(review): presumably guarded by a NULL check on `link` (not visible
 * here), given the "wrong dst link" branch below -- confirm. */
357 switch(link->sock_proto){
/* NOTE(review): the result of _udp_send is ignored, so pkt_to_phy is
 * incremented even if the send failed. */
359 _udp_send(link->sock, &(link->sock_addr), pkt->data, pkt->size);
360 vnet_brg_s.stats.pkt_to_phy ++;
363 vnet_brg_s.stats.pkt_to_phy ++;
367 WARNING("VNET Server: Invalid Link Protocol\n");
368 vnet_brg_s.stats.pkt_drop_vmm ++;
370 link->stats.tx_bytes += pkt->size;
371 link->stats.tx_pkts ++;
373 INFO("VNET Bridge Linux Host: wrong dst link, idx: %d, discarding the packet\n", pkt->dst_id);
374 vnet_brg_s.stats.pkt_drop_vmm ++;
/*
 * Create the bridge's server socket for vnet_brg_s.serv_proto, bind it to
 * INADDR_ANY : VNET_SERVER_PORT and, for TCP, put it into listening state.
 * Local declarations and the return statements are not visible in this
 * listing; vnet_bridge_init treats a negative result as failure.
 */
381 static int init_vnet_serv(void) {
/* Translate the server protocol into an IPPROTO_* value. */
385 switch(vnet_brg_s.serv_proto){
387 protocol = IPPROTO_UDP;
390 protocol = IPPROTO_TCP;
394 WARNING("Unsupported VNET Server Protocol\n");
/* NOTE(review): the socket type is SOCK_DGRAM for every protocol, including
 * IPPROTO_TCP -- confirm this is intended. */
398 if ((err = sock_create(AF_INET, SOCK_DGRAM, protocol, &vnet_brg_s.serv_sock)) < 0) {
399 WARNING("Could not create VNET server socket, error: %d\n", err);
403 if (vnet_brg_s.serv_proto == UDP) {
404 // No UDP checksumming is done
405 lock_sock(vnet_brg_s.serv_sock->sk);
406 vnet_brg_s.serv_sock->sk->sk_no_check = 1;
407 release_sock(vnet_brg_s.serv_sock->sk);
/* NOTE(review): clears sizeof(struct sockaddr) bytes of a sockaddr_in;
 * presumably the two have the same size. */
410 memset(&vnet_brg_s.serv_addr, 0, sizeof(struct sockaddr));
412 vnet_brg_s.serv_addr.sin_family = AF_INET;
413 vnet_brg_s.serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
414 vnet_brg_s.serv_addr.sin_port = htons(VNET_SERVER_PORT);
416 if ((err = vnet_brg_s.serv_sock->ops->bind(vnet_brg_s.serv_sock, (struct sockaddr *)&(vnet_brg_s.serv_addr), sizeof(struct sockaddr))) < 0) {
417 WARNING("Could not bind VNET server socket to port %d, error: %d\n", VNET_SERVER_PORT, err);
421 INFO("VNET server bind to port: %d\n", VNET_SERVER_PORT);
/* Only a TCP server needs listen(); the backlog passed is 32. */
423 if(vnet_brg_s.serv_proto == TCP){
424 if((err = vnet_brg_s.serv_sock->ops->listen(vnet_brg_s.serv_sock, 32)) < 0){
425 WARNING("VNET Server error listening on port %d, error %d\n", VNET_SERVER_PORT, err);
/*
 * UDP receive loop of the bridge thread. Until kthread_should_stop() reports
 * a stop request it polls the server socket with a non-blocking receive,
 * maps the sender's IP to a link via _link_by_ip, updates the counters and
 * forwards the packet to the VNET core with send_to_palacios. When nothing is
 * received it yields the CPU, or sleeps once no progress has been made for
 * VNET_NOPROGRESS_LIMIT iterations (if VNET_ADAPTIVE_BRIDGE is set).
 * Several lines (declarations, branch structure, cleanup, return) are not
 * visible in this listing.
 */
435 static int _udp_server(void * arg) {
437 struct sockaddr_in pkt_addr;
438 struct vnet_link * link = NULL;
/* NOTE(review): declared without an initializer; its initialization is not
 * visible here -- confirm it is zeroed before the loop reads it. */
440 uint64_t noprogress_count;
442 INFO("Palacios VNET Bridge: UDP receiving server ..... \n");
/* One receive buffer, reused for every packet. */
444 pkt = palacios_alloc(MAX_PACKET_LEN);
447 ERROR("Unable to allocate packet in VNET UDP Server\n");
454 while (!kthread_should_stop()) {
456 // This is a NONBLOCKING receive
457 // If we block here, we will never detect that this thread
458 // is being signaled to stop, plus we might go uninterrupted on this core
459 // blocking out access to other threads - leave this NONBLOCKING
460 // unless you know what you are doing
461 len = _udp_recv(vnet_brg_s.serv_sock, &pkt_addr, pkt, MAX_PACKET_LEN, 1);
464 // If it would have blocked, we have no packet, and so
465 // we will give other threads on this core a chance
466 if (len==-EAGAIN || len==-EWOULDBLOCK || len==-EINTR) {
468 // avoid rollover in the counter out of paranoia
// (the condition below holds as long as adding one does not wrap around)
469 if (! ((noprogress_count + 1) < noprogress_count)) {
473 // adaptively select yielding strategy depending on
474 // whether we are making progress
475 if ((!VNET_ADAPTIVE_BRIDGE) || (noprogress_count < VNET_NOPROGRESS_LIMIT)) {
476 // Likely making progress, do fast yield so we
477 // come back immediately if there is no other action
478 palacios_yield_cpu();
480 // Likely not making progress, do potentially slow
481 // yield - we won't come back until VNET_YIELD_TIME_USEC has passed
482 palacios_sleep_cpu(VNET_YIELD_TIME_USEC);
489 // Something interesting has happened, therefore progress!
494 WARNING("Receive error: Could not get packet, error %d\n", len);
/* The sender's IPv4 address selects the link the packet belongs to. */
498 link = _link_by_ip(pkt_addr.sin_addr.s_addr);
501 WARNING("VNET Server: No VNET Link matches the src IP\n");
502 vnet_brg_s.stats.pkt_drop_phy ++;
506 vnet_brg_s.stats.pkt_from_phy ++;
507 link->stats.rx_bytes += len;
508 link->stats.rx_pkts ++;
510 send_to_palacios(pkt, len, link->idx);
513 INFO("VNET Server: UDP thread exiting\n");
/*
 * Thread function passed to kthread_run in vnet_bridge_init. Dispatches on
 * vnet_brg_s.serv_proto: the UDP branch body is not visible in this listing,
 * TCP only logs that it is unsupported, anything else logs an
 * unsupported-protocol warning. `arg` is passed as NULL by vnet_bridge_init.
 */
521 static int _rx_server(void * arg) {
523 if(vnet_brg_s.serv_proto == UDP){
525 }else if(vnet_brg_s.serv_proto == TCP) {
526 //accept new connection
527 //use select to receive pkt from physical network
528 //or create new kthread to handle each connection?
529 WARNING("VNET Server: TCP is not currently supported\n");
532 WARNING ("VNET Server: Unsupported Protocol\n");
/*
 * Hash callback for the ip2link table: `hdr_ptr` is treated as a pointer to
 * the key and sizeof(uint32_t) bytes at that address are hashed.
 */
539 static inline unsigned int hash_fn(addr_t hdr_ptr) {
540 return vnet_hash_buffer((uint8_t *)hdr_ptr, sizeof(uint32_t));
/*
 * Key-equality callback for the ip2link table: both arguments are treated as
 * pointers and sizeof(uint32_t) bytes are compared. Returns 1 when the bytes
 * are equal, 0 otherwise.
 */
543 static inline int hash_eq(addr_t key1, addr_t key2) {
544 return (memcmp((uint8_t *)key1, (uint8_t *)key2, sizeof(uint32_t)) == 0);
/*
 * Bring up the host bridge: reset the global state, create the link hash
 * table, set up the UDP server socket, start the receive thread and register
 * the bridge callbacks with the VNET core.
 * The return statements are not visible in this listing.
 */
548 int vnet_bridge_init(void) {
549 struct v3_vnet_bridge_ops bridge_ops;
/* Guard against initializing twice. */
551 if(vnet_brg_s.status != 0) {
554 vnet_brg_s.status = 1;
/* NOTE(review): this memset clears the whole struct, including the status
 * field that was set to 1 just above, so the guard at the top cannot detect
 * a second call unless status is set again later (not visible here). The
 * assignment should come after the memset. */
556 memset(&vnet_brg_s, 0, sizeof(struct vnet_brg_state));
558 INIT_LIST_HEAD(&(vnet_brg_s.link_list));
559 palacios_spinlock_init(&(vnet_brg_s.lock));
561 vnet_brg_s.serv_proto = UDP;
/* Keys are hashed/compared as 4-byte values (see hash_fn / hash_eq). */
563 vnet_brg_s.ip2link = vnet_create_htable(10, hash_fn, hash_eq);
564 if(vnet_brg_s.ip2link == NULL){
565 WARNING("Failure to initiate VNET link hashtable\n");
569 if(init_vnet_serv() < 0){
570 WARNING("Failure to initiate VNET server\n");
/* NOTE(review): no check of the kthread_run result is visible here; it can
 * return an ERR_PTR value on failure, which vnet_bridge_deinit would later
 * pass to kthread_stop -- confirm. */
574 vnet_brg_s.serv_thread = kthread_run(_rx_server, NULL, "vnet_brgd");
/* Packets from the VNET core are delivered through bridge_send_pkt; no poll callback. */
576 bridge_ops.input = bridge_send_pkt;
577 bridge_ops.poll = NULL;
579 if( v3_vnet_add_bridge(NULL, &bridge_ops, HOST_LNX_BRIDGE, NULL) < 0){
580 WARNING("VNET LNX Bridge: Fails to register bridge to VNET core");
583 INFO("VNET Linux Bridge initiated\n");
/*
 * Shut the host bridge down. Visible order of operations: unregister the
 * bridge from the VNET core, stop the receive thread, release the server
 * socket, free the ip2link table, clear status and deinitialize the lock.
 * The statement that follows the "Deiniting bridge links" marker is not
 * visible in this listing.
 */
589 void vnet_bridge_deinit(void){
591 INFO("VNET LNX Bridge Deinit Started\n");
/* Unregister first so the core stops handing packets to bridge_send_pkt. */
593 v3_vnet_del_bridge(HOST_LNX_BRIDGE);
595 //DEBUG("Stopping bridge service thread\n");
/* The thread is stopped before its socket is released below. */
597 kthread_stop(vnet_brg_s.serv_thread);
599 //DEBUG("Releasing bridee service socket\n");
601 vnet_brg_s.serv_sock->ops->release(vnet_brg_s.serv_sock);
603 //DEBUG("Deiniting bridge links\n");
607 //DEBUG("Freeing bridge hash tables\n");
609 vnet_free_htable(vnet_brg_s.ip2link, 0, 0);
/* Allows vnet_bridge_init to pass its status guard again. */
611 vnet_brg_s.status = 0;
613 palacios_spinlock_deinit(&(vnet_brg_s.lock));
615 INFO("VNET LNX Bridge Deinit Finished\n");