From: Lei Xia Date: Wed, 6 Apr 2011 19:09:04 +0000 (-0500) Subject: Merge VNET Linux backend code to Linux_module directory X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=6198c62287d7a33cfade156f4405c82f0f2bae0a;p=palacios.git Merge VNET Linux backend code to Linux_module directory --- diff --git a/linux_module/Makefile b/linux_module/Makefile index 92cb318..4732a3a 100644 --- a/linux_module/Makefile +++ b/linux_module/Makefile @@ -14,13 +14,15 @@ v3vee-objs:= palacios.o \ palacios-serial.o \ palacios-queue.o \ palacios-ringbuffer.o - -#palacios-socket.o -#palacios-vnet.o palacios-packet.o +# palacios-socket.o \ +# palacios-vnet.o \ +# palacios-packet.o \ +# palacios-hashtable.o v3vee-objs += ../libv3vee.a + obj-m := v3vee.o diff --git a/linux_module/palacios-hashtable.c b/linux_module/palacios-hashtable.c new file mode 100644 index 0000000..6c025fc --- /dev/null +++ b/linux_module/palacios-hashtable.c @@ -0,0 +1,508 @@ +/* + * Palacios Hash Table + * (c) Lei Xia, 2011 + */ + +#include +#include +#include +#include + +#include "palacios-hashtable.h" + + +struct hash_entry { + addr_t key; + addr_t value; + uint_t hash; + struct hash_entry * next; +}; + +struct hashtable { + uint_t table_length; + struct hash_entry ** table; + uint_t entry_count; + uint_t load_limit; + uint_t prime_index; + uint_t (*hash_fn) (addr_t key); + int (*eq_fn) (addr_t key1, addr_t key2); +}; + + +/* HASH FUNCTIONS */ + +static inline uint_t do_hash(struct hashtable * htable, addr_t key) { + /* Aim to protect against poor hash functions by adding logic here + * - logic taken from java 1.4 hashtable source */ + uint_t i = htable->hash_fn(key); + i += ~(i << 9); + i ^= ((i >> 14) | (i << 18)); /* >>> */ + i += (i << 4); + i ^= ((i >> 10) | (i << 22)); /* >>> */ + + return i; +} + + +/* HASH AN UNSIGNED LONG */ +/* LINUX UNSIGHED LONG HASH FUNCTION */ +#ifdef __32BIT__ +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e370001UL 
+//#define BITS_PER_LONG 32 +#elif defined(__64BIT__) +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL +//#define BITS_PER_LONG 64 +#else +#error Define GOLDEN_RATIO_PRIME for your wordsize. +#endif + +ulong_t palacios_hash_long(ulong_t val, uint_t bits) { + ulong_t hash = val; + +#ifdef __PALACIOS_64BIT__ + /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ + ulong_t n = hash; + n <<= 18; + hash -= n; + n <<= 33; + hash -= n; + n <<= 3; + hash += n; + n <<= 3; + hash -= n; + n <<= 4; + hash += n; + n <<= 2; + hash += n; +#else + /* On some cpus multiply is faster, on others gcc will do shifts */ + hash *= GOLDEN_RATIO_PRIME; +#endif + + /* High bits are more random, so use them. */ + return hash >> (BITS_PER_LONG - bits); +} + +/* HASH GENERIC MEMORY BUFFER */ +/* ELF HEADER HASH FUNCTION */ +ulong_t palacios_hash_buffer(uchar_t * msg, uint_t length) { + ulong_t hash = 0; + ulong_t temp = 0; + uint_t i; + + for (i = 0; i < length; i++) { + hash = (hash << 4) + *(msg + i) + i; + if ((temp = (hash & 0xF0000000))) { + hash ^= (temp >> 24); + } + hash &= ~temp; + } + return hash; +} + +/* indexFor */ +static inline uint_t indexFor(uint_t table_length, uint_t hash_value) { + return (hash_value % table_length); +}; + +#define freekey(X) kfree(X) + + +static void * tmp_realloc(void * old_ptr, uint_t old_size, uint_t new_size) { + void * new_buf = kmalloc(new_size, GFP_KERNEL); + + if (new_buf == NULL) { + return NULL; + } + + memcpy(new_buf, old_ptr, old_size); + kfree(old_ptr); + + return new_buf; +} + + +/* + Credit for primes table: Aaron Krowne + http://br.endernet.org/~akrowne/ + http://planetmath.org/encyclopedia/GoodHashTablePrimes.html +*/ +static const uint_t primes[] = { + 53, 97, 193, 389, + 769, 1543, 3079, 6151, + 12289, 24593, 49157, 98317, + 196613, 393241, 786433, 1572869, + 3145739, 6291469, 12582917, 25165843, + 50331653, 100663319, 201326611, 402653189, + 805306457, 1610612741 }; + + 
+// this assumes that the max load factor is .65 +static const uint_t load_factors[] = { + 35, 64, 126, 253, + 500, 1003, 2002, 3999, + 7988, 15986, 31953, 63907, + 127799, 255607, 511182, 1022365, + 2044731, 4089455, 8178897, 16357798, + 32715575, 65431158, 130862298, 261724573, + 523449198, 1046898282 }; + +const uint_t prime_table_len = sizeof(primes) / sizeof(primes[0]); + +struct hashtable * palacios_create_htable(uint_t min_size, + uint_t (*hash_fn) (addr_t), + int (*eq_fn) (addr_t, addr_t)) { + struct hashtable * htable; + uint_t prime_index; + uint_t size = primes[0]; + + /* Check requested hashtable isn't too large */ + if (min_size > (1u << 30)) { + return NULL; + } + + /* Enforce size as prime */ + for (prime_index = 0; prime_index < prime_table_len; prime_index++) { + if (primes[prime_index] > min_size) { + size = primes[prime_index]; + break; + } + } + + htable = (struct hashtable *)kmalloc(sizeof(struct hashtable), GFP_KERNEL); + + if (htable == NULL) { + return NULL; /*oom*/ + } + + htable->table = (struct hash_entry **)kmalloc(sizeof(struct hash_entry*) * size, GFP_KERNEL); + + if (htable->table == NULL) { + kfree(htable); + return NULL; /*oom*/ + } + + memset(htable->table, 0, size * sizeof(struct hash_entry *)); + + htable->table_length = size; + htable->prime_index = prime_index; + htable->entry_count = 0; + htable->hash_fn = hash_fn; + htable->eq_fn = eq_fn; + htable->load_limit = load_factors[prime_index]; + + return htable; +} + + +static int hashtable_expand(struct hashtable * htable) { + /* Double the size of the table to accomodate more entries */ + struct hash_entry ** new_table; + struct hash_entry * tmp_entry; + struct hash_entry ** entry_ptr; + uint_t new_size; + uint_t i; + uint_t index; + + /* Check we're not hitting max capacity */ + if (htable->prime_index == (prime_table_len - 1)) { + return 0; + } + + new_size = primes[++(htable->prime_index)]; + + new_table = (struct hash_entry **)kmalloc(sizeof(struct hash_entry*) * new_size, 
GFP_KERNEL); + + if (new_table != NULL) { + memset(new_table, 0, new_size * sizeof(struct hash_entry *)); + /* This algorithm is not 'stable'. ie. it reverses the list + * when it transfers entries between the tables */ + + for (i = 0; i < htable->table_length; i++) { + + while ((tmp_entry = htable->table[i]) != NULL) { + htable->table[i] = tmp_entry->next; + + index = indexFor(new_size, tmp_entry->hash); + + tmp_entry->next = new_table[index]; + + new_table[index] = tmp_entry; + } + } + + kfree(htable->table); + + htable->table = new_table; + } else { + /* Plan B: realloc instead */ + + //new_table = (struct hash_entry **)realloc(htable->table, new_size * sizeof(struct hash_entry *)); + new_table = (struct hash_entry **)tmp_realloc(htable->table, primes[htable->prime_index - 1], + new_size * sizeof(struct hash_entry *)); + + if (new_table == NULL) { + (htable->prime_index)--; + return 0; + } + + htable->table = new_table; + + memset(new_table[htable->table_length], 0, new_size - htable->table_length); + + for (i = 0; i < htable->table_length; i++) { + + for (entry_ptr = &(new_table[i]), tmp_entry = *entry_ptr; + tmp_entry != NULL; + tmp_entry = *entry_ptr) { + + index = indexFor(new_size, tmp_entry->hash); + + if (i == index) { + entry_ptr = &(tmp_entry->next); + } else { + *entry_ptr = tmp_entry->next; + tmp_entry->next = new_table[index]; + new_table[index] = tmp_entry; + } + } + } + } + + htable->table_length = new_size; + + htable->load_limit = load_factors[htable->prime_index]; + + return -1; +} + +uint_t palacios_htable_count(struct hashtable * htable) { + return htable->entry_count; +} + +int palacios_htable_insert(struct hashtable * htable, addr_t key, addr_t value) { + /* This method allows duplicate keys - but they shouldn't be used */ + uint_t index; + struct hash_entry * new_entry; + + if (++(htable->entry_count) > htable->load_limit) { + /* Ignore the return value. 
If expand fails, we should
+         * still try cramming just this value into the existing table
+         * -- we may not have memory for a larger table, but one more
+         * element may be ok. Next time we insert, we'll try expanding again.*/
+        hashtable_expand(htable);
+    }
+
+    new_entry = (struct hash_entry *)kmalloc(sizeof(struct hash_entry), GFP_KERNEL);
+
+    if (new_entry == NULL) {
+        (htable->entry_count)--; /* allocation failed: take back the count bumped before the load check */
+        return 0; /*oom*/
+    }
+
+    new_entry->hash = do_hash(htable, key); /* cached so lookups and table growth can reuse it */
+
+    index = indexFor(htable->table_length, new_entry->hash);
+
+    new_entry->key = key;
+    new_entry->value = value;
+
+    new_entry->next = htable->table[index]; /* push onto the head of the bucket chain */
+
+    htable->table[index] = new_entry;
+
+    return -1; /* non-zero means the entry was inserted */
+}
+
+/* Replace the value stored under an existing key.
+ *
+ * Walks the bucket chain selected by the key's hash; on the first entry whose
+ * cached hash matches and for which eq_fn() returns non-zero, `value` is
+ * stored. When `free_value` is non-zero the previous value is cast to a
+ * pointer and passed to kfree() first.
+ *
+ * Returns -1 if an entry was updated, 0 if the key was not found.
+ */
+int palacios_htable_change(struct hashtable * htable, addr_t key, addr_t value, int free_value) {
+    struct hash_entry * tmp_entry;
+    uint_t hash_value;
+    uint_t index;
+
+    hash_value = do_hash(htable, key);
+
+    index = indexFor(htable->table_length, hash_value);
+
+    tmp_entry = htable->table[index];
+
+    while (tmp_entry != NULL) {
+        /* Check hash value to short circuit heavier comparison */
+        if ((hash_value == tmp_entry->hash) && (htable->eq_fn(key, tmp_entry->key))) {
+
+            if (free_value) {
+                kfree((void *)(tmp_entry->value));
+            }
+
+            tmp_entry->value = value;
+            return -1;
+        }
+        tmp_entry = tmp_entry->next;
+    }
+    return 0;
+}
+
+
+/* Counting-table helper: add `value` to the value stored under `key`.
+ *
+ * Returns -1 if the key was found and its value increased, 0 if the key is
+ * not in the table (nothing is inserted in that case).
+ */
+int palacios_htable_inc(struct hashtable * htable, addr_t key, addr_t value) {
+    struct hash_entry * tmp_entry;
+    uint_t hash_value;
+    uint_t index;
+
+    hash_value = do_hash(htable, key);
+
+    index = indexFor(htable->table_length, hash_value);
+
+    tmp_entry = htable->table[index];
+
+    while (tmp_entry != NULL) {
+        /* Check hash value to short circuit heavier comparison */
+        if ((hash_value == tmp_entry->hash) && (htable->eq_fn(key, tmp_entry->key))) {
+
+            tmp_entry->value += value;
+            return -1;
+        }
+        tmp_entry = tmp_entry->next;
+    }
+    return 0;
+}
+
+/* Counting-table helper: subtract `value` from the value stored under `key`.
+ *
+ * Returns -1 if the key was found and its value decreased, 0 if the key is
+ * not in the table. The entry is kept even if the result reaches zero.
+ */
+int palacios_htable_dec(struct hashtable * htable, addr_t key, addr_t value) {
+    struct hash_entry * tmp_entry;
+    uint_t hash_value;
+    uint_t index;
+
+    hash_value = do_hash(htable, key);
+
+    index = indexFor(htable->table_length, hash_value);
+
+    tmp_entry = htable->table[index];
+
+    while (tmp_entry != NULL) {
+        /* Check hash value to short circuit heavier comparison */
+        if ((hash_value == tmp_entry->hash) && (htable->eq_fn(key, tmp_entry->key))) {
+
+            tmp_entry->value -= value;
+            return -1; /* found and updated */
+        }
+        tmp_entry = tmp_entry->next;
+    }
+    return 0; /* key not present */
+}
+
+
+/* returns value associated with key
+ *
+ * Scans the bucket chain selected by the key's hash and returns the value of
+ * the first entry whose cached hash matches and for which eq_fn() reports
+ * equality. Returns (addr_t)NULL when no entry matches, so a stored value of
+ * 0 cannot be told apart from "not found".
+ */
+addr_t palacios_htable_search(struct hashtable * htable, addr_t key) {
+    struct hash_entry * cursor;
+    uint_t hash_value;
+    uint_t index;
+
+    hash_value = do_hash(htable, key);
+
+    index = indexFor(htable->table_length, hash_value);
+
+    cursor = htable->table[index];
+
+    while (cursor != NULL) {
+        /* Check hash value to short circuit heavier comparison */
+        if ((hash_value == cursor->hash) &&
+            (htable->eq_fn(key, cursor->key))) {
+            return cursor->value;
+        }
+
+        cursor = cursor->next;
+    }
+
+    return (addr_t)NULL;
+}
+
+
+/* returns value associated with key
+ *
+ * Unlinks the first matching entry from its bucket chain, decrements the
+ * entry count and frees the entry itself. When `free_key` is non-zero the
+ * stored key is released with freekey() as well. The value is never freed
+ * here; it is handed back to the caller. Returns (addr_t)NULL when the key
+ * is not in the table.
+ */
+addr_t palacios_htable_remove(struct hashtable * htable, addr_t key, int free_key) {
+    /* TODO: consider compacting the table when the load factor drops enough,
+     * or provide a 'compact' method.
*/
+
+    struct hash_entry * cursor;
+    struct hash_entry ** entry_ptr;
+    addr_t value;
+    uint_t hash_value;
+    uint_t index;
+
+    hash_value = do_hash(htable, key);
+
+    index = indexFor(htable->table_length, hash_value);
+
+    entry_ptr = &(htable->table[index]); /* address of the link that points at `cursor` */
+    cursor = *entry_ptr;
+
+    while (cursor != NULL) {
+        /* Check hash value to short circuit heavier comparison */
+        if ((hash_value == cursor->hash) &&
+            (htable->eq_fn(key, cursor->key))) {
+
+            *entry_ptr = cursor->next; /* splice the entry out of the chain */
+            htable->entry_count--;
+            value = cursor->value;
+
+            if (free_key) {
+                freekey((void *)(cursor->key));
+            }
+            kfree(cursor);
+
+            return value;
+        }
+
+        entry_ptr = &(cursor->next);
+        cursor = cursor->next;
+    }
+    return (addr_t)NULL;
+}
+
+
+/* destroy
+ *
+ * Frees every entry, then the bucket array, then the table itself.
+ * free_values: when non-zero each stored value is cast to a pointer and
+ *              passed to kfree().
+ * free_keys:   when non-zero each stored key is released with freekey().
+ * The two branches below differ only in whether the value is freed.
+ */
+void palacios_free_htable(struct hashtable * htable, int free_values, int free_keys) {
+    uint_t i;
+    struct hash_entry * cursor;
+    struct hash_entry * tmp;
+    struct hash_entry **table = htable->table;
+
+    if (free_values) {
+        for (i = 0; i < htable->table_length; i++) {
+            cursor = table[i];
+
+            while (cursor != NULL) {
+                tmp = cursor;
+                cursor = cursor->next; /* advance before the node is freed */
+
+                if (free_keys) {
+                    freekey((void *)(tmp->key));
+                }
+                kfree((void *)(tmp->value));
+                kfree(tmp);
+            }
+        }
+    } else {
+        for (i = 0; i < htable->table_length; i++) {
+            cursor = table[i];
+
+            while (cursor != NULL) {
+                struct hash_entry * tmp; /* shadows the function-scope `tmp` */
+
+                tmp = cursor;
+                cursor = cursor->next;
+
+                if (free_keys) {
+                    freekey((void *)(tmp->key));
+                }
+                kfree(tmp);
+            }
+        }
+    }
+
+    kfree(htable->table);
+    kfree(htable);
+}
+
diff --git a/linux_module/palacios-hashtable.h b/linux_module/palacios-hashtable.h
new file mode 100644
index 0000000..3d46202
--- /dev/null
+++ b/linux_module/palacios-hashtable.h
@@ -0,0 +1,132 @@
+#ifndef __PALACIOS_HASHTABLE_H__
+#define __PALACIOS_HASHTABLE_H__
+
+struct hashtable;
+
+#define __32BIT__ /* selects the 32-bit GOLDEN_RATIO_PRIME in palacios-hashtable.c */
+
+/* Example of use:
+ *
+ *      struct hashtable  *h;
+ *      struct some_key   *k;
+ *      struct some_value *v;
+ *
+ *      static uint_t         hash_from_key_fn( void *k );
+ *      static int
keys_equal_fn ( void *key1, void *key2 ); + * + * h = create_hashtable(16, hash_from_key_fn, keys_equal_fn); + * k = (struct some_key *) malloc(sizeof(struct some_key)); + * v = (struct some_value *) malloc(sizeof(struct some_value)); + * + * (initialise k and v to suitable values) + * + * if (! hashtable_insert(h,k,v) ) + * { exit(-1); } + * + * if (NULL == (found = hashtable_search(h,k) )) + * { printf("not found!"); } + * + * if (NULL == (found = hashtable_remove(h,k) )) + * { printf("Not found\n"); } + * + */ + +/* Macros may be used to define type-safe(r) hashtable access functions, with + * methods specialized to take known key and value types as parameters. + * + * Example: + * + * Insert this at the start of your file: + * + * DEFINE_HASHTABLE_INSERT(insert_some, struct some_key, struct some_value); + * DEFINE_HASHTABLE_SEARCH(search_some, struct some_key, struct some_value); + * DEFINE_HASHTABLE_REMOVE(remove_some, struct some_key, struct some_value); + * + * This defines the functions 'insert_some', 'search_some' and 'remove_some'. + * These operate just like hashtable_insert etc., with the same parameters, + * but their function signatures have 'struct some_key *' rather than + * 'void *', and hence can generate compile time errors if your program is + * supplying incorrect data as a key (and similarly for value). + * + * Note that the hash and key equality functions passed to create_hashtable + * still take 'void *' parameters instead of 'some key *'. This shouldn't be + * a difficult issue as they're only defined and passed once, and the other + * functions will ensure that only valid keys are supplied to them. + * + * The cost for this checking is increased code size and runtime overhead + * - if performance is important, it may be worth switching back to the + * unsafe methods once your program has been debugged with the safe methods. 
+ * This just requires switching to some simple alternative defines - eg: + * #define insert_some hashtable_insert + * + */ + +typedef unsigned char uchar_t; +typedef unsigned int uint_t; +typedef unsigned long long ullong_t; +typedef unsigned long ulong_t; +typedef ulong_t addr_t; + + +#define DEFINE_HASHTABLE_INSERT(fnname, keytype, valuetype) \ + static int fnname (struct hashtable * htable, keytype key, valuetype value) { \ + return v3_htable_insert(htable, (addr_t)key, (addr_t)value); \ + } + +#define DEFINE_HASHTABLE_SEARCH(fnname, keytype, valuetype) \ + static valuetype * fnname (struct hashtable * htable, keytype key) { \ + return (valuetype *) (v3_htable_search(htable, (addr_t)key)); \ + } + +#define DEFINE_HASHTABLE_REMOVE(fnname, keytype, valuetype, free_key) \ + static valuetype * fnname (struct hashtable * htable, keytype key) { \ + return (valuetype *) (v3_htable_remove(htable, (addr_t)key, free_key)); \ + } + + + + + +/* These cannot be inlined because they are referenced as fn ptrs */ +ulong_t palacios_hash_long(ulong_t val, uint_t bits); +ulong_t palacios_hash_buffer(uchar_t * msg, uint_t length); + + + +struct hashtable * palacios_create_htable(uint_t min_size, + uint_t (*hashfunction) (addr_t key), + int (*key_eq_fn) (addr_t key1, addr_t key2)); + +void palacios_free_htable(struct hashtable * htable, int free_values, int free_keys); + +/* + * returns non-zero for successful insertion + * + * This function will cause the table to expand if the insertion would take + * the ratio of entries to table size over the maximum load factor. + * + * This function does not check for repeated insertions with a duplicate key. + * The value returned when using a duplicate key is undefined -- when + * the hashtable changes size, the order of retrieval of duplicate key + * entries is reversed. + * If in doubt, remove before insert. 
+ */ +int palacios_htable_insert(struct hashtable * htable, addr_t key, addr_t value); + +int palacios_htable_change(struct hashtable * htable, addr_t key, addr_t value, int free_value); + + +// returns the value associated with the key, or NULL if none found +addr_t palacios_htable_search(struct hashtable * htable, addr_t key); + +// returns the value associated with the key, or NULL if none found +addr_t palacios_htable_remove(struct hashtable * htable, addr_t key, int free_key); + +uint_t palacios_htable_count(struct hashtable * htable); + +// Specialty functions for a counting hashtable +int palacios_htable_inc(struct hashtable * htable, addr_t key, addr_t value); +int palacios_htable_dec(struct hashtable * htable, addr_t key, addr_t value); + + +#endif diff --git a/linux_module/palacios-packet.c b/linux_module/palacios-packet.c index f6f4c2c..2df4879 100644 --- a/linux_module/palacios-packet.c +++ b/linux_module/palacios-packet.c @@ -1,5 +1,5 @@ /* - * VM Raw Packet + * Palacios Raw Packet * (c) Lei Xia, 2010 */ #include @@ -18,15 +18,56 @@ #include #include #include +#include #include "palacios.h" #include "palacios-packet.h" +#include "palacios-hashtable.h" -//#define DEBUG_PALACIOS_PACKET -static struct socket * raw_sock; +struct palacios_packet_state { + struct socket * raw_sock; + uint8_t inited; + + struct hashtable * mac_vm_cache; + struct task_struct * server_thread; +}; + +static struct palacios_packet_state packet_state; + +static inline uint_t hash_fn(addr_t hdr_ptr) { + uint8_t * hdr_buf = (uint8_t *)hdr_ptr; + + return palacios_hash_buffer(hdr_buf, ETH_ALEN); +} + +static inline int hash_eq(addr_t key1, addr_t key2) { + return (memcmp((uint8_t *)key1, (uint8_t *)key2, ETH_ALEN) == 0); +} + + +static int palacios_packet_add_recver(const char * mac, + struct v3_vm_info * vm){ + char * key; + + key = (char *)kmalloc(ETH_ALEN, GFP_KERNEL); + memcpy(key, mac, ETH_ALEN); + + if (palacios_htable_insert(packet_state.mac_vm_cache, (addr_t)key, (addr_t)vm) 
== 0) { + printk("Palacios Packet: Failed to insert new mac entry to the hash table\n"); + return -1; + } + + printk("Packet: Add MAC: %2x:%2x:%2x:%2x:%2x:%2x\n", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + + return 0; +} -static int packet_inited = 0; +static int palacios_packet_del_recver(const char * mac, + struct v3_vm_info * vm){ + + return 0; +} static int init_raw_socket (const char * eth_dev){ int err; @@ -34,23 +75,22 @@ static int init_raw_socket (const char * eth_dev){ struct ifreq if_req; int dev_idx; - err = sock_create(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL), &raw_sock); + err = sock_create(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL), &(packet_state.raw_sock)); if (err < 0) { printk(KERN_WARNING "Could not create a PF_PACKET Socket, err %d\n", err); return -1; } if(eth_dev == NULL){ - return 0; + eth_dev = "eth0"; /* default "eth0" */ } memset(&if_req, 0, sizeof(if_req)); - strncpy(if_req.ifr_name, eth_dev, sizeof(if_req.ifr_name)); - - err = raw_sock->ops->ioctl(raw_sock, SIOCGIFINDEX, (long)&if_req); + strncpy(if_req.ifr_name, eth_dev, IFNAMSIZ); //sizeof(if_req.ifr_name)); + err = packet_state.raw_sock->ops->ioctl(packet_state.raw_sock, SIOCGIFINDEX, (long)&if_req); if(err < 0){ printk(KERN_WARNING "Palacios Packet: Unable to get index for device %s, error %d\n", if_req.ifr_name, err); - dev_idx = 2; /* default "eth0" */ + dev_idx = 2; /* match ALL 2:"eth0" */ } else{ dev_idx = if_req.ifr_ifindex; @@ -63,7 +103,7 @@ static int init_raw_socket (const char * eth_dev){ sock_addr.sll_protocol = htons(ETH_P_ALL); sock_addr.sll_ifindex = dev_idx; - err = raw_sock->ops->bind(raw_sock, (struct sockaddr *)&sock_addr, sizeof(sock_addr)); + err = packet_state.raw_sock->ops->bind(packet_state.raw_sock, (struct sockaddr *)&sock_addr, sizeof(sock_addr)); if (err < 0){ printk(KERN_WARNING "Error binding raw packet to device %s, %d\n", eth_dev, err); return -1; @@ -80,17 +120,8 @@ palacios_packet_send(const char * pkt, unsigned int len, void * private_data) { 
struct msghdr msg; struct iovec iov; mm_segment_t oldfs; - int size = 0; - -#ifdef DEBUG_PALACIOS_PACKET - { - printk("Palacios Packet: send pkt to NIC (size: %d)\n", - len); - //print_hex_dump(NULL, "pkt_data: ", 0, 20, 20, pkt, len, 0); - } -#endif + int size = 0; - iov.iov_base = (void *)pkt; iov.iov_len = (__kernel_size_t)len; @@ -104,15 +135,26 @@ palacios_packet_send(const char * pkt, unsigned int len, void * private_data) { oldfs = get_fs(); set_fs(KERNEL_DS); - size = sock_sendmsg(raw_sock, &msg, len); + size = sock_sendmsg(packet_state.raw_sock, &msg, len); set_fs(oldfs); +#if 1 + { + printk("Palacios Packet: send pkt to NIC (size: %d)\n", + len); + print_hex_dump(NULL, "pkt_header: ", 0, 20, 20, pkt, 20, 0); + printk("palacios_packet_send return: %d\n", size); + } +#endif + return size; } static struct v3_packet_hooks palacios_packet_hooks = { .send = palacios_packet_send, + .add_recver = palacios_packet_add_recver, + .del_recver = palacios_packet_del_recver, }; @@ -123,7 +165,7 @@ recv_pkt(char * pkt, int len) { mm_segment_t oldfs; int size = 0; - if (raw_sock == NULL) { + if (packet_state.raw_sock == NULL) { return -1; } @@ -141,7 +183,7 @@ recv_pkt(char * pkt, int len) { oldfs = get_fs(); set_fs(KERNEL_DS); - size = sock_recvmsg(raw_sock, &msg, len, msg.msg_flags); + size = sock_recvmsg(packet_state.raw_sock, &msg, len, msg.msg_flags); set_fs(oldfs); return size; @@ -153,9 +195,11 @@ send_raw_packet_to_palacios(char * pkt, int len, struct v3_vm_info * vm) { struct v3_packet_event event; + char data[ETHERNET_PACKET_LEN]; - event.pkt = kmalloc(len, GFP_KERNEL); - memcpy(event.pkt, pkt, len); + /* one memory copy */ + memcpy(data, pkt, len); + event.pkt = data; event.size = len; v3_deliver_packet_event(vm, &event); @@ -164,25 +208,38 @@ send_raw_packet_to_palacios(char * pkt, static int packet_server(void * arg) { char pkt[ETHERNET_PACKET_LEN]; int size; + struct v3_vm_info *vm; printk("Palacios Raw Packet Bridge: Staring receiving server\n"); while 
(!kthread_should_stop()) { size = recv_pkt(pkt, ETHERNET_PACKET_LEN); if (size < 0) { - printk(KERN_WARNING "Palacios Packet Socket receive error\n"); + printk(KERN_WARNING "Palacios raw packet receive error, Server terminated\n"); break; } -#ifdef DEBUG_PALACIOS_PACKET +#if 1 { printk("Palacios Packet: receive pkt from NIC (size: %d)\n", size); - //print_hex_dump(NULL, "pkt_data: ", 0, 20, 20, pkt, size, 0); + print_hex_dump(NULL, "pkt_header: ", 0, 10, 10, pkt, 20, 0); } #endif - send_raw_packet_to_palacios(pkt, size, NULL); + /* if VNET is enabled, send to VNET */ + // ... + + + /* if it is broadcast or multicase packet */ + // ... + + + vm = (struct v3_vm_info *)palacios_htable_search(packet_state.mac_vm_cache, (addr_t)pkt); + if(vm != NULL){ + printk("Find destinated VM 0x%p\n", vm); + send_raw_packet_to_palacios(pkt, size, vm); + } } return 0; @@ -191,14 +248,29 @@ static int packet_server(void * arg) { int palacios_init_packet(const char * eth_dev) { - if(packet_inited == 0){ - packet_inited = 1; - init_raw_socket(eth_dev); + if(packet_state.inited == 0){ + packet_state.inited = 1; + + if(init_raw_socket(eth_dev) == -1){ + printk("Error to initiate palacios packet interface\n"); + return -1; + } + V3_Init_Packet(&palacios_packet_hooks); - kthread_run(packet_server, NULL, "raw-packet-server"); + packet_state.mac_vm_cache = palacios_create_htable(0, &hash_fn, &hash_eq); + + packet_state.server_thread = kthread_run(packet_server, NULL, "raw-packet-server"); } return 0; } +void palacios_deinit_packet(const char * eth_dev) { + + kthread_stop(packet_state.server_thread); + packet_state.raw_sock->ops->release(packet_state.raw_sock); + palacios_free_htable(packet_state.mac_vm_cache, 0, 1); + packet_state.inited = 0; +} + diff --git a/linux_module/palacios-vnet.c b/linux_module/palacios-vnet.c index ad51ee5..3f6f899 100644 --- a/linux_module/palacios-vnet.c +++ b/linux_module/palacios-vnet.c @@ -24,7 +24,7 @@ #define VNET_UDP_PORT 9000 -struct vnet_route { +struct 
palacios_vnet_route { struct v3_vnet_route route; int route_idx; @@ -66,7 +66,7 @@ struct palacios_vnet_state { static struct palacios_vnet_state vnet_state; -struct vnet_link * find_link_by_ip(uint32_t ip) { +struct vnet_link * link_by_ip(uint32_t ip) { struct vnet_link * link = NULL; list_for_each_entry(link, &(vnet_state.link_list), node) { @@ -79,7 +79,7 @@ struct vnet_link * find_link_by_ip(uint32_t ip) { return NULL; } -struct vnet_link * find_link_by_idx(int idx) { +struct vnet_link * link_by_idx(int idx) { struct vnet_link * link = NULL; list_for_each_entry(link, &(vnet_state.link_list), node) { @@ -91,8 +91,8 @@ struct vnet_link * find_link_by_idx(int idx) { return NULL; } -struct vnet_route * find_route_by_idx(int idx) { - struct vnet_route * route = NULL; +struct palacios_vnet_route * route_by_idx(int idx) { + struct palacios_vnet_route * route = NULL; list_for_each_entry(route, &(vnet_state.route_list), node) { @@ -109,119 +109,162 @@ static int parse_mac_str(char * str, uint8_t * qual, uint8_t * mac) { char * token; printk("Parsing MAC (%s)\n", str); + + *qual = MAC_NOSET; + if(strnicmp("any", str, strlen(str)) == 0){ + *qual = MAC_ANY; + return 0; + }else if(strnicmp("none", str, strlen(str)) == 0){ + *qual = MAC_NONE; + return 0; + }else{ + if (strstr(str, "-")) { + token = strsep(&str, "-"); + + if (strnicmp("not", token, strlen("not")) == 0) { + *qual = MAC_NOT; + } else { + printk("Invalid MAC String token (%s)\n", token); + return -1; + } + } - if (strstr(str, "-")) { - token = strsep(&str, "-"); + if (strstr(str, ":")) { + int i = 0; - if (strnicmp("not", token, strlen("not")) == 0) { - *qual = MAC_NOT; - } else { + if(*qual == MAC_NOSET){ + *qual = MAC_ADDR; + } + + for (i = 0; i < 6; i++) { + token = strsep(&str, ":"); + if (!token) { + printk("Invalid MAC String token (%s)\n", token); + return -1; + } + mac[i] = simple_strtol(token, &token, 16); + } + printk("MAC: %2x:%2x:%2x:%2x:%2x:%2x\n", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + 
+ }else { printk("Invalid MAC String token (%s)\n", token); return -1; } + } - if (!strstr(str, ":")) { - if (strnicmp("any", str, strlen("any")) == 0) { - printk("qual = any\n"); - *qual = MAC_ANY; - } else if (strnicmp("none", str, strlen("none")) == 0) { - printk("qual = None\n"); - *qual = MAC_NONE; - } else { - printk("Invalid MAC Qual token (%s)\n", str); - return -1; - } - - } else { - int i = 0; + return 0; +} - *qual = MAC_ADDR; - for (i = 0; i < 6; i++) { - token = strsep(&str, ":"); - mac[i] = simple_strtol(token, &token, 16); +static int str2mac(char * str, uint8_t * mac){ + int i = 0; + char *hex = NULL; + + for (i = 0; i < ETH_ALEN; i++) { + hex = strsep(&str, ":"); + if (!hex) { + printk("Invalid MAC String token (%s)\n", str); + return -1; } + mac[i] = simple_strtol(hex, &hex, 16); } - + return 0; } + +/* Format: + * add src-MAC dst-MAC dst-TYPE [dst-ID] src-TYPE [src-ID] + * + * src-MAC = dst-MAC = not-MAC|any|none|MAC + * dst-TYPE = edge|interface + * src-TYPE = edge|interface|any + * dst-ID = src-ID = IP|MAC + * MAC=xx:xx:xx:xx:xx:xx + * IP = xxx.xxx.xxx.xxx + */ static int parse_route_str(char * str, struct v3_vnet_route * route) { char * token = NULL; struct vnet_link *link = NULL; // src MAC token = strsep(&str, " "); - if (!token) { return -1; } - parse_mac_str(token, &(route->src_mac_qual), route->src_mac); // dst MAC token = strsep(&str, " "); - if (!token) { return -1; } - parse_mac_str(token, &(route->dst_mac_qual), route->dst_mac); // dst LINK type token = strsep(&str, " "); - if (!token) { return -1; } + printk("dst type =(%s)\n", token); if (strnicmp("interface", token, strlen("interface")) == 0) { route->dst_type = LINK_INTERFACE; - printk("DST type = INTERFACE\n"); } else if (strnicmp("edge", token, strlen("edge")) == 0) { route->dst_type = LINK_EDGE; - printk("DST type = EDGE\n"); } else { printk("Invalid Destination Link Type (%s)\n", token); return -1; } - - // dst link ID + // dst link token = strsep(&str, " "); - if (!token) { 
return -1; } + printk("dst link ID=(%s)\n", token); - printk("dst link ID=%s\n", token); - - // Figure out link ID here + // Figure out link here if (route->dst_type == LINK_EDGE) { uint32_t link_ip; - // Figure out Link Here if (in4_pton(token, strlen(token), (uint8_t *)&(link_ip), '\0', NULL) != 1) { printk("Invalid Dst IP address (%s)\n", token); return -EFAULT; } - - printk("link_ip = %d\n", link_ip); - link = find_link_by_ip(link_ip); + link = link_by_ip(link_ip); if (link != NULL){ route->dst_id = link->link_idx; }else{ printk("can not find dst link %s\n", token); return -1; } + + printk("link_ip = %d, link_id = %d\n", link_ip, link->link_idx); + } else if (route->dst_type == LINK_INTERFACE) { + uint8_t mac[ETH_ALEN]; + + if(str2mac(token, mac) == -1){ + printk("wrong MAC format (%s)\n", token); + return -1; + } + + route->dst_id = v3_vnet_find_dev(mac); + if (route->dst_id == -1){ + printk("can not find dst device %s\n", token); + return -1; + } } else { printk("Unsupported dst link type\n"); return -1; } + route->src_id = -1; + route->src_type = -1; + // src LINK token = strsep(&str, " "); @@ -233,13 +276,10 @@ static int parse_route_str(char * str, struct v3_vnet_route * route) { if (strnicmp("interface", token, strlen("interface")) == 0) { route->src_type = LINK_INTERFACE; - printk("SRC type = INTERFACE\n"); } else if (strnicmp("edge", token, strlen("edge")) == 0) { route->src_type = LINK_EDGE; - printk("SRC type = EDGE\n"); } else if (strnicmp("any", token, strlen("any")) == 0) { route->src_type = LINK_ANY; - printk("SRC type = ANY\n"); } else { printk("Invalid Src link type (%s)\n", token); return -1; @@ -247,7 +287,7 @@ static int parse_route_str(char * str, struct v3_vnet_route * route) { if (route->src_type == LINK_ANY) { - route->src_id = (uint32_t)-1; + route->src_id = -1; } else if (route->src_type == LINK_EDGE) { uint32_t src_ip; token = strsep(&str, " "); @@ -262,19 +302,31 @@ static int parse_route_str(char * str, struct v3_vnet_route * route) 
{ return -EFAULT; } - link = find_link_by_ip(src_ip); + link = link_by_ip(src_ip); if (link != NULL){ route->src_id = link->link_idx; }else{ printk("can not find src link %s\n", token); return -1; } + } else if(route->src_type == LINK_INTERFACE){ + uint8_t mac[ETH_ALEN]; + + if(str2mac(token, mac) == -1){ + printk("wrong MAC format (%s)\n", token); + return -1; + } + + route->src_id = v3_vnet_find_dev(mac); + if (route->src_id == -1){ + printk("can not find dst device %s\n", token); + return -1; + } } else { printk("Invalid link type\n"); return -1; } - return 0; } @@ -282,7 +334,7 @@ static int parse_route_str(char * str, struct v3_vnet_route * route) { static void * route_seq_start(struct seq_file * s, loff_t * pos) { - struct vnet_route * route_iter = NULL; + struct palacios_vnet_route * route_iter = NULL; loff_t i = 0; @@ -307,7 +359,6 @@ static void * link_seq_start(struct seq_file * s, loff_t * pos) { struct vnet_link * link_iter = NULL; loff_t i = 0; - if (*pos >= vnet_state.num_links) { return NULL; } @@ -327,9 +378,9 @@ static void * link_seq_start(struct seq_file * s, loff_t * pos) { static void * route_seq_next(struct seq_file * s, void * v, loff_t * pos) { - struct vnet_route * route_iter = NULL; + struct palacios_vnet_route * route_iter = NULL; - route_iter = list_entry(((struct vnet_route *)v)->node.next, struct vnet_route, node); + route_iter = list_entry(((struct palacios_vnet_route *)v)->node.next, struct palacios_vnet_route, node); // Check if the list has looped if (&(route_iter->node) == &(vnet_state.route_list)) { @@ -373,12 +424,12 @@ static void link_seq_stop(struct seq_file * s, void * v) { } static int route_seq_show(struct seq_file * s, void * v) { - struct vnet_route * route_iter = v; + struct palacios_vnet_route * route_iter = v; struct v3_vnet_route * route = &(route_iter->route); - seq_printf(s, "%d:\t", route_iter->route_idx); + seq_printf(s, "\nSrc:\t"); switch (route->src_mac_qual) { case MAC_ANY: seq_printf(s, "any "); @@ -387,7 
+438,7 @@ static int route_seq_show(struct seq_file * s, void * v) { seq_printf(s, "none "); break; case MAC_NOT: - seq_printf(s, "not-%x:%x:%x:%x:%x:%x ", + seq_printf(s, "not-%2x:%2x:%2x:%2x:%2x:%2x ", route->src_mac[0], route->src_mac[1], route->src_mac[2], route->src_mac[3], route->src_mac[4], route->src_mac[5]); break; @@ -398,6 +449,7 @@ static int route_seq_show(struct seq_file * s, void * v) { break; } + seq_printf(s, "\nDst:\t"); switch (route->dst_mac_qual) { case MAC_ANY: seq_printf(s, "any "); @@ -417,10 +469,10 @@ static int route_seq_show(struct seq_file * s, void * v) { break; } - + seq_printf(s, "\nDst-Type:\t"); switch (route->dst_type) { case LINK_EDGE: { - struct vnet_link * link = (struct vnet_link *)find_link_by_idx(route->dst_id); + struct vnet_link * link = (struct vnet_link *)link_by_idx(route->dst_id); seq_printf(s, "EDGE %pI4", &link->dst_ip); break; } @@ -434,13 +486,10 @@ static int route_seq_show(struct seq_file * s, void * v) { break; } - - - - + seq_printf(s, "\nSrc-Type:\t"); switch (route->src_type) { case LINK_EDGE: { - struct vnet_link * link = (struct vnet_link *)find_link_by_idx(route->src_id); + struct vnet_link * link = (struct vnet_link *)link_by_idx(route->src_id); seq_printf(s, "EDGE %pI4", &link->dst_ip); break; } @@ -500,9 +549,16 @@ static int link_open(struct inode * inode, struct file * file) { return seq_open(file, &link_seq_ops); } -static int inject_route(struct vnet_route * route) { +static int inject_route(struct palacios_vnet_route * route) { + unsigned long flags; + v3_vnet_add_route(route->route); + spin_lock_irqsave(&(vnet_state.lock), flags); + list_add(&(route->node), &(vnet_state.route_list)); + route->route_idx = vnet_state.num_routes++; + spin_unlock_irqrestore(&(vnet_state.lock), flags); + printk("Palacios-vnet: One route added to VNET core\n"); return 0; @@ -536,14 +592,14 @@ route_write(struct file * file, } if (strnicmp("ADD", token, strlen("ADD")) == 0) { - struct vnet_route * new_route = NULL; - 
new_route = kmalloc(sizeof(struct vnet_route), GFP_KERNEL); + struct palacios_vnet_route * new_route = NULL; + new_route = kmalloc(sizeof(struct palacios_vnet_route), GFP_KERNEL); if (!new_route) { return -ENOMEM; } - memset(new_route, 0, sizeof(struct vnet_route)); + memset(new_route, 0, sizeof(struct palacios_vnet_route)); if (parse_route_str(buf_iter, &(new_route->route)) == -1) { kfree(new_route); @@ -602,6 +658,8 @@ static int create_link(struct vnet_link * link) { return 0; } + +/* ADD dst-ip 9000 */ static ssize_t link_write(struct file * file, const char * buf, size_t size, loff_t * ppos) { char link_buf[256]; @@ -810,7 +868,7 @@ send_to_palacios(unsigned char * buf, memcpy(pkt.header, buf, ETHERNET_HEADER_LEN); pkt.data = buf; -#ifdef DEBUG_VNET_BRIGE +#ifdef CONFIG_PALACIOS_VNET_DEBUG { printk("VNET Lnx Bridge: send pkt to VNET core (size: %d, src_id: %d, src_type: %d)\n", pkt.size, pkt.src_id, pkt.src_type); @@ -830,7 +888,7 @@ bridge_send_pkt(struct v3_vm_info * vm, void * private_data) { struct vnet_link * link; - #ifdef DEBUG_VNET_BRIGE + #ifdef CONFIG_PALACIOS_VNET_DEBUG { printk("VNET Lnx Host Bridge: packet received from VNET Core ... 
len: %d, pkt size: %d, link: %d\n", len, @@ -843,7 +901,7 @@ bridge_send_pkt(struct v3_vm_info * vm, vnet_state.pkt_recv ++; - link = find_link_by_idx(pkt->dst_id); + link = link_by_idx(pkt->dst_id); if (link != NULL) { udp_send(link->sock, &(link->sock_addr), pkt->data, pkt->size); vnet_state.pkt_udp_send ++; @@ -905,7 +963,7 @@ static int vnet_server(void * arg) { continue; } - link = find_link_by_ip(ntohl(pkt_addr.sin_addr.s_addr)); + link = link_by_ip(ntohl(pkt_addr.sin_addr.s_addr)); if (link != NULL){ link_id= link->link_idx; } @@ -921,29 +979,6 @@ static int vnet_server(void * arg) { return 0; } -#if 0 -static int profiling(void *args) { - static unsigned long long last_time=0; - unsigned long long cur_time=0; - set_user_nice(current, MAX_PRIO-1); - - while (!kthread_should_stop()) { - rdtscll(cur_time); - if((cur_time - last_time) > 50000000000) { - last_time = cur_time; - printk("Palacios Linux VNET Bridge - profiling: sent: %ld, rxed: %ld, dropped: %ld, upd send: %ld, udp recv: %ld\n", - vnet_state.pkt_sent, - vnet_state.pkt_recv, - vnet_state.pkt_drop, - vnet_state.pkt_udp_send, - vnet_state.pkt_udp_recv); - } - schedule(); - } - - return 0; -} -#endif int palacios_init_vnet(void) { struct v3_vnet_bridge_ops bridge_ops;