From: Peter Dinda Date: Mon, 11 Jan 2010 20:34:16 +0000 (-0600) Subject: Merge branch 'devel' of ssh://palacios@newskysaw.cs.northwestern.edu//home/palacios... X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=73f7dfbeb661474a53ed5b3c73898a431e5e737e;hp=e136664c9a3b9346f3cabef154f4e164362a0d82 Merge branch 'devel' of ssh://palacios@newskysaw.cs.northwestern.edu//home/palacios/palacios into devel --- diff --git a/palacios/include/palacios/vmcs.h b/palacios/include/palacios/vmcs.h index 2a7e42e..11309bf 100644 --- a/palacios/include/palacios/vmcs.h +++ b/palacios/include/palacios/vmcs.h @@ -265,15 +265,21 @@ struct vmcs_segment { struct vmcs_interrupt_state { - uint32_t sti_blocking : 1; - uint32_t mov_ss_blocking : 1; - uint32_t smi_blocking : 1; - uint32_t nmi_blocking : 1; - uint32_t rsvd1 : 28; + union { + uint32_t val; + struct { + uint32_t sti_blocking : 1; + uint32_t mov_ss_blocking : 1; + uint32_t smi_blocking : 1; + uint32_t nmi_blocking : 1; + uint32_t rsvd1 : 28; + } __attribute__((packed)); + } __attribute__((packed)); } __attribute__((packed)); + struct vmcs_data { uint32_t revision ; uint32_t abort ; diff --git a/palacios/include/palacios/vmm.h b/palacios/include/palacios/vmm.h index fb89f28..b05871f 100644 --- a/palacios/include/palacios/vmm.h +++ b/palacios/include/palacios/vmm.h @@ -114,6 +114,7 @@ struct guest_info; if ((os_hooks) && (os_hooks)->malloc) { \ var = (os_hooks)->malloc(size); \ } \ + if (!var) PrintError("MALLOC FAILURE. Memory LEAK!!\n"); \ var; \ }) diff --git a/palacios/include/palacios/vmm_list.h b/palacios/include/palacios/vmm_list.h index 0d5b907..f44933a 100644 --- a/palacios/include/palacios/vmm_list.h +++ b/palacios/include/palacios/vmm_list.h @@ -247,6 +247,24 @@ static inline void list_splice_init(struct list_head *list, }) /** + * list_first_entry - get the struct for the first entry, or NULL if the list is empty + * @head: the list_head head pointer. + * @type: the type of the struct this is embedded in. 
+ * @member: the name of the list_struct within the struct. + */ +#define list_first_entry(head, type, member) \ +({ \ + type * first = NULL; \ + if ((head)->next != (head)) { \ + first = list_entry((head)->next, type, member); \ + } \ + first; \ + }) + + + + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop counter. * @head: the head for your list. diff --git a/palacios/include/palacios/vmm_sym_swap.h b/palacios/include/palacios/vmm_sym_swap.h index a8646ef..e104acc 100644 --- a/palacios/include/palacios/vmm_sym_swap.h +++ b/palacios/include/palacios/vmm_sym_swap.h @@ -50,6 +50,7 @@ struct v3_sym_swap_state { uint32_t write_faults; uint32_t flushes; uint32_t mapped_pages; + uint32_t list_size; #endif // shadow pointers @@ -73,7 +74,8 @@ int v3_swap_in_notify(struct guest_info * info, int pg_index, int dev_index); int v3_get_vaddr_perms(struct guest_info * info, addr_t vaddr, pte32_t * guest_pte, pf_error_t * page_perms); -addr_t v3_get_swapped_pg_addr(struct guest_info * info, pte32_t * shadow_pte, pte32_t * guest_pte); +addr_t v3_get_swapped_pg_addr(struct guest_info * info, pte32_t * guest_pte); +addr_t v3_map_swp_page(struct guest_info * info, pte32_t * shadow_pte, pte32_t * guest_pte, void * swp_page_ptr); int v3_swap_flush(struct guest_info * info); diff --git a/palacios/src/devices/Kconfig b/palacios/src/devices/Kconfig index 1a449d6..dbce08b 100644 --- a/palacios/src/devices/Kconfig +++ b/palacios/src/devices/Kconfig @@ -270,6 +270,20 @@ config SYM_SWAP help Includes the symbiotic ram based swap disk +config SYM_SWAP2 + bool "Symbiotic Swap disk v2" + default y + depends on SYMBIOTIC_SWAP && (LINUX_VIRTIO_BLOCK || IDE) + help + Includes the symbiotic ram based swap disk + +config DISK_MODEL + bool "Disk Performance Model" + default y + depends on LINUX_VIRTIO_BLOCK || IDE + help + Includes Performance model filter for disk operations + config CGA bool "CGA" default n diff --git a/palacios/src/devices/Makefile 
b/palacios/src/devices/Makefile index 81ecbf2..a9b7f54 100644 --- a/palacios/src/devices/Makefile +++ b/palacios/src/devices/Makefile @@ -16,6 +16,8 @@ obj-$(CONFIG_OS_DEBUG) += os_debug.o obj-$(CONFIG_PCI) += pci.o obj-$(CONFIG_PIIX3) += piix3.o obj-$(CONFIG_SYM_SWAP) += sym_swap.o +obj-$(CONFIG_SYM_SWAP2) += sym_swap2.o +obj-$(CONFIG_DISK_MODEL) += disk_model.o obj-$(CONFIG_NE2K) += ne2k.o diff --git a/palacios/src/devices/ata.h b/palacios/src/devices/ata.h index 4cf6cd2..cb0193e 100644 --- a/palacios/src/devices/ata.h +++ b/palacios/src/devices/ata.h @@ -64,10 +64,10 @@ static void ata_identify_device(struct ide_drive * drive) { drive_id->lba_enable = 1; // Drive Capacity (28 bit LBA) - drive_id->lba_capacity = drive->ops->get_capacity(drive->private_data); + drive_id->lba_capacity = drive->ops->get_capacity(drive->private_data) / HD_SECTOR_SIZE; // Drive Capacity (48 bit LBA) - drive_id->lba_capacity_2 = drive->ops->get_capacity(drive->private_data); + drive_id->lba_capacity_2 = drive->ops->get_capacity(drive->private_data) / HD_SECTOR_SIZE; // lower byte is the maximum multiple sector size... 
@@ -124,6 +124,9 @@ static int ata_read(struct vm_device * dev, struct ide_channel * channel, uint8_ } return 0; + + + } @@ -168,7 +171,7 @@ static int ata_get_lba(struct vm_device * dev, struct ide_channel * channel, uin if ((lba_addr.addr + sect_cnt) > - drive->ops->get_capacity(drive->private_data)) { + drive->ops->get_capacity(drive->private_data) / HD_SECTOR_SIZE) { PrintError("IDE: request size exceeds disk capacity (lba=%d) (sect_cnt=%d) (ReadEnd=%d) (capacity=%p)\n", lba_addr.addr, sect_cnt, lba_addr.addr + (sect_cnt * HD_SECTOR_SIZE), diff --git a/palacios/src/devices/ide.c b/palacios/src/devices/ide.c index 7b13e5b..246b1d4 100644 --- a/palacios/src/devices/ide.c +++ b/palacios/src/devices/ide.c @@ -1494,7 +1494,7 @@ static int connect_fn(struct guest_info * info, drive->num_sectors = 63; drive->num_heads = 16; - drive->num_cylinders = ops->get_capacity(private_data) / (drive->num_sectors * drive->num_heads); + drive->num_cylinders = (ops->get_capacity(private_data) / HD_SECTOR_SIZE) / (drive->num_sectors * drive->num_heads); } else { PrintError("invalid IDE drive type\n"); return -1; diff --git a/palacios/src/devices/keyboard.c b/palacios/src/devices/keyboard.c index a6ba864..eb76cc9 100644 --- a/palacios/src/devices/keyboard.c +++ b/palacios/src/devices/keyboard.c @@ -317,6 +317,7 @@ static int pull_from_output_queue(struct vm_device * dev, uint8_t * value) { } +#include static int key_event_handler(struct guest_info * info, @@ -347,11 +348,14 @@ static int key_event_handler(struct guest_info * info, } #endif - else if (evt->scan_code == 0x42) { // F8 Sym test2 + else if (evt->scan_code == 0x42) { // F8 debug toggle extern int v3_dbg_enable; PrintDebug("Toggling Debugging\n"); v3_dbg_enable ^= 1; + } else if (evt->scan_code == 0x41) { // F7 telemetry dump + v3_print_telemetry(info); + } diff --git a/palacios/src/devices/lnx_virtio_nic.c b/palacios/src/devices/lnx_virtio_nic.c index f68d3df..0f76641 100644 --- a/palacios/src/devices/lnx_virtio_nic.c +++ 
b/palacios/src/devices/lnx_virtio_nic.c @@ -280,16 +280,31 @@ static int send_pkt_to_guest(struct virtio_net_state * virtio, uchar_t * buf, ui //copy header to the header descriptor memcpy((void *)hdr_addr, &hdr, sizeof(struct virtio_net_hdr)); + //Zheng 01/02/2010: zero payload + if (offset >= data_len) { + hdr_desc->flags &= ~VIRTIO_NEXT_FLAG; + } + //copy data to the next descriptors - for (buf_idx = 0; offset < data_len; buf_idx = q->desc[hdr_idx].next) { + //Zheng 01/02/2010: put data into the next descriptor, rather than 0! + for (buf_idx = hdr_desc->next; offset < data_len; buf_idx = q->desc[hdr_idx].next) { + // for (buf_idx = 0; offset < data_len; buf_idx = q->desc[hdr_idx].next) { struct vring_desc * buf_desc = &(q->desc[buf_idx]); uint32_t len = 0; - buf_desc->flags = VIRTIO_NEXT_FLAG; + //Zheng 01/02/2010: commented this - we need to check + // if there still is some data left + //buf_desc->flags = VIRTIO_NEXT_FLAG; len = copy_data_to_desc(virtio, buf_desc, buf + offset, data_len - offset); offset += len; + + //Zheng 01/02/2010: check if there still is some data left + if (offset < data_len) { + buf_desc->flags = VIRTIO_NEXT_FLAG; + } + buf_desc->length = len; // TODO: do we need this? 
} diff --git a/palacios/src/devices/pci_passthrough.c b/palacios/src/devices/pci_passthrough.c index 7b0c2d3..a799e1f 100644 --- a/palacios/src/devices/pci_passthrough.c +++ b/palacios/src/devices/pci_passthrough.c @@ -191,7 +191,6 @@ static int pci_bar_init(int bar_num, uint32_t * dst,void * private_data) { pbar->size = (uint16_t)~PCI_IO_BASE(max_val) + 1; - V3_Print("IO Bar with %d (%x) ports %x->%x\n", pbar->size, pbar->size, pbar->addr, pbar->addr + pbar->size); // setup a set of null io hooks @@ -262,8 +261,6 @@ static int pci_bar_init(int bar_num, uint32_t * dst,void * private_data) { } } - - // Initially the virtual bars match the physical ones @@ -279,8 +276,6 @@ static int pci_bar_init(int bar_num, uint32_t * dst,void * private_data) { state->virt_bars[bar_num].type, state->virt_bars[bar_num].addr, state->virt_bars[bar_num].size); - - // Update the pci subsystem versions *dst = bar_val; diff --git a/palacios/src/devices/sym_swap2.c b/palacios/src/devices/sym_swap2.c new file mode 100644 index 0000000..3946636 --- /dev/null +++ b/palacios/src/devices/sym_swap2.c @@ -0,0 +1,620 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include +#include +#include + + +#ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY +#include +#endif + + +#undef PrintDebug +#define PrintDebug(fmt, ...) 
+ + +/* This is the first page that linux writes to the swap area */ +/* Taken from Linux */ +union swap_header { + struct { + char reserved[PAGE_SIZE - 10]; + char magic[10]; /* SWAP-SPACE or SWAPSPACE2 */ + } magic; + struct { + char bootbits[1024]; /* Space for disklabel etc. */ + uint32_t version; + uint32_t last_page; + uint32_t nr_badpages; + unsigned char sws_uuid[16]; + unsigned char sws_volume[16]; + uint32_t type; // The index into the swap_map + uint32_t padding[116]; + + uint32_t badpages[1]; + } info; +}; + + +struct cache_entry { + uint32_t disk_index; + struct list_head cache_node; +}; + +// Per instance data structure +struct swap_state { + int active; + int disabled; + + struct guest_info * vm; + struct swap_state * swap_info; + + int symbiotic; + + union swap_header hdr; + + uint_t swapped_pages; + uint_t unswapped_pages; + uint32_t disk_writes; + uint32_t disk_reads; + + + uint32_t seek_usecs; + + struct v3_dev_blk_ops * ops; + void * private_data; + +#ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY + uint32_t pages_in; + uint32_t pages_out; +#endif + + int io_flag; + + uint64_t cache_size; + uint8_t * cache; + uint64_t cache_base_addr; + uint_t pages_in_cache; + + struct cache_entry * entry_map; + struct list_head entry_list; + struct list_head free_list; + + struct hashtable * entry_ht; +}; + + + +void __udelay(unsigned long usecs); + +static uint_t cache_hash_fn(addr_t key) { + return v3_hash_long(key, 32); +} + + +static int cache_eq_fn(addr_t key1, addr_t key2) { + return (key1 == key2); +} + + + + + +static inline uint32_t get_swap_index_from_offset(uint32_t offset) { + // CAREFUL: The index might be offset by 1, because the first 4K is the header + return (offset / 4096); +} + +static inline uint32_t get_swap_offset_from_index(uint32_t index) { + // CAREFUL: The index might be offset by 1, because the first 4K is the header + return (index * 4096); +} + + +static inline uint32_t get_cache_entry_index(struct swap_state * swap, struct cache_entry * 
entry) { + return (entry - swap->entry_map); // / sizeof(struct cache_entry); +} + + + + + +static inline void * get_swap_entry(uint32_t pg_index, void * private_data) { + struct swap_state * swap = (struct swap_state *)private_data; + struct cache_entry * entry = NULL; + void * pg_addr = NULL; + uint32_t swap_index = pg_index * 4096; + + if (swap->disabled) { + return NULL; + } + + PrintDebug("Getting swap entry for index %d\n", pg_index); + + entry = (struct cache_entry *)v3_htable_search(swap->entry_ht, swap_index); + + if (entry != NULL) { + uint32_t cache_index = get_cache_entry_index(swap, entry); + PrintDebug("Found cached entry (%d)\n", cache_index); + pg_addr = swap->cache + (cache_index * 4096); + } + + return pg_addr; +} + + + +static int read_disk(uint8_t * buf, uint64_t lba, uint64_t num_bytes, struct swap_state * swap) { + if ((swap->io_flag == 0) && (swap->seek_usecs > 0)) { + __udelay(swap->seek_usecs); + swap->io_flag = 1; + } + + swap->disk_reads += num_bytes / 4096; + return swap->ops->read(buf, lba, num_bytes, swap->private_data); + +} + + +static int write_disk(uint8_t * buf, uint64_t lba, uint64_t num_bytes, struct swap_state * swap) { + if ((swap->io_flag == 0) && (swap->seek_usecs > 0)) { + __udelay(swap->seek_usecs); + swap->io_flag = 1; + } + + swap->disk_writes += num_bytes / 4096; + + + return swap->ops->write(buf, lba, num_bytes, swap->private_data); +} + + +static uint64_t swap_get_capacity(void * private_data) { + struct swap_state * swap = (struct swap_state *)private_data; + return swap->ops->get_capacity(swap->private_data); +} + + +static struct v3_swap_ops swap_ops = { + .get_swap_entry = get_swap_entry, +}; + + + +static int buf_read(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { + struct swap_state * swap = (struct swap_state *)private_data; + uint32_t offset = lba; + uint32_t length = num_bytes; + + swap->io_flag = 0; + + if (length % 4096) { + PrintError("Swapping in length that is not a page 
multiple\n"); + } + + if (swap->disabled) { + return read_disk(buf, lba, num_bytes, swap); + } + + + PrintDebug("SymSwap: Reading %d bytes to %p (lba=%p)\n", (uint32_t)num_bytes, buf, (void *)(addr_t)lba); + + + if (length % 4096) { + PrintError("Swapping in length that is not a page multiple\n"); + return -1; + } + + + if ((swap->active == 1) && (offset >= 4096)) { + int i = 0; + int read_pages = (length / 4096); + + + // Notify the shadow paging layer + + swap->unswapped_pages += (length / 4096); + + +#ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY + swap->pages_in += length / 4096; +#endif + + for (i = 0; i < read_pages; i++) { + uint32_t swap_index = offset + (i * 4096); + uint32_t cache_index = 0; + struct cache_entry * entry = NULL; + + if (swap->symbiotic == 1) { + v3_swap_in_notify(swap->vm, get_swap_index_from_offset(offset + i), swap->hdr.info.type); + } + + PrintDebug("Searching for swap index %d\n", swap_index); + + entry = (struct cache_entry *)v3_htable_search(swap->entry_ht, (addr_t)swap_index); + + if (entry != NULL) { + + cache_index = get_cache_entry_index(swap, entry); + + PrintDebug("Reading from cache entry %d\n", cache_index); + + memcpy(buf, swap->cache + (cache_index * 4096), 4096); + + } else { + PrintDebug("Reading from disk offset = %p\n", (void *)(addr_t)offset); + + if (read_disk(buf, offset, 4096, swap) == -1) { + PrintError("Error reading disk\n"); + return -1; + } + } + + offset += 4096; + buf += 4096; + } + } else { + return read_disk(buf, lba, num_bytes, swap); + } + + + return 0; +} + + +static int flush_cache(struct swap_state * swap, int num_to_flush) { + int i; + + PrintDebug("Flushing %d pages\n", num_to_flush); + + for (i = 0; i < num_to_flush; i++) { + struct cache_entry * entry = NULL; + uint32_t entry_index = 0; + + entry = list_first_entry(&(swap->entry_list), struct cache_entry, cache_node); + + entry_index = get_cache_entry_index(swap, entry); + PrintDebug("Flushing cache entry %d\n", entry_index); + + if (write_disk(swap->cache 
+ (entry_index * 4096), entry->disk_index, 4096, swap) == -1) { + PrintError("Error in disk write\n"); + return -1; + } + + + if (swap->symbiotic == 1) { + v3_swap_in_notify(swap->vm, entry->disk_index / 4096, swap->hdr.info.type); + } + + // invalidate swap entry + + + v3_htable_remove(swap->entry_ht, entry->disk_index, 0); + + list_move(&(entry->cache_node), &(swap->free_list)); + + swap->pages_in_cache--; + } + + return 0; +} + + + + + + + + +static int buf_write(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { + struct swap_state * swap = (struct swap_state *)private_data; + uint32_t offset = lba; + uint32_t length = num_bytes; + + swap->io_flag = 0; + + + + if (swap->disabled) { + return write_disk(buf, lba, num_bytes, swap); + } + + + /* + PrintDebug("SymSwap: Writing %d bytes to %p from %p\n", length, + (void *)(swap->swap_space + offset), buf); + */ + + + if ((swap->active == 0) && (offset == 0)) { + // This is the swap header page + + swap->active = 1; + + // store a local copy + memcpy(&(swap->hdr), buf, sizeof(union swap_header)); + + + PrintError("Swap Type=%d (magic=%s)\n", swap->hdr.info.type, swap->hdr.magic.magic); + + if (swap->symbiotic == 1) { + if (v3_register_swap_disk(swap->vm, swap->hdr.info.type, &swap_ops, swap) == -1) { + PrintError("Error registering symbiotic swap disk\n"); + return -1; + } + + PrintError("Swap disk registered\n"); + } + + + if (write_disk(buf, lba, num_bytes, swap) == -1) { + PrintError("Error writing swap header to disk\n"); + return -1; + } + + PrintDebug("Wrote header to disk\n"); + + return 0; + } + + if ((swap->active == 1) && (offset >= 4096)) { + int i = 0; + int written_pages = (length / 4096); + int avail_space = (swap->cache_size / 4096) - swap->pages_in_cache; + + + swap->swapped_pages += written_pages; + +#ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY + swap->pages_out += length / 4096; +#endif + + PrintDebug("available cache space = %d, pages written = %d\n", avail_space, written_pages); + + 
if (avail_space < written_pages) { + flush_cache(swap, written_pages - avail_space); + } + + + for (i = 0; i < written_pages; i += 1) { + // set_index_usage(swap, get_swap_index_from_offset(offset + i), 1); + struct cache_entry * new_entry = NULL; + uint32_t swap_index = offset + (i * 4096); + uint32_t cache_index = 0; + + new_entry = (struct cache_entry *)v3_htable_search(swap->entry_ht, (addr_t)swap_index); + + if (new_entry == NULL) { + new_entry = list_tail_entry(&(swap->free_list), struct cache_entry, cache_node); + + new_entry->disk_index = swap_index; + + list_move_tail(&(new_entry->cache_node), &(swap->entry_list)); + + v3_htable_insert(swap->entry_ht, (addr_t)swap_index, (addr_t)new_entry); + + swap->pages_in_cache++; + } + + cache_index = get_cache_entry_index(swap, new_entry); + + PrintDebug("Writing to cache entry %d\n", cache_index); + + memcpy(swap->cache + (cache_index * 4096), buf, 4096); + + buf += 4096; + } + } else { + if (write_disk(buf, lba, num_bytes, swap) == -1) { + PrintError("Error writing swap header to disk\n"); + return -1; + } + } + + return 0; +} + + + + +static uint8_t write_buf[4096]; + + +static int swap_write(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { + int idx = lba % 4096; + + if (num_bytes != 512) { + PrintError("Write for %d bytes\n", (uint32_t)num_bytes); + return -1; + } + + + memcpy(write_buf + idx, buf, num_bytes); + + if (idx + num_bytes == 4096) { + return buf_write(write_buf, lba - idx, 4096, private_data); + } + + return 0; +} + + + +static uint8_t read_buf[4096]; + + + +static int swap_read(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { + int idx = lba % 4096; + + + if (num_bytes != 512) { + PrintError("Read for %d bytes\n", (uint32_t)num_bytes); + return -1; + } + + if (idx == 0) { + if (buf_read(read_buf, lba - idx, 4096, private_data) == -1) { + PrintError("Error reading buffer\n"); + return -1; + } + } + + memcpy(buf, read_buf + idx, num_bytes); + + return 0; 
+} + + +static int swap_free(struct vm_device * dev) { + return -1; +} + + +static struct v3_dev_blk_ops blk_ops = { + .read = swap_read, + .write = swap_write, + .get_capacity = swap_get_capacity, +}; + + + +static struct v3_device_ops dev_ops = { + .free = swap_free, + .reset = NULL, + .start = NULL, + .stop = NULL, +}; + + +#ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY +static void telemetry_cb(struct guest_info * info, void * private_data, char * hdr) { + struct swap_state * swap = (struct swap_state *)private_data; + + V3_Print("%sSwap Device:\n", hdr); + V3_Print("%s\tPages Swapped in=%d\n", hdr, swap->pages_in); + V3_Print("%s\tPages Swapped out=%d\n", hdr, swap->pages_out); + V3_Print("%s\tPages Written to Disk=%d\n", hdr, swap->disk_writes); + V3_Print("%s\tPages Read from Disk=%d\n", hdr, swap->disk_reads); +} +#endif + + +static int connect_fn(struct guest_info * info, + void * frontend_data, + struct v3_dev_blk_ops * ops, + v3_cfg_tree_t * cfg, + void * private_data) { + v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend"); + uint32_t cache_size = atoi(v3_cfg_val(cfg, "cache_size")) * 1024 * 1024; + uint32_t seek_us = atoi(v3_cfg_val(cfg, "seek_us")); + int symbiotic = atoi(v3_cfg_val(cfg, "symbiotic")); + struct swap_state * swap = NULL; + int i; + + if (!frontend_cfg) { + PrintError("Initializing sym swap without a frontend device\n"); + return -1; + } + + PrintError("Creating Swap filter (cache size=%dMB)\n", cache_size / (1024 * 1024)); + + swap = (struct swap_state *)V3_Malloc(sizeof(struct swap_state)); + + swap->vm = info; + swap->cache_size = cache_size; + swap->io_flag = 0; + swap->seek_usecs = seek_us; + swap->symbiotic = symbiotic; + + swap->ops = ops; + swap->private_data = private_data; + + swap->swapped_pages = 0; + swap->unswapped_pages = 0; + // swap->cached_pages = 0; + + if (cache_size == 0) { + swap->disabled = 1; + } else { + swap->disabled = 0; + + INIT_LIST_HEAD(&(swap->entry_list)); + INIT_LIST_HEAD(&(swap->free_list)); + 
swap->entry_map = (struct cache_entry *)V3_Malloc(sizeof(struct cache_entry) * (cache_size / 4096)); + + for (i = 0; i < (cache_size / 4096); i++) { + list_add(&(swap->entry_map[i].cache_node), &(swap->free_list)); + } + + swap->entry_ht = v3_create_htable(0, cache_hash_fn, cache_eq_fn); + + swap->active = 0; + + swap->cache_base_addr = (addr_t)V3_AllocPages(swap->cache_size / 4096); + swap->cache = (uint8_t *)V3_VAddr((void *)(swap->cache_base_addr)); + memset(swap->cache, 0, swap->cache_size); + } + + if (v3_dev_connect_blk(info, v3_cfg_val(frontend_cfg, "tag"), + &blk_ops, frontend_cfg, swap) == -1) { + PrintError("Could not connect to frontend %s\n", + v3_cfg_val(frontend_cfg, "tag")); + return -1; + } + + +#ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY + + if (info->enable_telemetry == 1) { + v3_add_telemetry_cb(info, telemetry_cb, swap); + } + +#endif + + return 0; +} + + + + +static int swap_init(struct guest_info * vm, v3_cfg_tree_t * cfg) { + + char * name = v3_cfg_val(cfg, "name"); + + struct vm_device * dev = v3_allocate_device(name, &dev_ops, NULL); + + if (v3_attach_device(vm, dev) == -1) { + PrintError("Could not attach device %s\n", name); + return -1; + } + + if (v3_dev_add_blk_frontend(vm, name, connect_fn, NULL) == -1) { + PrintError("Could not register %s as block frontend\n", name); + return -1; + } + + + return 0; +} + +device_register("SWAPCACHE", swap_init) diff --git a/palacios/src/devices/tmpdisk.c b/palacios/src/devices/tmpdisk.c index fde3cf3..b573cf6 100644 --- a/palacios/src/devices/tmpdisk.c +++ b/palacios/src/devices/tmpdisk.c @@ -32,7 +32,7 @@ struct blk_state { static uint64_t blk_get_capacity(void * private_data) { struct blk_state * blk = (struct blk_state *)private_data; - PrintDebug("SymBlk: Getting Capacity %d\n", (uint32_t)(blk->capacity)); + // PrintDebug("SymBlk: Getting Capacity %d\n", (uint32_t)(blk->capacity)); return blk->capacity; } @@ -42,6 +42,8 @@ static uint64_t blk_get_capacity(void * private_data) { static int 
blk_read(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { struct blk_state * blk = (struct blk_state *)private_data; + // PrintDebug("TmpDisk Reading %d bytes to %p (lba=%p)\n", (uint32_t)num_bytes, buf, (void *)(addr_t)lba); + if (lba + num_bytes > blk->capacity) { PrintError("TMPDISK Read past end of disk\n"); return -1; @@ -58,6 +60,8 @@ static int blk_read(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * priv static int blk_write(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { struct blk_state * blk = (struct blk_state *)private_data; + // PrintDebug("TmpDisk Writing %d bytes to %p (lba=%p)\n", (uint32_t)num_bytes, buf, (void *)(addr_t)lba); + if (lba + num_bytes > blk->capacity) { PrintError("TMPDISK Write past end of disk\n"); return -1; diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index fa1f3ec..acb5353 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -465,7 +465,7 @@ int v3_svm_enter(struct guest_info * info) { rdtscll(info->time_state.cached_host_tsc); - // guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc; + guest_ctrl->TSC_OFFSET = info->time_state.guest_tsc - info->time_state.cached_host_tsc; v3_svm_launch((vmcb_t *)V3_PAddr(info->vmm_data), &(info->vm_regs), (vmcb_t *)host_vmcbs[info->cpu_id]); diff --git a/palacios/src/palacios/vmm_shadow_paging_32.h b/palacios/src/palacios/vmm_shadow_paging_32.h index c358c3c..fc2df95 100644 --- a/palacios/src/palacios/vmm_shadow_paging_32.h +++ b/palacios/src/palacios/vmm_shadow_paging_32.h @@ -230,8 +230,18 @@ static int handle_pte_shadow_pagefault_32(struct guest_info * info, addr_t fault #ifdef CONFIG_SYMBIOTIC_SWAP if (is_swapped_pte32(guest_pte)) { + pf_error_t swap_perms; + + /* + int sym_ret = v3_get_vaddr_perms(info, fault_addr, guest_pte, &swap_perms); + sym_ret = 0; + */ + addr_t swp_pg_addr = 0; + + + #ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY if (error_code.write 
== 0) { info->swap_state.read_faults++; @@ -240,54 +250,72 @@ static int handle_pte_shadow_pagefault_32(struct guest_info * info, addr_t fault } #endif - if (v3_get_vaddr_perms(info, fault_addr, guest_pte, &swap_perms) == -1) { - PrintError("Error getting Swapped page permissions\n"); - return -1; - } + swp_pg_addr = v3_get_swapped_pg_addr(info, guest_pte); - // swap_perms.write == 1 || error_code.write == 0 - // swap_perms.user == 0 || error_code.user == 1 + if (swp_pg_addr != 0) { + PrintDebug("Swapped page address=%p\n", (void *)swp_pg_addr); - // This checks for permissions violations that require a guest PF injection - if ( (swap_perms.present == 1) && - ( (swap_perms.write == 1) || - (error_code.write == 0) ) && - ( (swap_perms.user == 1) || - (error_code.user == 0) ) ) { - addr_t swp_pg_addr = 0; + /* + if (info->cpl == 0) { + PrintError("Swapped Page fault in kernel mode.... bad...\n"); + goto inject; + } + */ - PrintDebug("Page fault on swapped out page (vaddr=%p) (pte=%x) (error_code=%x)\n", - (void *)fault_addr, *(uint32_t *)guest_pte, *(uint32_t *)&error_code); + int sym_ret = v3_get_vaddr_perms(info, fault_addr, guest_pte, &swap_perms); - swp_pg_addr = v3_get_swapped_pg_addr(info, shadow_pte, guest_pte); + if (sym_ret == -1) { + PrintError("Symcall error...\n"); + return -1; + } else if (sym_ret == 0) { - PrintDebug("Swapped page address=%p\n", (void *)swp_pg_addr); - if (swp_pg_addr != 0) { - shadow_pte->writable = swap_perms.write; - shadow_pte->user_page = swap_perms.user; - - shadow_pte->write_through = 0; - shadow_pte->cache_disable = 0; - shadow_pte->global_page = 0; - - shadow_pte->present = 1; + if (swap_perms.present == 0) { + PrintError("Nonpresent swapped page\n"); + } - shadow_pte->page_base_addr = swp_pg_addr; + // swap_perms.write == 1 || error_code.write == 0 + // swap_perms.user == 1 || error_code.user == 0 + // This checks for permissions violations that require a guest PF injection + if ( (swap_perms.present == 1) && + ( 
(swap_perms.write == 1) || + (error_code.write == 0) ) && + ( (swap_perms.user == 1) || + (error_code.user == 0) ) ) { + addr_t swp_pg_pa = 0; + + swp_pg_pa = v3_map_swp_page(info, shadow_pte, guest_pte, (void *)swp_pg_addr); + + PrintDebug("Page fault on swapped out page (vaddr=%p) (pte=%x) (error_code=%x)\n", + (void *)fault_addr, *(uint32_t *)guest_pte, *(uint32_t *)&error_code); + + shadow_pte->writable = swap_perms.write; + shadow_pte->user_page = swap_perms.user; + + shadow_pte->write_through = 0; + shadow_pte->cache_disable = 0; + shadow_pte->global_page = 0; + + shadow_pte->present = 1; + + shadow_pte->page_base_addr = swp_pg_pa; + #ifdef CONFIG_SYMBIOTIC_SWAP_TELEMETRY - info->swap_state.mapped_pages++; + info->swap_state.mapped_pages++; #endif - - return 0; + // PrintError("Swap fault handled\n"); + return 0; + } } } else { PrintDebug("Not a sym swappable page\n"); } + } #endif - + // inject: if (inject_guest_pf(info, fault_addr, error_code) == -1) { PrintError("Could not inject guest page fault for vaddr %p\n", (void *)fault_addr); diff --git a/palacios/src/palacios/vmm_sym_swap.c b/palacios/src/palacios/vmm_sym_swap.c index 3b1620c..5e1aa80 100644 --- a/palacios/src/palacios/vmm_sym_swap.c +++ b/palacios/src/palacios/vmm_sym_swap.c @@ -78,6 +78,7 @@ static void telemetry_cb(struct guest_info * info, void * private_data, char * h V3_Print("%s\tWrite faults=%d\n", hdr, swap_state->write_faults); V3_Print("%s\tMapped Pages=%d\n", hdr, swap_state->mapped_pages); V3_Print("%s\tFlushes=%d\n", hdr, swap_state->flushes); + V3_Print("%s\tlist size=%d\n", hdr, swap_state->list_size); } #endif @@ -199,29 +200,38 @@ int v3_get_vaddr_perms(struct guest_info * info, addr_t vaddr, pte32_t * guest_p // V3_Print("page perms = %x\n", *(uint32_t *)page_perms); + if (vaddr == 0) { + return 1; + } + return 0; } -addr_t v3_get_swapped_pg_addr(struct guest_info * info, pte32_t * shadow_pte, pte32_t * guest_pte) { - struct list_head * shdw_ptr_list = NULL; +addr_t 
v3_get_swapped_pg_addr(struct guest_info * info, pte32_t * guest_pte) { struct v3_sym_swap_state * swap_state = &(info->swap_state); - struct shadow_pointer * shdw_ptr = NULL; - void * swp_page_ptr = NULL; int dev_index = get_dev_index(guest_pte); struct v3_swap_dev * swp_dev = &(swap_state->devs[dev_index]); + if (! swp_dev->present ) { return 0; } + return (addr_t)swp_dev->ops->get_swap_entry(get_pg_index(guest_pte), swp_dev->private_data); +} + + +addr_t v3_map_swp_page(struct guest_info * info, pte32_t * shadow_pte, pte32_t * guest_pte, void * swp_page_ptr) { + struct list_head * shdw_ptr_list = NULL; + struct v3_sym_swap_state * swap_state = &(info->swap_state); + struct shadow_pointer * shdw_ptr = NULL; - swp_page_ptr = swp_dev->ops->get_swap_entry(get_pg_index(guest_pte), swp_dev->private_data); if (swp_page_ptr == NULL) { - PrintError("Swapped out page not found on swap device\n"); + // PrintError("Swapped out page not found on swap device\n"); return 0; } @@ -229,12 +239,19 @@ addr_t v3_get_swapped_pg_addr(struct guest_info * info, pte32_t * shadow_pte, pt if (shdw_ptr_list == NULL) { shdw_ptr_list = (struct list_head *)V3_Malloc(sizeof(struct list_head *)); + swap_state->list_size++; INIT_LIST_HEAD(shdw_ptr_list); v3_htable_insert(swap_state->shdw_ptr_ht, (addr_t)*(uint32_t *)guest_pte, (addr_t)shdw_ptr_list); } shdw_ptr = (struct shadow_pointer *)V3_Malloc(sizeof(struct shadow_pointer)); + if (shdw_ptr == NULL) { + PrintError("MEMORY LEAK\n"); + telemetry_cb(info, NULL, ""); + return 0; + } + shdw_ptr->shadow_pte = shadow_pte; shdw_ptr->guest_pte = *(uint32_t *)guest_pte; shdw_ptr->pg_index = get_pg_index(guest_pte); @@ -245,3 +262,23 @@ addr_t v3_get_swapped_pg_addr(struct guest_info * info, pte32_t * shadow_pte, pt return PAGE_BASE_ADDR((addr_t)V3_PAddr(swp_page_ptr)); } + + + +/* +int v3_is_mapped_fault(struct guest_info * info, pte32_t * shadow_pte, pte32_t * guest_pte) { + struct list_head * shdw_ptr_list = NULL; + + shdw_ptr_list = (struct 
list_head * )v3_htable_search(swap_state->shdw_ptr_ht, *(addr_t *)&(guest_pte)); + + + if (shdw_ptr_list != NULL) { + PrintError("We faulted on a mapped in page....\n"); + return -1; + } + + return 0; +} + + +*/ diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index 81d333e..aba5656 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -466,9 +466,11 @@ static int update_irq_exit_state(struct guest_info * info) { static int update_irq_entry_state(struct guest_info * info) { struct vmx_exit_idt_vec_info idt_vec_info; + struct vmcs_interrupt_state intr_state; struct vmx_data * vmx_info = (struct vmx_data *)(info->vmm_data); check_vmcs_read(VMCS_IDT_VECTOR_INFO, &(idt_vec_info.value)); + check_vmcs_read(VMCS_GUEST_INT_STATE, &(intr_state)); /* Check for pending exceptions to inject */ if (v3_excp_pending(info)) { @@ -498,7 +500,8 @@ static int update_irq_entry_state(struct guest_info * info) { v3_injecting_excp(info, int_info.vector); - } else if (((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) { + } else if ((((struct rflags *)&(info->ctrl_regs.rflags))->intr == 1) && + (intr_state.val == 0)) { if ((info->intr_state.irq_started == 1) && (idt_vec_info.valid == 1)) { @@ -587,6 +590,35 @@ static int update_irq_entry_state(struct guest_info * info) { } + +static struct vmx_exit_info exit_log[10]; + +static void print_exit_log(struct guest_info * info) { + int cnt = info->num_exits % 10; + int i = 0; + + + V3_Print("\nExit Log (%d total exits):\n", (uint32_t)info->num_exits); + + for (i = 0; i < 10; i++) { + struct vmx_exit_info * tmp = &exit_log[cnt]; + + V3_Print("%d:\texit_reason = %p\n", i, (void *)(addr_t)tmp->exit_reason); + V3_Print("\texit_qual = %p\n", (void *)tmp->exit_qual); + V3_Print("\tint_info = %p\n", (void *)(addr_t)tmp->int_info); + V3_Print("\tint_err = %p\n", (void *)(addr_t)tmp->int_err); + V3_Print("\tinstr_info = %p\n", (void *)(addr_t)tmp->instr_info); + + cnt--; + + if (cnt == -1) { + cnt = 9; 
+ } + + } + +} + /* * CAUTION and DANGER!!! * @@ -620,6 +652,11 @@ int v3_vmx_enter(struct guest_info * info) { update_irq_entry_state(info); #endif + { + addr_t guest_cr3; + vmcs_read(VMCS_GUEST_CR3, &guest_cr3); + vmcs_write(VMCS_GUEST_CR3, guest_cr3); + } rdtscll(info->time_state.cached_host_tsc); @@ -650,6 +687,8 @@ int v3_vmx_enter(struct guest_info * info) { /* Update guest state */ v3_vmx_save_vmcs(info); + // info->cpl = info->segments.cs.selector & 0x3; + info->mem_mode = v3_get_vm_mem_mode(info); info->cpu_mode = v3_get_vm_cpu_mode(info); @@ -664,6 +703,8 @@ int v3_vmx_enter(struct guest_info * info) { //PrintDebug("VMX Exit taken, id-qual: %u-%lu\n", exit_info.exit_reason, exit_info.exit_qual); + exit_log[info->num_exits % 10] = exit_info; + #ifdef CONFIG_SYMBIOTIC if (info->sym_state.sym_call_active == 0) { @@ -699,6 +740,7 @@ int v3_start_vmx_guest(struct guest_info* info) { while (1) { if (v3_vmx_enter(info) == -1) { v3_print_vmcs(); + print_exit_log(info); return -1; }