From: Peter Dinda Date: Wed, 26 Mar 2008 01:29:10 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: working-cdboot-physical-but-not-qemu~54 X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=271ad3fd89672d441db3c5ffa673ee649e4ce532 *** empty log message *** --- diff --git a/palacios/include/geekos/vm_guest.h b/palacios/include/geekos/vm_guest.h index 6a6d86b..e18f1f0 100644 --- a/palacios/include/geekos/vm_guest.h +++ b/palacios/include/geekos/vm_guest.h @@ -4,6 +4,8 @@ #include #include #include +#include + struct guest_gprs { ullong_t rbx; @@ -17,13 +19,14 @@ struct guest_gprs { - typedef struct guest_info { ullong_t rip; ullong_t rsp; - vmm_mem_list_t mem_list; - vmm_mem_layout_t mem_layout; + shadow_paging_state_t shadow_paging_state; + + // vmm_mem_list_t mem_list; + // vmm_mem_layout_t mem_layout; vmm_io_map_t io_map; // device_map diff --git a/palacios/include/geekos/vmm.h b/palacios/include/geekos/vmm.h index 3b80c46..336c9e0 100644 --- a/palacios/include/geekos/vmm.h +++ b/palacios/include/geekos/vmm.h @@ -6,7 +6,7 @@ #include #include -#include +//#include #include @@ -87,6 +87,9 @@ struct vmm_os_hooks { void *(*malloc)(uint_t size); void (*free)(void * addr); + void *(*physical_to_virtual)(void *addr); + void *(*virtual_to_physical)(void *addr); + void (*start_kernel_thread)(); // include pointer to function }; diff --git a/palacios/include/geekos/vmm_mem.h b/palacios/include/geekos/vmm_mem.h index 877382c..00ea157 100644 --- a/palacios/include/geekos/vmm_mem.h +++ b/palacios/include/geekos/vmm_mem.h @@ -4,93 +4,103 @@ #include +typedef ulong_t addr_t; /* - * The mem list is TEMPORARY, simply to lock down which pages are assigned to the VM - * We will remove it and use the host page allocation mechanism in the future - */ - -typedef unsigned long addr_t; - - -typedef struct mem_region { - addr_t addr; - uint_t num_pages; - - struct mem_region * next; - struct mem_region * prev; -} mem_region_t; - - 
-typedef struct vmm_mem_list { - uint_t num_pages; - bool long_mode; - - uint_t num_regions; - mem_region_t * head; - // mem_region_t * tail; -} vmm_mem_list_t; - - - -/** Memory layout **/ -/* Describes the layout of memory for the guest */ -/* We use this to build the guest page tables */ - -typedef enum region_type {GUEST, UNMAPPED, SHARED} region_type_t; - - -typedef struct layout_region { - addr_t start; - addr_t end; - - region_type_t type; - - addr_t host_addr; - - struct layout_region * next; - struct layout_region * prev; -} layout_region_t; - - -typedef struct vmm_mem_layout { - uint_t num_pages; + Guest Shadow Host + Virtual Physical Virtual Physical Virtual Physical + OK OK + OK NOK + NOK OK + NOK NOK + +*/ + +// These are the types of physical memory address regions +// from the perspective of the guest +typedef enum guest_region_type { + GUEST_REGION_PHYSICAL_MEMORY, + GUEST_REGION_NOTHING, + GUEST_REGION_MEMORY_MAPPED_DEVICE} guest_region_type_t; + +// These are the types of physical memory address regions +// from the perspective of the HOST +typedef enum host_region_type { + HOST_REGION_PHYSICAL_MEMORY, + HOST_REGION_UNALLOCATED, + HOST_REGION_NOTHING, + HOST_REGION_MEMORY_MAPPED_DEVICE, + HOST_REGION_REMOTE, + HOST_REGION_SWAPPED, +} host_region_type_t; + + + +typedef struct shadow_map_entry { + guest_region_type_t guest_type; + addr_t guest_start; + addr_t guest_end; + + host_region_type_t host_type; + union host_addr_t { + struct physical_addr { + addr_t host_start; + addr_t host_end; + } phys_addr; + // Other addresses, like on disk, etc, would go here + } host_addr; + struct shadow_map_entry *next, *prev; +} shadow_map_entry_t; + + + +typedef struct shadow_map { uint_t num_regions; - layout_region_t * head; -} vmm_mem_layout_t; - - -/*** FOR THE LOVE OF GOD WRITE SOME UNIT TESTS FOR THIS THING ***/ - -void init_mem_list(vmm_mem_list_t * list); -void free_mem_list(vmm_mem_list_t * list); + shadow_map_entry_t * head; +} shadow_map_t; -int 
add_mem_list_pages(vmm_mem_list_t * list, addr_t addr, uint_t num_pages); -int remove_mem_list_pages(vmm_mem_list_t * list, addr_t addr, uint_t num_pages); -mem_region_t * get_mem_list_cursor(vmm_mem_list_t * list, addr_t addr); +void init_shadow_map_entry(shadow_map_entry_t *entry, + addr_t guest_addr_start, + addr_t guest_addr_end, + guest_region_type_t guest_region_type, + host_region_type_t host_region_type); -addr_t get_mem_list_addr(vmm_mem_list_t * list, uint_t index); +void init_shadow_map_entry_physical(shadow_map_entry_t *entry, + addr_t guest_addr_start, + addr_t guest_addr_end, + guest_region_type_t guest_region_type, + addr_t host_addr_start, + addr_t host_addr_end, + host_region_type_t host_region_type); + +void init_shadow_map(shadow_map_t *map); +void free_shadow_map(shadow_map_t *map); -void print_mem_list(vmm_mem_list_t * list); +shadow_map_entry_t * get_shadow_map_region_by_addr(shadow_map_t *map, addr_t guest_addr); +shadow_map_entry_t * get_shadow_map_region_by_index(shadow_map_t * map, uint_t index); -void init_mem_layout(vmm_mem_layout_t * layout); -void free_mem_layout(vmm_mem_layout_t * layout); +int map_guest_physical_to_host_physical(shadow_map_entry_t *entry, + addr_t guest_addr, + addr_t *host_addr); -int add_mem_range(vmm_mem_layout_t * layout, layout_region_t * region); -int add_shared_mem_range(vmm_mem_layout_t * layout, addr_t start, addr_t end, addr_t host_addr); -int add_unmapped_mem_range(vmm_mem_layout_t * layout, addr_t start, addr_t end); -int add_guest_mem_range(vmm_mem_layout_t * layout, addr_t start, addr_t end); +// Semantics: +// Adding a region that overlaps with an existing region results is undefined +// and will probably fail +int add_shadow_map_region(shadow_map_t * map, shadow_map_entry_t *entry); +// Semantics: +// Deletions result in splitting +int delete_shadow_map_region(shadow_map_t *map, + addr_t guest_start, + addr_t guest_end); -addr_t get_mem_layout_addr(vmm_mem_layout_t * layout, uint_t index); 
-layout_region_t * get_mem_layout_region(vmm_mem_layout_t * layout, addr_t addr); -void print_mem_layout(vmm_mem_layout_t * layout); +void print_shadow_map(shadow_map_t * map); diff --git a/palacios/include/geekos/vmm_paging.h b/palacios/include/geekos/vmm_paging.h index 6125d5d..626049c 100644 --- a/palacios/include/geekos/vmm_paging.h +++ b/palacios/include/geekos/vmm_paging.h @@ -1,6 +1,7 @@ #ifndef __VMM_PAGING_H #define __VMM_PAGING_H + #include @@ -8,6 +9,73 @@ #include #include +/* + +In the following, when we say "page table", we mean the whole 2 or 4 layer +page table (PDEs, PTEs), etc. + + +guest-visible paging state + This is the state that the guest thinks the machine is using + It consists of + - guest physical memory + The physical memory addresses the guest is allowed to use + (see shadow page maps, below) + - guest page tables + (we care about when the current one changes) + - guest paging registers (these are never written to hardware) + CR0 + CR3 + + +shadow paging state + This is the state that the machine will actually use when the guest + is running. It consists of: + - current shadow page table + This is the page table actually used when the guest is running. + It is changed/regenerated when the guest page table changes + It mostly reflects the guest page table, except that it restricts + physical addresses to those the VMM allocates to the guest. + - shadow page maps + This is a mapping from guest physical memory addresses to + the current location of the guest physical memory content. + It maps from regions of physical memory addresses to regions + located in physical memory or elsewhere. + (8192,16384) -> MEM(8912,...) + (0,8191) -> DISK(65536,..) + - guest paging registers (these are written to guest state) + CR0 + CR3 + +host paging state + This is the state we expect to be operative when the VMM is running. + Typically, this is set up by the host os into which we have embedded + the VMM, but we include the description here for clarity. 
+ - current page table + This is the page table we use when we are executing in + the VMM (or the host os) + - paging registers + CR0 + CR3 + + +The reason why the shadow paging state and the host paging state are +distinct is to permit the guest to use any virtual address it wants, +irrespective of the addresses the VMM or the host os use. These guest +virtual addresses are reflected in the shadow paging state. When we +exit from the guest, we switch to the host paging state so that any +virtual addresses that overlap between the guest and VMM/host now map +to the physical addresses expected by the VMM/host. On AMD SVM, this +switch is done by the hardware. On Intel VT, the switch is done +by the hardware as well, but we are responsible for manually updating +the host state in the vmcs before entering the guest. + + +*/ + + + + #define MAX_PAGE_TABLE_ENTRIES 1024 #define MAX_PAGE_DIR_ENTRIES 1024 @@ -20,8 +88,10 @@ #define PAGE_TABLE_INDEX(x) ((((uint_t)x) >> 12) & 0x3ff) #define PAGE_OFFSET(x) ((((uint_t)x) & 0xfff)) -#define PAGE_ALLIGNED_ADDR(x) (((uint_t) (x)) >> 12) -#define PAGE_ADDR(x) (PAGE_ALLIGNED_ADDR(x) << 12) +#define PAGE_ALIGNED_ADDR(x) (((uint_t) (x)) >> 12) +#ifndef PAGE_ADDR +#define PAGE_ADDR(x) (PAGE_ALIGNED_ADDR(x) << 12) +#endif #define PAGE_POWER 12 @@ -122,10 +192,41 @@ typedef struct pml4e { } pml4e64_t; -vmm_pde_t * generate_guest_page_tables(vmm_mem_layout_t * layout, vmm_mem_list_t * list); -pml4e64_t * generate_guest_page_tables_64(vmm_mem_layout_t * layout, vmm_mem_list_t * list); -void free_guest_page_tables(vmm_pde_t * pde); +typedef enum { PDE32 } page_directory_type_t; + + +typedef struct shadow_paging_state { + // these two reflect the top-level page directory + // of the guest page table + page_directory_type_t guest_page_directory_type; + void *guest_page_directory; // points to guest's current page table + + // This reflects the guest physical to host physical mapping + shadow_map_t shadow_map; + + // these two reflect the 
top-level page directory + // the shadow page table + page_directory_type_t shadow_page_directory_type; + void *shadow_page_directory; + +} shadow_paging_state_t; + + + +int init_shadow_paging_state(shadow_paging_state_t *state); + +// This function will cause the shadow page table to be deleted +// and rewritten to reflect the guest page table and the shadow map +int wholesale_update_shadow_paging_state(shadow_paging_state_t *state); + +//void free_guest_page_tables(vmm_pde_t * pde); + +//generate_shadow_ + +//vmm_pde_t * generate_guest_page_tables(shadow_map_t * map, vmm_mem_list_t * list); +//pml4e64_t * generate_guest_page_tables_64(shadow_map_t * map, vmm_mem_list_t * list); + void PrintDebugPageTables(vmm_pde_t * pde); diff --git a/palacios/include/geekos/vmm_stubs.h b/palacios/include/geekos/vmm_stubs.h index db960a3..1193b04 100644 --- a/palacios/include/geekos/vmm_stubs.h +++ b/palacios/include/geekos/vmm_stubs.h @@ -12,6 +12,6 @@ void Free_VMM_Page(void * page); void * VMM_Malloc(uint_t size); void VMM_Free(void * addr); - +void * Identity(void *addr) { return addr; }; #endif diff --git a/palacios/include/geekos/vmm_util.h b/palacios/include/geekos/vmm_util.h index 16931b4..25994c3 100644 --- a/palacios/include/geekos/vmm_util.h +++ b/palacios/include/geekos/vmm_util.h @@ -4,7 +4,6 @@ #include - #ifndef PAGE_SIZE #define PAGE_SIZE 4096 #endif @@ -34,6 +33,9 @@ struct VMM_GPRs { }; +#define GET_LOW_32(x) (*((uint_t*)(&(x)))) +#define GET_HIGH_32(x) (*((uint_t*)(((char*)(&(x)))+4))) + void PrintTraceHex(unsigned char x); diff --git a/palacios/src/geekos/main.c b/palacios/src/geekos/main.c index e59be37..65d8d18 100644 --- a/palacios/src/geekos/main.c +++ b/palacios/src/geekos/main.c @@ -3,7 +3,7 @@ * Copyright (c) 2001,2003,2004 David H. Hovemeyer * Copyright (c) 2003, Jeffrey K. Hollingsworth * Copyright (c) 2004, Iulian Neamtiu - * $Revision: 1.20 $ + * $Revision: 1.21 $ * * This is free software. 
You are permitted to use, * redistribute, and modify it as specified in the file "COPYING". @@ -120,7 +120,13 @@ void BuzzVM() int x; int j; unsigned char init; - + +#if 0 + __asm__ __volatile__ ( + "popf" + ); + +#endif PrintBoth("Starting To Buzz\n"); @@ -247,11 +253,11 @@ void Main(struct Boot_Info* bootInfo) // Init_IDE(); - Print("Done; stalling\n"); + // Print("Done; stalling\n"); -#if 1 +#if 0 SerialPrint("Dumping VM kernel Code (first 128 bytes @ 0x%x)\n", 0x100000); SerialMemDump((unsigned char *)0xfe000, 4096); /* @@ -260,12 +266,21 @@ void Main(struct Boot_Info* bootInfo) */ #endif +#if 0 + SerialPrint("Dumping BIOS code f0000-fffff\n\n"); + SerialMemDump((unsigned char *)0xf0000, 65536); + /* + SerialPrint("Dumping kernel Code (first 512 bytes @ 0x%x)\n",KERNEL_START); + SerialMemDump((unsigned char *)VM_KERNEL_START, 512); + */ +#endif - while (1); +#if 1 SerialPrintLevel(1000,"Launching Noisemaker and keyboard listener threads\n"); key_thread = Start_Kernel_Thread(Keyboard_Listener, (ulong_t)&doIBuzz, PRIORITY_NORMAL, false); spkr_thread = Start_Kernel_Thread(Buzzer, (ulong_t)&doIBuzz, PRIORITY_NORMAL, false); +#endif { struct vmm_os_hooks os_hooks; @@ -285,26 +300,25 @@ void Main(struct Boot_Info* bootInfo) os_hooks.free_page = &Free_VMM_Page; os_hooks.malloc = &VMM_Malloc; os_hooks.free = &VMM_Free; + os_hooks.virtual_to_physical=&Identity; + os_hooks.physical_to_virtual=&Identity; + // DumpGDT(); Init_VMM(&os_hooks, &vmm_ops); - init_mem_layout(&(vm_info.mem_layout)); - init_mem_list(&(vm_info.mem_list)); + init_shadow_paging_state(&(vm_info.shadow_paging_state)); + + init_vmm_io_map(&(vm_info.io_map)); - add_mem_list_pages(&(vm_info.mem_list), vm_range_start, (vm_range_end - vm_range_start) / PAGE_SIZE); - // add_unmapped_mem_range(&(vm_info.mem_layout), 0, 256); - //add_shared_mem_range(&(vm_info.mem_layout), guest_kernel_start, (guest_kernel_end - guest_kernel_start) / PAGE_SIZE, guest_kernel_start); - 
//add_guest_mem_range(&(vm_info.mem_layout), guest_kernel_end, 20); - if (0) { // add_shared_mem_range(&(vm_info.mem_layout), 0, 0x800000, 0x10000); - add_shared_mem_range(&(vm_info.mem_layout), 0, 0x1000000, 0); + //add_shared_mem_range(&(vm_info.mem_layout), 0, 0x1000000, 0); rip = (ulong_t)(void*)&BuzzVM; // rip -= 0x10000; @@ -318,12 +332,20 @@ void Main(struct Boot_Info* bootInfo) } else { //add_shared_mem_range(&(vm_info.mem_layout), 0x0, 0x1000, 0x100000); - add_shared_mem_range(&(vm_info.mem_layout), 0x0, 0x100000, 0x0); + // add_shared_mem_range(&(vm_info.mem_layout), 0x0, 0x100000, 0x0); + shadow_map_entry_t *ent = Malloc(sizeof(shadow_map_entry_t));; + init_shadow_map_entry_physical(ent,0,0x100000,GUEST_REGION_PHYSICAL_MEMORY, + 0,0x100000,HOST_REGION_PHYSICAL_MEMORY); + add_shadow_map_region(&(vm_info.shadow_paging_state.shadow_map),ent); hook_io_port(&(vm_info.io_map), 0x61, &IO_Read, &IO_Write); - - vm_info.rip = 0xff00; + /* + vm_info.cr0 = 0; + vm_info.cs.base=0xf000; + vm_info.cs.limit=0xffff; + */ + vm_info.rip = 0xfff0; vm_info.rsp = 0x0; } diff --git a/palacios/src/geekos/screen.c b/palacios/src/geekos/screen.c index b43dbf4..c952b42 100644 --- a/palacios/src/geekos/screen.c +++ b/palacios/src/geekos/screen.c @@ -1,7 +1,7 @@ /* * GeekOS text screen output * Copyright (c) 2001,2003,2004 David H. Hovemeyer - * $Revision: 1.2 $ + * $Revision: 1.3 $ * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "COPYING". 
@@ -398,6 +398,34 @@ void Init_Screen(void) s_cons.currentAttr = DEFAULT_ATTRIBUTE; Clear_Screen(); +#if 0 + { + unsigned int z = (unsigned int)&Print_Emit; + int i; + Put_Char(' '); + Put_Char('0'); + Put_Char('x'); + + for (i = 0; i < 8; i++) { + int j = z & 0xf0000000; + + j = j >> 28; + j &= 0x0000000f; + + if (j > 9) { + j += 55; + } else { + j += 48; + } + + Put_Char(j); + + z = z << 4; + } + } + +#endif + End_Int_Atomic(iflag); Print("Screen Inited\n"); } diff --git a/palacios/src/geekos/svm.c b/palacios/src/geekos/svm.c index a3d9c89..2c865b3 100644 --- a/palacios/src/geekos/svm.c +++ b/palacios/src/geekos/svm.c @@ -106,11 +106,11 @@ int init_svm_guest(struct guest_info *info) { PrintDebug("Generating Guest nested page tables\n"); - print_mem_list(&(info->mem_list)); - print_mem_layout(&(info->mem_layout)); + // print_mem_list(&(info->mem_list)); + //print_mem_layout(&(info->mem_layout)); info->page_tables = NULL; //info->page_tables = generate_guest_page_tables_64(&(info->mem_layout), &(info->mem_list)); - info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list)); + //info->page_tables = generate_guest_page_tables(&(info->mem_layout), &(info->mem_list)); //PrintDebugPageTables(info->page_tables); diff --git a/palacios/src/geekos/vmm_mem.c b/palacios/src/geekos/vmm_mem.c index 1071b74..20899b0 100644 --- a/palacios/src/geekos/vmm_mem.c +++ b/palacios/src/geekos/vmm_mem.c @@ -5,205 +5,44 @@ extern struct vmm_os_hooks * os_hooks; -void init_mem_list(vmm_mem_list_t * list) { - list->num_pages = 0; - list->long_mode = false; - - list->num_regions = 0; - list->head = NULL; +void init_shadow_map_entry(shadow_map_entry_t *entry, + addr_t guest_addr_start, + addr_t guest_addr_end, + guest_region_type_t guest_region_type, + host_region_type_t host_region_type) +{ + entry->guest_type=guest_region_type; + entry->guest_start=guest_addr_start; + entry->guest_end=guest_addr_end; + entry->host_type=host_region_type; + 
entry->next=entry->prev=NULL; } - -void free_mem_list(vmm_mem_list_t * list) { - mem_region_t * cursor = list->head; - mem_region_t * tmp = NULL; - - while(cursor) { - tmp = cursor; - cursor = cursor->next; - VMMFree(tmp); - } - - VMMFree(list); -} - -/*** FOR THE LOVE OF GOD WRITE SOME UNIT TESTS FOR THIS THING ***/ - - - -// Scan the current list, and extend an existing region if one exists -// Otherwise create a new region and merge it into the correct location in the list -// -// We scan to find the position at which to add the new region and insert it -// Then we clean up any region following the new region that overlaps -// -// JRL: This is pretty hairy... -int add_mem_list_pages(vmm_mem_list_t * list, addr_t addr, uint_t num_pages) { - - uint_t num_new_pages = num_pages; - addr_t new_end = addr + (num_pages * PAGE_SIZE) - 1; - - mem_region_t * cursor = get_mem_list_cursor(list, addr); - - - // PrintDebug("Adding: 0x%x - 0x%x\n", addr, num_pages * PAGE_SIZE); - - - // Make a new region at the head of the list - if (cursor == NULL) { - cursor = os_hooks->malloc(sizeof(mem_region_t)); - - cursor->prev = NULL; - cursor->addr = addr; - cursor->num_pages = num_pages; - - cursor->next = list->head; - list->head = cursor; - - if (cursor->next) { - cursor->next->prev = cursor; - } - - list->num_regions++; - } else { - addr_t cursor_end = cursor->addr + (cursor->num_pages * PAGE_SIZE) - 1; - - if (addr > cursor_end + 1) { - // address falls after cursor region - - mem_region_t * new_region = os_hooks->malloc(sizeof(mem_region_t)); - - new_region->prev = cursor; - new_region->next = cursor->next; - - if (cursor->next) { - cursor->next->prev = new_region; - } - cursor->next = new_region; - - new_region->addr = addr; - new_region->num_pages = num_pages; - - list->num_regions++; - - cursor = new_region; - } else if ((addr >= cursor->addr) && - (addr <= cursor_end + 1)) { - // address falls inside the cursor region - - - // The region has already been added - if (new_end 
<= cursor_end) { - return -1; - } - - // We need to extend the old region - num_new_pages = (new_end - cursor_end) / PAGE_SIZE; - cursor->num_pages += num_new_pages; - - } - } - - - // Clean up any overlaps that follow - while ((cursor->next) && (cursor->next->addr <= new_end + 1)) { - mem_region_t * overlap = cursor->next; - addr_t overlap_end = overlap->addr + (overlap->num_pages * PAGE_SIZE) - 1; - - cursor->next = overlap->next; - if (overlap->next) { - overlap->next->prev = cursor; - } - - if (overlap_end > new_end) { - uint_t extension = (overlap_end - new_end) / PAGE_SIZE; - - cursor->num_pages += extension; - num_new_pages -= (overlap->num_pages - extension); - } else { - num_new_pages -= overlap->num_pages; - } - - VMMFree(overlap); - - list->num_regions--; - } - - - list->num_pages += num_new_pages; - - return 0; -} - - -/* this function returns a pointer to the location in the memory list that - * corresponds to addr. - * Rules: - * IF addr is in a region, a ptr to that region is returned - * IF addr is not in a region, a ptr to the previous region is returned - * IF addr is before all regions, returns NULL - * IF list is empty, returns NULL - */ -mem_region_t * get_mem_list_cursor(vmm_mem_list_t * list, addr_t addr) { - mem_region_t * prev_region = list->head; - - while (prev_region != NULL) { - if ( (addr >= prev_region->addr) && - (addr < (prev_region->addr + (prev_region->num_pages * PAGE_SIZE) - 1)) ) { - return prev_region; - } else if (addr < prev_region->addr) { - // If this region is the current head, then this should return NULL - return prev_region->prev; - } else if (addr >= (prev_region->addr + (prev_region->num_pages * PAGE_SIZE))) { - if (prev_region->next) { - prev_region = prev_region->next; - } else { - return prev_region; - } - } - } - - return prev_region; -} - - - -/* Returns the page address of page number 'index' in the memory list - * If index is out of bounds... 
returns -1 (an invalid page address) - */ -addr_t get_mem_list_addr(vmm_mem_list_t * list, uint_t index) { - mem_region_t * reg = list->head; - uint_t i = index; - - // Memory List overrun - if (index > list->num_pages - 1) { - return -1; - } - - while (i >= 0) { - if (reg->num_pages <= index) { - i -= reg->num_pages; - reg = reg->next; - } else { - return reg->addr + (i * PAGE_SIZE); - } - } - - return -1; +void init_shadow_map_entry_physical(shadow_map_entry_t *entry, + addr_t guest_addr_start, + addr_t guest_addr_end, + guest_region_type_t guest_region_type, + addr_t host_addr_start, + addr_t host_addr_end, + host_region_type_t host_region_type) +{ + init_shadow_map_entry(entry,guest_addr_start,guest_addr_end,guest_region_type,host_region_type); + entry->host_addr.phys_addr.host_start=host_addr_start; + entry->host_addr.phys_addr.host_end=host_addr_end; } + +void init_shadow_map(shadow_map_t * map) +{ + map->num_regions = 0; -void init_mem_layout(vmm_mem_layout_t * layout) { - layout->num_pages = 0; - layout->num_regions = 0; - - layout->head = NULL; + map->head = NULL; } -void free_mem_layout(vmm_mem_layout_t * layout) { - layout_region_t * cursor = layout->head; - layout_region_t * tmp = NULL; +void free_shadow_map(shadow_map_t * map) { + shadow_map_entry_t * cursor = map->head; + shadow_map_entry_t * tmp = NULL; while(cursor) { tmp = cursor; @@ -211,208 +50,165 @@ void free_mem_layout(vmm_mem_layout_t * layout) { VMMFree(tmp); } - VMMFree(layout); + VMMFree(map); } -/* This is slightly different semantically from the mem list, in that we don't allow overlaps - * we could probably allow overlappig regions of the same type... but I'll let someone else deal with that +/* This is slightly different semantically from the mem list, in that + * we don't allow overlaps we could probably allow overlappig regions + * of the same type... 
but I'll let someone else deal with that */ -int add_mem_range(vmm_mem_layout_t * layout, layout_region_t * region) { - layout_region_t * cursor = layout->head; +int add_shadow_map_region(shadow_map_t * map, + shadow_map_entry_t * region) +{ + shadow_map_entry_t * cursor = map->head; - if ((!cursor) || (cursor->start >= region->end)) { + if ((!cursor) || (cursor->guest_start >= region->guest_end)) { region->prev = NULL; region->next = cursor; - layout->num_pages += (region->end - region->start) / PAGE_SIZE; - layout->num_regions++; - layout->head = region; - + map->num_regions++; + map->head = region; return 0; } while (cursor) { // Check if it overlaps with the current cursor - if ((cursor->end > region->start) && (cursor->start < region->start)) { + if ((cursor->guest_end > region->guest_start) && (cursor->guest_start < region->guest_start)) { // overlaps not allowed return -1; } - // add to the end of the list if (!(cursor->next)) { + // add to the end of the list cursor->next = region; region->prev = cursor; - layout->num_regions++; - layout->num_pages += (region->end - region->start) / PAGE_SIZE; + region->next = NULL; + map->num_regions++; return 0; - } else if (cursor->next->start >= region->end) { + } else if (cursor->next->guest_start >= region->guest_end) { // add here region->next = cursor->next; region->prev = cursor; - + cursor->next->prev = region; cursor->next = region; - layout->num_regions++; - layout->num_pages += (region->end - region->start) / PAGE_SIZE; + map->num_regions++; return 0; - } else if (cursor->next->end < region->start) { + } else if (cursor->next->guest_end < region->guest_start) { cursor = cursor->next; } else { + // This cannot happen! 
+ // we should panic here return -1; } } - + + // This cannot happen + // We should panic here return -1; } - - - -int add_shared_mem_range(vmm_mem_layout_t * layout, addr_t start, addr_t end, addr_t host_addr) { - layout_region_t * shared_region = os_hooks->malloc(sizeof(layout_region_t)); - int ret; - - shared_region->next = NULL; - shared_region->prev = NULL; - shared_region->start = start; - shared_region->end = end; - shared_region->type = SHARED; - shared_region->host_addr = host_addr; - - ret = add_mem_range(layout, shared_region); - - if (ret != 0) { - VMMFree(shared_region); - } - - return ret; -} - -int add_unmapped_mem_range(vmm_mem_layout_t * layout, addr_t start, addr_t end) { - layout_region_t * unmapped_region = os_hooks->malloc(sizeof(layout_region_t)); - int ret; - - unmapped_region->next = NULL; - unmapped_region->prev = NULL; - unmapped_region->start = start; - unmapped_region->end = end; - unmapped_region->type = UNMAPPED; - unmapped_region->host_addr = 0; - - ret = add_mem_range(layout, unmapped_region); - - if (ret != 0) { - VMMFree(unmapped_region); - } - - return ret; -} - -int add_guest_mem_range(vmm_mem_layout_t * layout, addr_t start, addr_t end) { - layout_region_t * guest_region = os_hooks->malloc(sizeof(layout_region_t)); - int ret; - - guest_region->next = NULL; - guest_region->prev = NULL; - guest_region->start = start; - guest_region->end = end; - guest_region->type = GUEST; - guest_region->host_addr = 0; - - ret = add_mem_range(layout, guest_region); - - if (ret != 0) { - VMMFree(guest_region); - } - - return ret; +int delete_shadow_map_region(shadow_map_t *map, + addr_t guest_start, + addr_t guest_end) +{ + return -1; } -/* Returns the page address of page number 'index' in the memory list - * If index is out of bounds... 
returns -1 (an invalid page address) - */ -addr_t get_mem_layout_addr(vmm_mem_layout_t * layout, uint_t index) { - layout_region_t * reg = layout->head; - uint_t i = index; +shadow_map_entry_t *get_shadow_map_region_by_index(shadow_map_t * map, + uint_t index) +{ + shadow_map_entry_t * reg = map->head; + uint_t i = 0; - // Memory List overrun - if (index > layout->num_pages - 1) { - return -1; - } - - while (i >= 0) { - if (!reg) { - return -1; - } - - int num_reg_pages = reg->end - reg->start; - - if (num_reg_pages <= index) { - i -= num_reg_pages; - reg = reg->next; - } else { - return reg->start + (i * PAGE_SIZE); + while (reg) { + if (i==index) { + return reg; } + reg=reg->next; + i++; } - - return -1; + return NULL; } -layout_region_t * get_mem_layout_region(vmm_mem_layout_t * layout, addr_t addr) { - layout_region_t * tmp_reg = layout->head; +shadow_map_entry_t * get_shadow_map_region_by_addr(shadow_map_t *map, + addr_t addr) +{ + shadow_map_entry_t * reg = map->head; - while (tmp_reg) { - if ((tmp_reg->start <= addr) && (tmp_reg->end > addr)) { - return tmp_reg; - } else if (tmp_reg->start > addr) { + + while (reg) { + if ((reg->guest_start <= addr) && (reg->guest_end > addr)) { + return reg; + } else if (reg->guest_start > addr) { return NULL; } else { - tmp_reg = tmp_reg->next; + reg = reg->next; } } - return NULL; } +int map_guest_physical_to_host_physical(shadow_map_entry_t *entry, + addr_t guest_addr, + addr_t *host_addr) +{ + if (!(guest_addr>=entry->guest_start && guest_addrguest_end)) { + return -1; + } -void print_mem_list(vmm_mem_list_t * list) { - mem_region_t * cur = list->head; - int i = 0; - - PrintDebug("Memory Region List (regions: %d) (pages: %d)\n", list->num_regions, list->num_pages); - - while (cur) { - PrintDebug("%d: 0x%x - 0x%x\n", i, cur->addr, cur->addr + (cur->num_pages * PAGE_SIZE) - 1); - cur = cur->next; - i++; + switch (entry->host_type) { + case HOST_REGION_PHYSICAL_MEMORY: + case HOST_REGION_MEMORY_MAPPED_DEVICE: + case 
HOST_REGION_UNALLOCATED: + *host_addr=(guest_addr-entry->guest_start) + entry->host_addr.phys_addr.host_start; + return 0; + break; + default: + return -1; + break; } - PrintDebug("\n"); } - -void print_mem_layout(vmm_mem_layout_t * layout) { - layout_region_t * cur = layout->head; +void print_shadow_map(shadow_map_t *map) { + shadow_map_entry_t * cur = map->head; int i = 0; - PrintDebug("Memory Layout (regions: %d) (pages: %d)\n", layout->num_regions, layout->num_pages); + PrintDebug("Memory Layout (regions: %d) \n", map->num_regions); while (cur) { - PrintDebug("%d: 0x%x - 0x%x\n", i, cur->start, cur->end -1); + PrintDebug("%d: 0x%x - 0x%x (%s) -> ", i, cur->guest_start, cur->guest_end -1, + cur->guest_type == GUEST_REGION_PHYSICAL_MEMORY ? "GUEST_REGION_PHYSICAL_MEMORY" : + cur->guest_type == GUEST_REGION_NOTHING ? "GUEST_REGION_NOTHING" : + cur->guest_type == GUEST_REGION_MEMORY_MAPPED_DEVICE ? "GUEST_REGION_MEMORY_MAPPED_DEVICE" : + "UNKNOWN"); + if (cur->host_type==HOST_REGION_PHYSICAL_MEMORY || + cur->host_type==HOST_REGION_UNALLOCATED || + cur->host_type==HOST_REGION_MEMORY_MAPPED_DEVICE) { + PrintDebug("0x%x - 0x%x ", cur->host_addr.phys_addr.host_start, cur->host_addr.phys_addr.host_end); + } + PrintDebug("(%s)\n", + cur->host_type == HOST_REGION_PHYSICAL_MEMORY ? "HOST_REGION_PHYSICAL_MEMORY" : + cur->host_type == HOST_REGION_UNALLOCATED ? "HOST_REGION_UNALLOACTED" : + cur->host_type == HOST_REGION_NOTHING ? "HOST_REGION_NOTHING" : + cur->host_type == HOST_REGION_MEMORY_MAPPED_DEVICE ? "HOST_REGION_MEMORY_MAPPED_DEVICE" : + cur->host_type == HOST_REGION_REMOTE ? "HOST_REGION_REMOTE" : + cur->host_type == HOST_REGION_SWAPPED ? 
"HOST_REGION_SWAPPED" : + "UNKNOWN"); cur = cur->next; i++; } - PrintDebug("\n"); } @@ -424,7 +220,6 @@ void print_mem_layout(vmm_mem_layout_t * layout) { - #ifdef VMM_MEM_TEST diff --git a/palacios/src/geekos/vmm_paging.c b/palacios/src/geekos/vmm_paging.c index 2a8575f..4c1469d 100644 --- a/palacios/src/geekos/vmm_paging.c +++ b/palacios/src/geekos/vmm_paging.c @@ -6,9 +6,215 @@ extern struct vmm_os_hooks * os_hooks; +void delete_page_tables_pde32(vmm_pde_t * pde) { + int i, j; + + if (pde==NULL) { + return ; + } + + for (i = 0; (i < MAX_PAGE_DIR_ENTRIES); i++) { + if (pde[i].present) { + vmm_pte_t * pte = (vmm_pte_t *)(pde[i].pt_base_addr << PAGE_POWER); + + for (j = 0; (j < MAX_PAGE_TABLE_ENTRIES); j++) { + if ((pte[j].present) && (pte[j].vmm_info & GUEST_PAGE)){ + os_hooks->free_page((void *)(pte[j].page_base_addr << PAGE_POWER)); + } + } + + os_hooks->free_page(pte); + } + } + + os_hooks->free_page(pde); +} + + +int init_shadow_paging_state(shadow_paging_state_t *state) +{ + state->guest_page_directory_type=state->shadow_page_directory_type=PDE32; + + state->guest_page_directory=state->shadow_page_directory=NULL; + + init_shadow_map(&(state->shadow_map)); + return 0; +} + + +int wholesale_update_shadow_paging_state(shadow_paging_state_t *state) +{ + unsigned i, j; + vmm_pde_t *cur_guest_pde, *cur_shadow_pde; + vmm_pte_t *cur_guest_pte, *cur_shadow_pte; + + // For now, we'll only work with PDE32 + if (state->guest_page_directory_type!=PDE32) { + return -1; + } + + cur_shadow_pde=(vmm_pde_t*)(state->shadow_page_directory); + + cur_guest_pde = (vmm_pde_t*)(os_hooks->physical_to_virtual(state->guest_page_directory)); + + // Delete the current page table + delete_page_tables_pde32(cur_shadow_pde); + + cur_shadow_pde = os_hooks->allocate_pages(1); + + state->shadow_page_directory = cur_shadow_pde; + state->shadow_page_directory_type=PDE32; + + + for (i=0;ishadow_map),guest_addr); + + if (!ent) { + // FIXME Panic here - guest is trying to map to physical memory + // 
it does not own in any way! + return -1; + } + + // FIXME Bounds check here to see if it's trying to trick us + + switch (ent->host_type) { + case HOST_REGION_PHYSICAL_MEMORY: + // points into currently allocated physical memory, so we just + // set up the shadow to point to the mapped location + if (map_guest_physical_to_host_physical(ent,guest_addr,&host_addr)) { + // Panic here + return -1; + } + cur_shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(host_addr); + // FIXME set vmm_info bits here + break; + case HOST_REGION_UNALLOCATED: + // points to physical memory that is *allowed* but that we + // have not yet allocated. We mark as not present and set a + // bit to remind us to allocate it later + cur_shadow_pde[i].present=0; + // FIXME Set vminfo bits here so that we know that we will be + // allocating it later + break; + case HOST_REGION_NOTHING: + // points to physical memory that is NOT ALLOWED. + // We will mark it as not present and set a bit to remind + // us that it's bad later and insert a GPF then + cur_shadow_pde[i].present=0; + break; + case HOST_REGION_MEMORY_MAPPED_DEVICE: + case HOST_REGION_REMOTE: + case HOST_REGION_SWAPPED: + default: + // Panic. Currently unhandled + return -1; + break; + } + } else { + addr_t host_addr; + addr_t guest_addr; + + // small page - set PDE and follow down to the child table + cur_shadow_pde[i] = cur_guest_pde[i]; + + // Allocate a new second level page table for the shadow + cur_shadow_pte = os_hooks->allocate_pages(1); + + // make our first level page table in teh shadow point to it + cur_shadow_pde[i].pt_base_addr = PAGE_ALIGNED_ADDR(cur_shadow_pte); + + shadow_map_entry_t *ent; + + guest_addr=PAGE_ADDR(cur_guest_pde[i].pt_base_addr); + + ent = get_shadow_map_region_by_addr(&(state->shadow_map),guest_addr); + + if (!ent) { + // FIXME Panic here - guest is trying to map to physical memory + // it does not own in any way! 
+ return -1; + } + + // Address of the relevant second level page table in the guest + if (map_guest_physical_to_host_physical(ent,guest_addr,&host_addr)) { + // Panic here + return -1; + } + // host_addr now contains the host physical address for the guest's 2nd level page table + + // Now we transform it to relevant virtual address + cur_guest_pte = os_hooks->physical_to_virtual((void*)host_addr); + // Now we walk through the second level guest page table + // and clone it into the shadow + for (j=0;jshadow_map),guest_addr); + + if (!ent) { + // FIXME Panic here - guest is trying to map to physical memory + // it does not own in any way! + return -1; + } + + switch (ent->host_type) { + case HOST_REGION_PHYSICAL_MEMORY: + // points into currently allocated physical memory, so we just + // set up the shadow to point to the mapped location + if (map_guest_physical_to_host_physical(ent,guest_addr,&host_addr)) { + // Panic here + return -1; + } + cur_shadow_pte[j].page_base_addr = PAGE_ALIGNED_ADDR(host_addr); + // FIXME set vmm_info bits here + break; + case HOST_REGION_UNALLOCATED: + // points to physical memory that is *allowed* but that we + // have not yet allocated. We mark as not present and set a + // bit to remind us to allocate it later + cur_shadow_pte[j].present=0; + // FIXME Set vminfo bits here so that we know that we will be + // allocating it later + break; + case HOST_REGION_NOTHING: + // points to physical memory that is NOT ALLOWED. + // We will mark it as not present and set a bit to remind + // us that it's bad later and insert a GPF then + cur_shadow_pte[j].present=0; + break; + case HOST_REGION_MEMORY_MAPPED_DEVICE: + case HOST_REGION_REMOTE: + case HOST_REGION_SWAPPED: + default: + // Panic. 
Currently unhandled + return -1; + break; + } + } + } + } + return 0; +} + +#if 0 /* We generate a page table to correspond to a given memory layout * pulling pages from the mem_list when necessary * If there are any gaps in the layout, we add them as unmapped pages @@ -124,27 +330,8 @@ vmm_pde_t * generate_guest_page_tables(vmm_mem_layout_t * layout, vmm_mem_list_t return pde; } +#endif -void free_guest_page_tables(vmm_pde_t * pde) { - int i, j; - - - for (i = 0; (i < MAX_PAGE_DIR_ENTRIES); i++) { - if (pde[i].present) { - vmm_pte_t * pte = (vmm_pte_t *)(pde[i].pt_base_addr << PAGE_POWER); - - for (j = 0; (j < MAX_PAGE_TABLE_ENTRIES); j++) { - if ((pte[j].present) && (pte[j].vmm_info & GUEST_PAGE)){ - os_hooks->free_page((void *)(pte[j].page_base_addr << PAGE_POWER)); - } - } - - os_hooks->free_page(pte); - } - } - - os_hooks->free_page(pde); -} @@ -217,7 +404,7 @@ void PrintDebugPageTables(vmm_pde_t * pde) - +#if 0 pml4e64_t * generate_guest_page_tables_64(vmm_mem_layout_t * layout, vmm_mem_list_t * list) { pml4e64_t * pml = os_hooks->allocate_pages(1); @@ -409,3 +596,5 @@ pml4e64_t * generate_guest_page_tables_64(vmm_mem_layout_t * layout, vmm_mem_lis } return pml; } + +#endif