From: Oscar Mondragon Date: Wed, 6 Feb 2013 17:50:52 +0000 (-0700) Subject: Initial commit of new scheduling infrastructure. The EDF scheduler does not X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=df510a069624b48dda1ad66368954f028757b1bc;p=palacios.releases.git Initial commit of new scheduling infrastructure. The EDF scheduler does not yet work, Linux/Kitten hooks still need to be added for sleep/wakeup, and we still need a mechanism to specify options to Palacios when it starts up. --- diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 5e18244..66ac526 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -73,6 +74,7 @@ struct guest_info { struct vm_core_time time_state; struct v3_core_timeouts timeouts; + void * sched_priv_data; v3_paging_mode_t shdw_pg_mode; struct v3_shdw_pg_state shdw_pg_state; @@ -159,6 +161,7 @@ struct v3_vm_info { struct v3_mem_hooks mem_hooks; struct v3_shdw_impl_state shdw_impl; + void * sched_priv_data; struct v3_io_map io_map; struct v3_msr_map msr_map; @@ -207,6 +210,8 @@ struct v3_vm_info { int num_cores; + int avail_cores; // Available logical cores + // JRL: This MUST be the last entry... struct guest_info cores[0]; }; diff --git a/palacios/include/palacios/vmm_scheduler.h b/palacios/include/palacios/vmm_scheduler.h new file mode 100644 index 0000000..efe47fa --- /dev/null +++ b/palacios/include/palacios/vmm_scheduler.h @@ -0,0 +1,64 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. 
You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Oscar Mondragon + * Patrick G. Bridges + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#ifndef __VMM_SCHEDULER_H__ +#define __VMM_SCHEDULER_H__ + +struct vm_scheduler_impl { + char *name; + int (*init)(); + int (*deinit)(); + int (*vm_init)(struct v3_vm_info *vm); + int (*vm_deinit)(struct v3_vm_info *vm); + int (*core_init)(struct guest_info *vm); + int (*core_deinit)(struct guest_info *vm); + void (*schedule)(struct guest_info *vm); + void (*yield)(struct guest_info *vm, int usec); + int (*admit)(struct v3_vm_info *vm); + int (*remap)(struct v3_vm_info *vm); + int (*dvfs)(struct v3_vm_info *vm); +}; + +struct vm_sched_state { + struct vm_scheduler *sched; + void *priv_data; +}; + +struct vm_core_sched_state { + struct vm_scheduler *sched; + void *priv_data; +}; + +void v3_schedule(struct guest_info *core); +void v3_yield(struct guest_info *core, int usec); + +int v3_scheduler_register_vm(struct v3_vm_info *vm); +int v3_scheduler_register_core(struct guest_info *vm); /* ? 
*/ +int v3_scheduler_admit_vm(struct v3_vm_info *vm); + +void v3_scheduler_remap_notify(struct v3_vm_info *vm); +void v3_scheduler_dvfs_notify(struct v3_vm_info *vm); + +int V3_init_scheduling(); +int v3_register_scheduler(struct vm_scheduler_impl *vm); +struct vm_scheduler_impl *v3_scheduler_lookup(char *name); +int V3_enable_scheduler(); + +#endif /* __VMM_SCHEDULER_H__ */ diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig index 6b497b9..a8beae6 100644 --- a/palacios/src/extensions/Kconfig +++ b/palacios/src/extensions/Kconfig @@ -31,4 +31,10 @@ config EXT_VMWARE help Provides a VMWare persona to allow TSC calibration +config EXT_SCHED_EDF + bool "EDF Real-time Scheduler" + default n + help + Provides a full real-time EDF scheduler for VM cores + endmenu diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile index 24d68c3..7f4b5cb 100644 --- a/palacios/src/extensions/Makefile +++ b/palacios/src/extensions/Makefile @@ -5,3 +5,4 @@ obj-$(V3_CONFIG_EXT_VTIME) += ext_vtime.o obj-$(V3_CONFIG_EXT_INSPECTOR) += ext_inspector.o obj-$(V3_CONFIG_EXT_MACH_CHECK) += ext_mcheck.o obj-$(V3_CONFIG_EXT_VMWARE) += ext_vmware.o +obj-$(V3_CONFIG_EXT_SCHED_EDF) += ext_sched_edf.o diff --git a/palacios/src/extensions/ext_sched_edf.c b/palacios/src/extensions/ext_sched_edf.c new file mode 100644 index 0000000..0c111f5 --- /dev/null +++ b/palacios/src/extensions/ext_sched_edf.c @@ -0,0 +1,694 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2012, The V3VEE Project + * All rights reserved. + * + * Author: Oscar Mondragon + * Patrick G. Bridges + * + * This is free software. 
You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +#include +#include +#include +#include +#include +#include +#include + + + +#ifndef V3_CONFIG_DEBUG_EDF_SCHED +#undef PrintDebug +#define PrintDebug(fmt, args...) +#endif + +/* Overview + * + * EDF Scheduling + * + * The EDF scheduler uses a dynamic calculated priority as scheduling criteria to choose + * what thread will be scheduled.That priority is calculated according with the relative + * deadline of the threads that are ready to run in the runqueue. This runqueue is a per-logical + * core data structure used to keep the runnable virtual cores (threads) allocated to that + * logical core.The threads with less time before its deadline will receive better priorities. + * The runqueue is sorted each time that a vCPU becomes runnable. At that time the vCPU is + * enqueue and a new scheduling decision is taken. Each time a vCPU is scheduled, the parameter + * slice used time is set to zero and the current deadline is calculated using its period. Once + * the vCPU uses the logical core for slice seconds, that vCPU sleeps until its next scheduling + * period (when is re-inserted in the runqueue) and yields the CPU to allow the scheduling + * of the vCPU with best priority in the runqueue. 
+ */ + +// Default configuration values for the EDF Scheduler +// time parameters in microseconds + +#define MAX_PERIOD 1000000000 +#define MIN_PERIOD 50000 +#define MAX_SLICE 1000000000 +#define MIN_SLICE 10000 +#define CPU_PERCENT 100 + + +/* + * init_edf_config: Initialize scheduler configuration + */ + +static void +init_edf_config(struct vm_edf_sched_config *edf_config){ + + edf_config->min_slice = MIN_SLICE; + edf_config->max_slice = MAX_SLICE; + edf_config->min_period = MIN_PERIOD; + edf_config->max_period = MAX_PERIOD; + edf_config->cpu_percent = CPU_PERCENT; +} + + +/* + * edf_sched_init: Initialize the run queue + */ + +int +edf_sched_init(struct v3_vm_info *vm){ + + PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing vm %s\n", vm->name); + + struct vm_sched_state *sched_state = &vm->sched; + sched_state->priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq)); + + if (!sched_state->priv_data) { + PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in edf_sched_init\n"); + return -1; + } + + int lcore = 0; + + PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Available cores %d\n", vm->avail_cores); + + for(lcore = 0; lcore < vm->avail_cores ; lcore++){ + + PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Initializing logical core %d\n", lcore); + + struct vm_edf_rq * edf_rq_list = (struct vm_edf_rq *) sched_state->priv_data; + struct vm_edf_rq * edf_rq = &edf_rq_list[lcore]; + + edf_rq->vCPUs_tree = RB_ROOT; + edf_rq->cpu_u=0; + edf_rq->nr_vCPU=0; + edf_rq->curr_vCPU=NULL; + edf_rq->rb_leftmost=NULL; + edf_rq->last_sched_time=0; + init_edf_config(&edf_rq->edf_config); + + } + + return 0; + +} + + +/* + * is_admissible_core: Decides if a core is admited to the red black tree according with + * the admisibility formula. 
+ */ + +static bool +is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){ + + int curr_utilization = runqueue->cpu_u; + int new_utilization = curr_utilization + (100 * new_sched_core->slice / new_sched_core->period); + int cpu_percent = (runqueue->edf_config).cpu_percent; + + if (new_utilization <= cpu_percent) + return true; + else + return false; + +} + + +/* + * count_cores: Function useful to count the number of cores in a runqueue (Not used for now) + * + */ + + +/*static int count_cores(struct vm_edf_rq *runqueue){ + + struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree); + struct vm_core_edf_sched *curr_core; + int number_cores = 0; + + while(node){ + + curr_core = container_of(node, struct vm_core_edf_sched, node); + node = v3_rb_next(node); + number_cores++; + } + + return number_cores; +}*/ + + + +/* + * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node + * and rebalaces the tree + */ + +static bool +insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){ + + struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node); + struct rb_node *parent = NULL; + struct vm_core_edf_sched *curr_core; + + // Find out place in the tree for the new core + while (*new_core) { + + curr_core = container_of(*new_core, struct vm_core_edf_sched, node); + parent = *new_core; + + if (core->current_deadline < curr_core->current_deadline) + new_core = &((*new_core)->rb_left); + else if (core->current_deadline > curr_core->current_deadline) + new_core = &((*new_core)->rb_right); + else // Is Possible to have same current deadlines in both cores! + return false; + } + // Add new node and rebalance tree. 
+ rb_link_node(&core->node, parent, new_core); + v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree); + + return true; + } + + +/* + * get_curr_host_time: Calculates the current host time (microseconds) + */ + +static uint64_t +get_curr_host_time(struct vm_core_time *core_time){ + + uint64_t cur_cycle = v3_get_host_time(core_time); + uint64_t cpu_khz = core_time->host_cpu_freq; + uint64_t curr_time_us = 1000 * cur_cycle / cpu_khz; + + return curr_time_us; + +} + + +/* + * next_start_period: Given the current host time and the period of a given vCPU, + * calculates the time in which its next period starts. + * + */ + +static uint64_t +next_start_period(uint64_t curr_time_us, uint64_t period_us){ + + uint64_t time_period_us = curr_time_us % period_us; + uint64_t remaining_time_us = period_us - time_period_us; + uint64_t next_start_us = curr_time_us + remaining_time_us; + + return next_start_us; + +} + +/* + * get_runqueue: Get the runqueue assigned to a virtual core. + */ + +struct vm_edf_rq * get_runqueue(struct guest_info *info){ + + struct vm_edf_rq *runqueue_list = (struct vm_edf_rq *) info->vm_info->sched.priv_data; + struct vm_edf_rq *runqueue = &runqueue_list[info->pcpu_id]; + return runqueue; +} + + +/* + * wakeup_core: Wakeup a given vCPU thread + */ + +static void +wakeup_core(struct guest_info *info){ + + struct vm_core_edf_sched *core = info->core_sched.priv_data; + struct vm_edf_rq *runqueue = get_runqueue(info); + + if (!info->core_thread) { + PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id); + } + else { + + PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. 
vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n", + core->info->vcpu_id, + core->info->pcpu_id, + core->total_time, + core->miss_deadline, + core->slice_overuse, + core->extra_time_given, + (struct task_struct *)info->core_thread); + + V3_Wakeup(info->core_thread); + core->last_wakeup_time = get_curr_host_time(&core->info->time_state); + runqueue->curr_vCPU = core; + + } + +} + + +/* + * activate_core - Moves a core to the red-black tree. + * used time is set to zero and current deadline is calculated + */ + +static void +activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){ + + if (is_admissible_core(core, runqueue)){ + + uint64_t curr_time_us = get_curr_host_time(&core->info->time_state); + uint64_t curr_deadline = next_start_period(curr_time_us, core->period); + + core->current_deadline = curr_deadline; + core->used_time=0; + core->remaining_time=core->slice; + + bool ins = insert_core_edf(core, runqueue); + /* + * If not inserted is possible that there is other core with the same deadline. + * Then, the deadline is modified and try again + */ + while(!ins){ + core->current_deadline ++; + ins = insert_core_edf(core, runqueue); + } + + runqueue->cpu_u += 100 * core->slice / core->period; + runqueue->nr_vCPU ++; + + /* + * If this is the first time to be activated pick first earliest deadline core to wakeup. + */ + + if(core->last_wakeup_time == 0){ + + struct vm_core_edf_sched *next_core; + + /* + * Pick first earliest deadline core + */ + struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree); + next_core = container_of(node, struct vm_core_edf_sched, node); + + // Wakeup next_core + wakeup_core(next_core->info); + + //Sleep old core + + V3_Sleep(0); + } + + } + else + PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. 
It is not admissible"); +} + + +/* + * edf_sched_core_init: Initializes per core data structure and + * calls activate function. + */ + +int +edf_sched_core_init(struct guest_info * info){ + + struct vm_edf_rq *runqueue = get_runqueue(info); + struct vm_core_edf_sched *core_edf; + + PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id); + + core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched)); + if (!core_edf) { + PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n"); + return -1; + } + info->core_sched.priv_data = core_edf; + + // Default configuration if not specified in configuration file + + core_edf->info = info; + core_edf->period = 500000; + core_edf->slice = 50000; + core_edf->used_time = 0; + core_edf->last_wakeup_time = 0; + core_edf->remaining_time = core_edf->slice; + core_edf->miss_deadline = 0; + core_edf->extra_time = true; + core_edf->total_time = 0; + core_edf->slice_overuse = 0; + core_edf->extra_time_given = 0; + + v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg; + v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core"); + + while (core){ + char *id = v3_cfg_val(core, "vcpu_id"); + char *period = v3_cfg_val(core, "period"); + char *slice = v3_cfg_val(core, "slice"); + char *extra_time = v3_cfg_val(core, "extra_time"); + + if (atoi(id) == core_edf->info->vcpu_id){ + + core_edf->period = atoi(period); + core_edf->slice = atoi(slice); + core_edf->remaining_time = core_edf->slice; + if (strcasecmp(extra_time, "true") == 0) + core_edf->extra_time = true; + else + core_edf->extra_time = false; + break; + } + core = v3_cfg_next_branch(core); + } + + activate_core(core_edf,runqueue); + return 0; +} + +/* + * search_core_edf: Searches a core in the red-black tree by using its vcpu_id + */ +static struct vm_core_edf_sched * +search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){ + + struct 
rb_node *node = runqueue->vCPUs_tree.rb_node; + + while (node) { + + struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node); + + if (core_edf->current_deadline < core->current_deadline) + node = node->rb_left; + else if (core_edf->current_deadline > core->current_deadline) + node = node->rb_right; + else + if(core->info->vcpu_id == core_edf->info->vcpu_id){ + return core; + } + } + return NULL; +} + + +/* + * delete_core_edf: Deletes a core from the red black tree, generally when it has + * consumed its time slice within the current period. + */ + +static bool +delete_core_edf( struct vm_core_edf_sched *core_edf , struct vm_edf_rq *runqueue){ + + struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue); + if (core){ + + v3_rb_erase(&core->node, &runqueue->vCPUs_tree); + return true; + } + else{ + PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core"); + return false; + } +} + + +/* + * deactivate_core - Removes a core from the red-black tree. 
+ */ + +static void +deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){ + + if(delete_core_edf(core, runqueue)){ + runqueue->cpu_u -= 100 * core->slice / core->period; + runqueue->nr_vCPU -- ; + } +} + + +/* + * pick_next_core: Returns the next core to be scheduled from the red black tree + */ + +static struct vm_core_edf_sched * +pick_next_core(struct vm_edf_rq *runqueue){ + + + /* + * Pick first earliest deadline core + */ + struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree); + struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node); + + /* + * Verify if the earliest deadline core has used its complete slice and return it if not + */ + + if (next_core->used_time < next_core->slice){ + if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state)) + next_core->miss_deadline++; + return next_core; + } + /* + * If slice used, pick the next core that has not used its complete slice + */ + + else { + while(next_core->used_time >= next_core->slice){ + + if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){ + + deactivate_core(next_core,runqueue); + activate_core(next_core,runqueue); + + } + + node = v3_rb_next(node); + if(node){ + next_core = container_of(node, struct vm_core_edf_sched, node); + } + else{ + node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one + return container_of(node, struct vm_core_edf_sched, node); + } + + } + } + + return next_core; +} + + +static void +adjust_slice(struct guest_info * info, int used_time, int extra_time) +{ + struct vm_core_edf_sched *core = info->core_sched.priv_data; + struct vm_edf_rq *runqueue = get_runqueue(info); + + core->used_time = used_time; + + if (extra_time >= 0) { + core->used_time += extra_time; + } + + if( core->used_time >= core->slice){ + deactivate_core(core,runqueue); + activate_core(core,runqueue); + } +} + + +/* + * 
run_next_core: Pick next core to be scheduled and wakeup it + */ + +static void +run_next_core(struct guest_info *info, int used_time, int usec) +{ + struct vm_core_edf_sched *core = info->core_sched.priv_data; + struct vm_core_edf_sched *next_core; + struct vm_edf_rq *runqueue = get_runqueue(info); + + /* The next core to be scheduled is choosen from the tree (Function pick_next_core). + * The selected core is the one with the earliest deadline and with available time + * to use within the current period (used_time < slice) + */ + + next_core = pick_next_core(runqueue); // Pick next core to schedule + + if (core != next_core){ + + // Wakeup next_core + wakeup_core(next_core->info); + core->total_time += used_time; + + if (used_time > core->slice){ + core->slice_overuse++; + core->extra_time_given += (used_time - core->slice); + } + + // Sleep old core + + V3_Sleep(usec); + + } +} + + +/* + * edf_schedule: Scheduling function + */ + +static void +edf_schedule(struct guest_info * info, int usec){ + + uint64_t host_time = get_curr_host_time(&info->time_state); + struct vm_edf_rq *runqueue = get_runqueue(info); + struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->core_sched.priv_data; + + uint64_t used_time = 0; + if(core->last_wakeup_time != 0) + used_time = host_time - core->last_wakeup_time; + + if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled + adjust_slice(core->info, host_time - core->last_wakeup_time, usec); + + run_next_core(core->info,used_time, usec); + return; + +} + +/* + * edf_sched_schedule: Main scheduling function. Computes amount of time in period left, + * recomputing the current core's deadline if it has expired, then runs + * scheduler + * It is called in the following cases: + * A vCPU becomes runnable + * The slice of the current vCPU was used + * The period of a vCPU in the runqueue starts + * Other case?? + * TODO Something to do with extra time? 
+ * TODO Check the use of remaining_time + */ + +void +edf_sched_schedule(struct guest_info * info){ + + edf_schedule(info, 0); + return; +} + +/* + * edf_sched_yield: Called when yielding the logical cpu for usec is needed + */ + +void +edf_sched_yield(struct guest_info * info, int usec){ + + edf_schedule(info, usec); + return; + +} + +/* + * edf_sched_deinit: Frees edf scheduler data structures + */ + + +int +edf_sched_deinit(struct v3_vm_info *vm) +{ + + struct vm_scheduler * sched = vm->sched.sched; + void *priv_data = vm->sched.priv_data; + + if (sched) + V3_Free(sched); + + if (priv_data) + V3_Free(priv_data); + + return 0; + +} + +/* + * edf_sched_deinit: Frees virtual core data structures + */ + +int +edf_sched_core_deinit(struct guest_info *core) +{ + + struct vm_scheduler * sched = core->core_sched.sched; + void *priv_data = core->core_sched.priv_data; + + if (sched) + V3_Free(sched); + + if (priv_data) + V3_Free(priv_data); + + return 0; +} + +static struct vm_scheduler_impl edf_sched = { + .name = "edf", + .init = edf_sched_init, + .deinit = edf_sched_deinit, + .core_init = edf_sched_core_init, + .core_deinit = edf_sched_core_deinit, + .schedule = edf_sched_schedule, + .yield = edf_sched_yield +}; + +static int +ext_sched_edf_init() { + + PrintDebug(VM_NONE, VCORE_NONE,"Sched. 
Creating (%s) scheduler\n",edf_sched.name); + return v3_register_scheduler(&edf_sched); +} + +static int +ext_sched_edf_vm_init() { + return 0; +} + +static struct v3_extension_impl sched_edf_impl = { + .name = "EDF Scheduler", + .init = ext_sched_edf_init, + .vm_init = ext_sched_edf_vm_init, + .vm_deinit = NULL, + .core_init = NULL, + .core_deinit = NULL, + .on_entry = NULL, + .on_exit = NULL +}; + +register_extension(&sched_edf_impl); diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile index a26f8df..87e7951 100644 --- a/palacios/src/palacios/Makefile +++ b/palacios/src/palacios/Makefile @@ -24,6 +24,7 @@ obj-y := \ vmm_queue.o \ vmm_rbtree.o \ vmm_ringbuffer.o \ + vmm_scheduler.o \ vmm_shadow_paging.o \ vmm_sprintf.o \ vmm_string.o \ diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index f60acda..448e5af 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -616,7 +616,7 @@ int v3_svm_enter(struct guest_info * info) { uint64_t guest_cycles = 0; // Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); // Update timer devices after being in the VM before doing // IRQ updates, so that any interrupts they raise get seen @@ -761,7 +761,7 @@ int v3_svm_enter(struct guest_info * info) { v3_stgi(); // Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); // This update timers is for time-dependent handlers // if we're slaved to host time diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index 148b86e..53a1508 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -123,9 +123,15 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) { // Register all shadow paging handlers V3_init_shdw_paging(); + // Initialize the scheduler framework (must be before extensions) + V3_init_scheduling(); + // Register all extensions V3_init_extensions(); + // 
Enabling scheduler + V3_enable_scheduler(); + #ifdef V3_CONFIG_SYMMOD V3_init_symmod(); @@ -208,6 +214,15 @@ struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) { memset(vm->name, 0, 128); strncpy(vm->name, name, 127); + /* + * Creates scheduling hash table and register default scheduler (host scheduler) + */ + + //if(v3_scheduler_register_vm(vm) != -1) { + + // PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n"); + // } + return vm; } @@ -218,6 +233,9 @@ static int start_core(void * p) { struct guest_info * core = (struct guest_info *)p; + if (v3_scheduler_register_core(core) == -1){ + PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id); + } PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n", core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip); @@ -298,10 +316,10 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) { } - if (vm->num_cores > avail_cores) { - PrintError(vm, VCORE_NONE, "Attempted to start a VM with too many cores (vm->num_cores = %d, avail_cores = %d, MAX=%d)\n", - vm->num_cores, avail_cores, MAX_CORES); - return -1; + vm->avail_cores = avail_cores; + + if (v3_scheduler_admit_vm(vm) != 0){ + PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name); } vm->run_state = VM_RUNNING; @@ -780,53 +798,6 @@ v3_cpu_mode_t v3_get_host_cpu_mode() { #endif - - - - -void v3_yield_cond(struct guest_info * info, int usec) { - uint64_t cur_cycle; - cur_cycle = v3_get_host_time(&info->time_state); - - if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) { - //PrintDebug(info->vm_info, info, "Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n", - // (void *)cur_cycle, (void *)info->yield_start_cycle, - // (void *)info->yield_cycle_period); - - if (usec < 0) { - V3_Yield(); - } else { - V3_Sleep(usec); - } - - info->yield_start_cycle += info->vm_info->yield_cycle_period; - } 
-} - - -/* - * unconditional cpu yield - * if the yielding thread is a guest context, the guest quantum is reset on resumption - * Non guest context threads should call this function with a NULL argument - * - * usec <0 => the non-timed yield is used - * usec >=0 => the timed yield is used, which also usually implies interruptible - */ -void v3_yield(struct guest_info * info, int usec) { - if (usec < 0) { - V3_Yield(); - } else { - V3_Sleep(usec); - } - - if (info) { - info->yield_start_cycle += info->vm_info->yield_cycle_period; - } -} - - - - void v3_print_cond(const char * fmt, ...) { if (v3_dbg_enable == 1) { char buf[2048]; diff --git a/palacios/src/palacios/vmm_scheduler.c b/palacios/src/palacios/vmm_scheduler.c new file mode 100644 index 0000000..36286cf --- /dev/null +++ b/palacios/src/palacios/vmm_scheduler.c @@ -0,0 +1,239 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Oscar Mondragon + * Patrick G. Bridges + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include +#include + +#ifndef V3_CONFIG_DEBUG_SCHEDULER +#undef PrintDebug +#define PrintDebug(fmt, args...) 
+#endif + +static char default_strategy[] = "host"; +static struct hashtable * master_scheduler_table = NULL; +static int create_host_scheduler(); + +static struct vm_scheduler_impl *scheduler = NULL; + +static uint_t scheduler_hash_fn(addr_t key) { + char * name = (char *)key; + return v3_hash_buffer((uint8_t *)name, strlen(name)); +} + +static int scheduler_eq_fn(addr_t key1, addr_t key2) { + char * name1 = (char *)key1; + char * name2 = (char *)key2; + + return (strcmp(name1, name2) == 0); +} + +int V3_init_scheduling() { + + PrintDebug(VM_NONE, VCORE_NONE,"Initializing scheduler"); + + master_scheduler_table = v3_create_htable(0, scheduler_hash_fn, scheduler_eq_fn); + return create_host_scheduler(); +} + + +int v3_register_scheduler(struct vm_scheduler_impl *s) { + + PrintDebug(VM_NONE, VCORE_NONE,"Registering Scheduler (%s)\n", s->name); + + if (v3_htable_search(master_scheduler_table, (addr_t)(s->name))) { + PrintError(VM_NONE, VCORE_NONE, "Multiple instances of scheduler (%s)\n", s->name); + return -1; + } + PrintDebug(VM_NONE, VCORE_NONE,"Registering Scheduler (%s) 2\n", s->name); + + + if (v3_htable_insert(master_scheduler_table, + (addr_t)(s->name), + (addr_t)(s)) == 0) { + PrintError(VM_NONE, VCORE_NONE, "Could not register scheduler (%s)\n", s->name); + return -1; + } + + PrintDebug(VM_NONE, VCORE_NONE,"Scheduler registered\n"); + return 0; +} + +struct vm_scheduler_impl *v3_scheduler_lookup(char *name) +{ + return (struct vm_scheduler_impl *)v3_htable_search(master_scheduler_table, (addr_t)(name)); +} + +int V3_enable_scheduler() { + /* XXX Lookup the specified scheduler to use for palacios and use it */ + scheduler = v3_scheduler_lookup(default_strategy); + if (!scheduler) { + PrintError(VM_NONE, VCORE_NONE,"Specified Palacios scheduler \"%s\" not found.\n", default_strategy); + return -1; + } + if (scheduler->init) { + return scheduler->init(); + } else { + return 0; + } +} + +int v3_scheduler_register_vm(struct v3_vm_info *vm) { + if 
(scheduler->vm_init) { + return scheduler->vm_init(vm); + } else { + return 0; + } +} +int v3_scheduler_register_core(struct guest_info *core) { + if (scheduler->core_init) { + return scheduler->core_init(core); + } else { + return 0; + } +} +int v3_scheduler_admit_vm(struct v3_vm_info *vm) { + if (scheduler->admit) { + return scheduler->admit(vm); + } else { + return 0; + } +} +int v3_scheduler_notify_remap(struct v3_vm_info *vm) { + if (scheduler->remap) { + return scheduler->remap(vm); + } else { + return 0; + } +} +int v3_scheduler_notify_dvfs(struct v3_vm_info *vm) { + if (scheduler->dvfs) { + return scheduler->dvfs(vm); + } else { + return 0; + } +} +void v3_schedule(struct guest_info *core) { + if (scheduler->schedule) { + scheduler->schedule(core); + } + return; +} +void v3_yield(struct guest_info *core, int usec) { + if (scheduler->yield) { + scheduler->yield(core, usec); + } + return; +} + +int host_sched_vm_init(struct v3_vm_info *vm) +{ + + PrintDebug(vm, VCORE_NONE,"Sched. host_sched_init\n"); + + char * schedule_hz_str = v3_cfg_val(vm->cfg_data->cfg, "schedule_hz"); + uint32_t sched_hz = 100; + + + if (schedule_hz_str) { + sched_hz = atoi(schedule_hz_str); + } + + PrintDebug(vm, VCORE_NONE,"CPU_KHZ = %d, schedule_freq=%p\n", V3_CPU_KHZ(), + (void *)(addr_t)sched_hz); + + uint64_t yield_cycle_period = (V3_CPU_KHZ() * 1000) / sched_hz; + vm->sched_priv_data = (void *)yield_cycle_period; + + return 0; +} + +int host_sched_core_init(struct guest_info *core) +{ + PrintDebug(core->vm_info, core,"Sched. 
host_sched_core_init\n"); + + uint64_t t = v3_get_host_time(&core->time_state); + core->sched_priv_data = (void *)t; + + return 0; +} + +void host_sched_schedule(struct guest_info *core) +{ + uint64_t cur_cycle; + cur_cycle = v3_get_host_time(&core->time_state); + + if (cur_cycle > ( (uint64_t)core->sched_priv_data + (uint64_t)core->vm_info->sched_priv_data)) { + + V3_Yield(); + + uint64_t yield_start_cycle = (uint64_t) core->sched_priv_data; + yield_start_cycle += (uint64_t)core->vm_info->sched_priv_data; + core->sched_priv_data = (void *)yield_start_cycle; + + } +} + +/* + * unconditional cpu yield + * if the yielding thread is a guest context, the guest quantum is reset on resumption + * Non guest context threads should call this function with a NULL argument + * + * usec <0 => the non-timed yield is used + * usec >=0 => the timed yield is used, which also usually implies interruptible + */ +void host_sched_yield(struct guest_info * core, int usec) { + uint64_t yield_start_cycle; + if (usec < 0) { + V3_Yield(); + } else { + V3_Sleep(usec); + } + yield_start_cycle = (uint64_t) core->sched_priv_data + + (uint64_t)core->vm_info->sched_priv_data; + core->sched_priv_data = (void *)yield_start_cycle; +} + + +int host_sched_admit(struct v3_vm_info *vm){ + return 0; +} + +static struct vm_scheduler_impl host_sched_impl = { + .name = "host", + .init = NULL, + .deinit = NULL, + .vm_init = host_sched_vm_init, + .vm_deinit = NULL, + .core_init = host_sched_core_init, + .core_deinit = NULL, + .schedule = host_sched_schedule, + .yield = host_sched_yield, + .admit = host_sched_admit, + .remap = NULL, + .dvfs=NULL +}; + +static int create_host_scheduler() +{ + v3_register_scheduler(&host_sched_impl); + return 0; +} diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index d6471a1..c74607e 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -966,7 +966,7 @@ int v3_vmx_enter(struct guest_info * info) { uint64_t guest_cycles = 0; // 
Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); // Update timer devices late after being in the VM so that as much // of the time in the VM is accounted for as possible. Also do it before @@ -1135,7 +1135,7 @@ int v3_vmx_enter(struct guest_info * info) { v3_enable_ints(); // Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); v3_advance_time(info, NULL); v3_update_timers(info);