Initial commit of new scheduling infrastructure. The EDF scheduler does not

diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h

index 5e18244..66ac526 100644 (file)
--- a/palacios/include/palacios/vm_guest.h
+++ b/palacios/include/palacios/vm_guest.h
@@ -41,6 +41,7 @@
 #include <palacios/vmm_timeout.h>
 #include <palacios/vmm_exits.h>
 #include <palacios/vmm_events.h>
+#include <palacios/vmm_scheduler.h>
 
 #include <palacios/vmm_perftune.h>
 
@@ -73,6 +74,7 @@ struct guest_info {
 
     struct vm_core_time time_state;
     struct v3_core_timeouts timeouts;
+    void * sched_priv_data;
 
     v3_paging_mode_t shdw_pg_mode;
     struct v3_shdw_pg_state shdw_pg_state;
@@ -159,6 +161,7 @@ struct v3_vm_info {
     struct v3_mem_hooks mem_hooks;
 
     struct v3_shdw_impl_state shdw_impl;
+    void * sched_priv_data;
 
     struct v3_io_map io_map;
     struct v3_msr_map msr_map;
@@ -207,6 +210,8 @@ struct v3_vm_info {
 
     int num_cores;
 
+    int avail_cores; // Available logical cores
+
     // JRL: This MUST be the last entry...
     struct guest_info cores[0];
 };
diff --git a/palacios/include/palacios/vmm_scheduler.h b/palacios/include/palacios/vmm_scheduler.h

new file mode 100644 (file)

index 0000000..efe47fa
--- /dev/null
+++ b/palacios/include/palacios/vmm_scheduler.h
@@ -0,0 +1,64 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Oscar Mondragon <omondrag@cs.unm.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_SCHEDULER_H__
+#define __VMM_SCHEDULER_H__
+
+/*
+ * Forward declarations. Only pointers to these types are used below, and
+ * vm_guest.h includes this header before it defines them; declaring the tags
+ * here keeps them at file scope instead of prototype scope.
+ */
+struct v3_vm_info;
+struct guest_info;
+
+/*
+ * vm_scheduler_impl: callback table describing one scheduler implementation.
+ * Instances are handed to v3_register_scheduler() and looked up by 'name'.
+ *
+ *  - init / deinit           : take no VM or core argument
+ *  - vm_init / vm_deinit     : take the VM
+ *  - core_init / core_deinit : take a virtual core (the parameter is named 'vm'
+ *                              but has type struct guest_info)
+ *  - schedule / yield        : take a virtual core; yield also takes a time in usec
+ *  - admit / remap / dvfs    : take the VM
+ */
+struct vm_scheduler_impl {
+       char *name;
+       int (*init)();
+       int (*deinit)();
+       int (*vm_init)(struct v3_vm_info *vm);
+       int (*vm_deinit)(struct v3_vm_info *vm);
+       int (*core_init)(struct guest_info *vm);
+       int (*core_deinit)(struct guest_info *vm);
+       void (*schedule)(struct guest_info *vm);
+       void (*yield)(struct guest_info *vm, int usec);
+       int (*admit)(struct v3_vm_info *vm);
+       int (*remap)(struct v3_vm_info *vm);
+       int (*dvfs)(struct v3_vm_info *vm);
+};
+
+/*
+ * vm_sched_state: scheduler bookkeeping for a VM: a scheduler pointer plus an
+ * opaque pointer owned by the scheduler implementation.
+ * NOTE(review): 'struct vm_scheduler' is not defined in this header (the callback
+ * table is 'struct vm_scheduler_impl') -- confirm which type is intended.
+ */
+struct vm_sched_state {
+       struct vm_scheduler *sched;
+       void *priv_data;
+};
+
+/*
+ * vm_core_sched_state: same layout as vm_sched_state; presumably the per-virtual-core
+ * counterpart (TODO confirm against struct guest_info).
+ * NOTE(review): 'struct vm_scheduler' is not defined in this header -- confirm the type.
+ */
+struct vm_core_sched_state {
+       struct vm_scheduler *sched;
+       void *priv_data;
+};
+
+/* Scheduling entry points, called with the virtual core that is running. */
+void v3_schedule(struct guest_info *core);
+void v3_yield(struct guest_info *core, int usec);
+
+/* Registration / admission of VMs and virtual cores with the scheduling framework. */
+int v3_scheduler_register_vm(struct v3_vm_info *vm);
+int v3_scheduler_register_core(struct guest_info *vm); /* NOTE(review): argument is a virtual core despite the name 'vm' */
+int v3_scheduler_admit_vm(struct v3_vm_info *vm);
+
+/* NOTE(review): presumably forwarded to the scheduler's remap/dvfs callbacks -- confirm. */
+void v3_scheduler_remap_notify(struct v3_vm_info *vm);
+void v3_scheduler_dvfs_notify(struct v3_vm_info *vm);
+
+/*
+ * Framework setup and scheduler registry. Init_V3() calls V3_init_scheduling()
+ * before the extensions are initialized and V3_enable_scheduler() afterwards.
+ */
+int V3_init_scheduling();
+int v3_register_scheduler(struct vm_scheduler_impl *vm);
+struct vm_scheduler_impl *v3_scheduler_lookup(char *name);
+int V3_enable_scheduler();
+
+#endif /* __VMM_SCHEDULER_H__ */
diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig

index 6b497b9..a8beae6 100644 (file)
--- a/palacios/src/extensions/Kconfig
+++ b/palacios/src/extensions/Kconfig
@@ -31,4 +31,10 @@ config EXT_VMWARE
        help
          Provides a VMWare persona to allow TSC calibration
 
+config EXT_SCHED_EDF
+       bool "EDF Real-time Scheduler"
+       default n
+       help
+         Provides a full real-time EDF scheduler for VM cores
+
 endmenu
diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile

index 24d68c3..7f4b5cb 100644 (file)
--- a/palacios/src/extensions/Makefile
+++ b/palacios/src/extensions/Makefile
@@ -5,3 +5,4 @@ obj-$(V3_CONFIG_EXT_VTIME) += ext_vtime.o
 obj-$(V3_CONFIG_EXT_INSPECTOR) += ext_inspector.o
 obj-$(V3_CONFIG_EXT_MACH_CHECK) += ext_mcheck.o
 obj-$(V3_CONFIG_EXT_VMWARE) += ext_vmware.o
+obj-$(V3_CONFIG_EXT_SCHED_EDF) += ext_sched_edf.o
diff --git a/palacios/src/extensions/ext_sched_edf.c b/palacios/src/extensions/ext_sched_edf.c

new file mode 100644 (file)

index 0000000..0c111f5
--- /dev/null
+++ b/palacios/src/extensions/ext_sched_edf.c
@@ -0,0 +1,694 @@
+/* \r
+ * This file is part of the Palacios Virtual Machine Monitor developed\r
+ * by the V3VEE Project with funding from the United States National \r
+ * Science Foundation and the Department of Energy.  \r
+ *\r
+ * The V3VEE Project is a joint project between Northwestern University\r
+ * and the University of New Mexico.  You can find out more at \r
+ * http://www.v3vee.org\r
+ *\r
+ * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org> \r
+ * All rights reserved.\r
+ *\r
+ * Author: Oscar Mondragon <omondrag@cs.unm.edu>\r
+ *         Patrick G. Bridges <bridges@cs.unm.edu>\r
+ *\r
+ * This is free software.  You are permitted to use,\r
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".\r
+ */\r
+\r
+\r
+#include <palacios/vmm.h>\r
+#include <palacios/vmm_time.h>\r
+#include <palacios/vm_guest.h>\r
+#include <palacios/vmm_hashtable.h>\r
+#include <palacios/vmm_config.h>\r
+#include <palacios/vmm_extensions.h>\r
+#include <palacios/vmm_edf_sched.h>\r
+\r
+\r
+\r
+#ifndef V3_CONFIG_DEBUG_EDF_SCHED\r
+#undef PrintDebug\r
+#define PrintDebug(fmt, args...)\r
+#endif\r
+\r
+/* Overview \r
+ *\r
+ * EDF Scheduling\r
+ *\r
+ * The EDF scheduler uses a dynamically calculated priority as the scheduling criterion to choose\r
+ * which thread will be scheduled. That priority is calculated according to the relative \r
+ * deadline of the threads that are ready to run in the runqueue. This runqueue is a per-logical\r
+ * core data structure used to keep the runnable virtual cores (threads) allocated to that \r
+ * logical core. The threads with less time before their deadline receive better priorities. \r
+ * The runqueue is sorted each time a vCPU becomes runnable. At that time the vCPU is \r
+ * enqueued and a new scheduling decision is taken. Each time a vCPU is scheduled, its\r
+ * used slice time is set to zero and its current deadline is calculated using its period. Once\r
+ * the vCPU has used the logical core for slice microseconds, that vCPU sleeps until its next scheduling \r
+ * period (when it is re-inserted in the runqueue) and yields the CPU to allow the scheduling \r
+ * of the vCPU with the best priority in the runqueue. \r
+ */\r
+\r
+// Default configuration values for the EDF Scheduler\r
+// time parameters in microseconds \r
+\r
+#define MAX_PERIOD 1000000000\r
+#define MIN_PERIOD 50000\r
+#define MAX_SLICE 1000000000\r
+#define MIN_SLICE 10000\r
+#define CPU_PERCENT 100\r
+\r
+\r
+/*\r
+ * init_edf_config: Load the compile-time defaults (MIN/MAX period, MIN/MAX slice\r
+ * and CPU_PERCENT) into a scheduler configuration.\r
+ */\r
+\r
+static void \r
+init_edf_config(struct vm_edf_sched_config *edf_config){\r
+\r
+    // Utilization ceiling compared against by is_admissible_core()\r
+    edf_config->cpu_percent = CPU_PERCENT;\r
+\r
+    // Period bounds\r
+    edf_config->min_period = MIN_PERIOD;\r
+    edf_config->max_period = MAX_PERIOD;\r
+\r
+    // Slice bounds\r
+    edf_config->min_slice = MIN_SLICE;\r
+    edf_config->max_slice = MAX_SLICE;\r
+}\r
+\r
+\r
+/*\r
+ * edf_sched_init: Initialize the run queue\r
+ */\r
+\r
+int \r
+edf_sched_init(struct v3_vm_info *vm){\r
+\r
+    PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing vm %s\n", vm->name);\r
+\r
+    struct vm_sched_state *sched_state = &vm->sched; \r
+    sched_state->priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq));\r
+\r
+    if (!sched_state->priv_data) {\r
+       PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in edf_sched_init\n");\r
+       return -1;\r
+    }\r
+\r
+    int lcore = 0;\r
+  \r
+    PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Available cores %d\n", vm->avail_cores);\r
+\r
+    for(lcore = 0; lcore < vm->avail_cores ; lcore++){\r
+\r
+        PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Initializing logical core %d\n", lcore);\r
+\r
+        struct vm_edf_rq * edf_rq_list =   (struct vm_edf_rq *) sched_state->priv_data;\r
+        struct vm_edf_rq * edf_rq = &edf_rq_list[lcore];\r
+    \r
+        edf_rq->vCPUs_tree = RB_ROOT;\r
+        edf_rq->cpu_u=0;\r
+        edf_rq->nr_vCPU=0;\r
+        edf_rq->curr_vCPU=NULL;\r
+        edf_rq->rb_leftmost=NULL;\r
+        edf_rq->last_sched_time=0;\r
+        init_edf_config(&edf_rq->edf_config);\r
+\r
+    }\r
+ \r
+   return 0;\r
+   \r
+}\r
+\r
+\r
+/*\r
+ * is_admissible_core: Decides if a core is admited to the red black tree according with \r
+ * the admisibility formula.\r
+ */\r
+\r
+static bool \r
+is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){\r
+\r
+    int curr_utilization = runqueue->cpu_u;\r
+    int new_utilization = curr_utilization + (100 * new_sched_core->slice / new_sched_core->period);\r
+    int cpu_percent = (runqueue->edf_config).cpu_percent; \r
+\r
+    if (new_utilization <= cpu_percent)\r
+        return true;\r
+    else\r
+       return false;    \r
+\r
+}\r
+\r
+\r
+/*\r
+ * count_cores: Function useful to count the number of cores in a runqueue (Not used for now)\r
+ *\r
+ */\r
+\r
+\r
+/*static int count_cores(struct vm_edf_rq *runqueue){\r
+\r
+  struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);\r
+  struct vm_core_edf_sched *curr_core;\r
+  int number_cores = 0;    \r
+\r
+    while(node){\r
+        \r
+        curr_core = container_of(node, struct vm_core_edf_sched, node);\r
+        node = v3_rb_next(node);\r
+        number_cores++;\r
+    }\r
+\r
+   return number_cores;\r
+}*/ \r
+\r
+\r
+\r
+/*\r
+ * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node \r
+ * and rebalaces the tree\r
+ */\r
+\r
+static bool \r
+insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){\r
+\r
+    struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node);\r
+    struct rb_node *parent = NULL;\r
+    struct vm_core_edf_sched *curr_core;\r
+\r
+    // Find out place in the tree for the new core \r
+    while (*new_core) {\r
+    \r
+        curr_core = container_of(*new_core, struct vm_core_edf_sched, node);\r
+        parent = *new_core;\r
+        \r
+       if (core->current_deadline < curr_core->current_deadline)\r
+           new_core = &((*new_core)->rb_left);\r
+       else if (core->current_deadline > curr_core->current_deadline)\r
+           new_core = &((*new_core)->rb_right);\r
+        else // Is Possible to have same current deadlines in both cores!\r
+            return false;\r
+    }\r
+    // Add new node and rebalance tree. \r
+    rb_link_node(&core->node, parent, new_core);\r
+    v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree);\r
+    \r
+    return true;\r
+ } \r
+\r
+\r
+/*\r
+ * get_curr_host_time: Convert the host cycle count returned by v3_get_host_time()\r
+ * to microseconds, treating core_time->host_cpu_freq as a frequency in kHz.\r
+ *\r
+ * The result equals 1000 * cycles / kHz, but is computed from the quotient and\r
+ * the remainder separately: multiplying the raw cycle count by 1000 first would\r
+ * wrap around 64 bits once the counter exceeds 2^64 / 1000 cycles.\r
+ */\r
+\r
+static uint64_t \r
+get_curr_host_time(struct vm_core_time *core_time){\r
+\r
+    uint64_t cur_cycle = v3_get_host_time(core_time);\r
+    uint64_t cpu_khz = core_time->host_cpu_freq;\r
+\r
+    uint64_t whole_ms = cur_cycle / cpu_khz;     // complete milliseconds\r
+    uint64_t rem_cycles = cur_cycle % cpu_khz;   // cycles inside the current millisecond\r
+\r
+    return (whole_ms * 1000) + ((rem_cycles * 1000) / cpu_khz);\r
+}\r
+\r
+\r
+/*\r
+ * next_start_period: Return the first multiple of period_us that is strictly\r
+ * greater than curr_time_us (a time exactly on a period boundary maps to the\r
+ * following boundary). period_us must be non-zero.\r
+ */\r
+\r
+static uint64_t \r
+next_start_period(uint64_t curr_time_us, uint64_t period_us){\r
+\r
+    // Time already elapsed inside the period that contains curr_time_us\r
+    uint64_t elapsed_us = curr_time_us % period_us;\r
+\r
+    return curr_time_us + (period_us - elapsed_us);\r
+}\r
+\r
+/*\r
+ * get_runqueue: Return the run queue of the logical core a virtual core is\r
+ * assigned to: entry info->pcpu_id of the array stored in the VM's\r
+ * sched.priv_data.\r
+ */\r
+\r
+struct vm_edf_rq * get_runqueue(struct guest_info *info){\r
+\r
+    struct vm_edf_rq *rq_array = (struct vm_edf_rq *) info->vm_info->sched.priv_data;\r
+\r
+    return rq_array + info->pcpu_id;\r
+}\r
+\r
+\r
+/*\r
+ * wakeup_core: Wakeup a given vCPU thread\r
+ */\r
+\r
+static void \r
+wakeup_core(struct guest_info *info){\r
+\r
+    struct vm_core_edf_sched *core = info->core_sched.priv_data;\r
+    struct vm_edf_rq *runqueue = get_runqueue(info);\r
+\r
+    if (!info->core_thread) {\r
+              PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id);\r
+    } \r
+    else {\r
+\r
+        PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n", \r
+            core->info->vcpu_id,\r
+            core->info->pcpu_id,\r
+            core->total_time,\r
+            core->miss_deadline,\r
+            core->slice_overuse,\r
+            core->extra_time_given,\r
+            (struct task_struct *)info->core_thread); \r
+       \r
+       V3_Wakeup(info->core_thread);\r
+       core->last_wakeup_time = get_curr_host_time(&core->info->time_state);\r
+       runqueue->curr_vCPU = core;\r
+\r
+    }\r
+\r
+}\r
+\r
+\r
+/*\r
+ * activate_core: Insert a core into the run queue's red-black tree if it passes\r
+ * the admission test.\r
+ *\r
+ * On admission the core's deadline is set to the start of its next period, its\r
+ * used time is reset to zero, its remaining time is reset to a full slice, and\r
+ * the queue's utilization (cpu_u) and vCPU count are increased. A core that has\r
+ * never been woken up (last_wakeup_time == 0) additionally wakes the earliest\r
+ * deadline core of the queue and then calls V3_Sleep(0).\r
+ *\r
+ * A core that is not admissible is left out of the tree; only an error is logged.\r
+ */\r
+\r
+static void \r
+activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){\r
+    \r
+    if (is_admissible_core(core, runqueue)){\r
+            \r
+        uint64_t curr_time_us = get_curr_host_time(&core->info->time_state);\r
+        uint64_t curr_deadline = next_start_period(curr_time_us, core->period);\r
+        \r
+        core->current_deadline = curr_deadline;\r
+        core->used_time=0; \r
+        core->remaining_time=core->slice; \r
+        \r
+        bool ins = insert_core_edf(core, runqueue);\r
+        /* \r
+         * insert_core_edf() fails when another core already has the same deadline.\r
+         * In that case the deadline is incremented by one and the insertion is\r
+         * retried until it succeeds.\r
+         */   \r
+        while(!ins){  \r
+            core->current_deadline ++;\r
+            ins = insert_core_edf(core, runqueue);  \r
+        }    \r
+     \r
+        // Book the core's utilization on this run queue (undone by deactivate_core)\r
+        runqueue->cpu_u += 100 * core->slice / core->period;\r
+        runqueue->nr_vCPU ++;\r
+        \r
+        /*\r
+         * If this is the first activation of the core, pick the earliest deadline core to wake up.\r
+         */\r
+\r
+        if(core->last_wakeup_time == 0){\r
+\r
+            struct vm_core_edf_sched *next_core;\r
+        \r
+            /*\r
+             * Pick the earliest deadline core (leftmost node of the tree)\r
+             */\r
+            struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);\r
+            next_core = container_of(node, struct vm_core_edf_sched, node);\r
+          \r
+            // Wake up next_core\r
+            wakeup_core(next_core->info);\r
+       \r
+            // Put the calling thread to sleep.\r
+            // NOTE(review): this also runs when next_core is the core being activated,\r
+            // i.e. the thread wakes itself and then sleeps -- confirm this is intended.\r
+  \r
+            V3_Sleep(0);\r
+        }\r
+        \r
+      }\r
+      else \r
+          PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. It is not admissible");  \r
+}\r
+\r
+\r
+/*\r
+ * edf_sched_core_init: Allocate the per-vcore EDF state, fill it with default\r
+ * values, override them from the "core" entry of the VM configuration whose\r
+ * "vcpu_id" matches this core (keys "period", "slice", "extra_time") and\r
+ * activate the core on its run queue.\r
+ *\r
+ * Configuration values are only applied when they are present and, for period\r
+ * and slice, strictly positive; otherwise the default is kept. This avoids\r
+ * passing NULL to atoi()/strcasecmp() for sparse entries and keeps the period\r
+ * (a divisor in the admission test and in the deadline computation) non-zero.\r
+ * Assumes v3_cfg_val() returns NULL for an absent key -- TODO confirm.\r
+ *\r
+ * Returns 0 on success, -1 if the per-core state cannot be allocated.\r
+ */\r
+\r
+int \r
+edf_sched_core_init(struct guest_info * info){\r
+\r
+    struct vm_edf_rq *runqueue = get_runqueue(info);\r
+    struct vm_core_edf_sched *core_edf;\r
+\r
+    PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id);\r
+\r
+    core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched));\r
+    if (!core_edf) {\r
+       PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n");\r
+       return -1;\r
+    }\r
+    info->core_sched.priv_data = core_edf;\r
+    \r
+    // Default configuration if not specified in configuration file  \r
+  \r
+    core_edf->info = info; \r
+    core_edf->period = 500000;\r
+    core_edf->slice = 50000;\r
+    core_edf->used_time = 0;\r
+    core_edf->last_wakeup_time = 0;\r
+    core_edf->remaining_time = core_edf->slice;  \r
+    core_edf->miss_deadline = 0;\r
+    core_edf->extra_time = true;\r
+    core_edf->total_time = 0;\r
+    core_edf->slice_overuse = 0;\r
+    core_edf->extra_time_given = 0;\r
+\r
+    v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg;\r
+    v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core");\r
+    \r
+    while (core){\r
+        char *id = v3_cfg_val(core, "vcpu_id");\r
+        char *period = v3_cfg_val(core, "period");\r
+        char *slice = v3_cfg_val(core, "slice");\r
+        char *extra_time = v3_cfg_val(core, "extra_time");\r
+        \r
+        // An entry without a vcpu_id cannot be matched to any core; skip it\r
+        if (id && (atoi(id) == core_edf->info->vcpu_id)){\r
+\r
+            if (period) {\r
+                int cfg_period = atoi(period);\r
+\r
+                if (cfg_period > 0) {\r
+                    core_edf->period = cfg_period;\r
+                } else {\r
+                    PrintError(info->vm_info, info,"EDF Sched. Invalid period for vcore %d, using default\n", info->vcpu_id);\r
+                }\r
+            }\r
+\r
+            if (slice) {\r
+                int cfg_slice = atoi(slice);\r
+\r
+                if (cfg_slice > 0) {\r
+                    core_edf->slice = cfg_slice;\r
+                } else {\r
+                    PrintError(info->vm_info, info,"EDF Sched. Invalid slice for vcore %d, using default\n", info->vcpu_id);\r
+                }\r
+            }\r
+\r
+            core_edf->remaining_time = core_edf->slice;  \r
+\r
+            // Any value other than "true" (case-insensitive) disables extra time\r
+            if (extra_time) {\r
+                if (strcasecmp(extra_time, "true") == 0)\r
+                    core_edf->extra_time = true;\r
+                else    \r
+                    core_edf->extra_time = false;\r
+            }\r
+            break;\r
+        }\r
+        core = v3_cfg_next_branch(core);\r
+    }\r
+\r
+    activate_core(core_edf,runqueue); \r
+    return 0; \r
+}\r
+\r
+/*\r
+ * search_core_edf: Searches a core in the red-black tree by using its vcpu_id\r
+ */\r
+static struct vm_core_edf_sched * \r
+search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){\r
+\r
+    struct rb_node *node = runqueue->vCPUs_tree.rb_node;\r
+       \r
+    while (node) {\r
+     \r
+        struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node);\r
+       \r
+        if (core_edf->current_deadline < core->current_deadline)\r
+            node = node->rb_left;\r
+       else if (core_edf->current_deadline > core->current_deadline)\r
+           node = node->rb_right;\r
+        else\r
+            if(core->info->vcpu_id == core_edf->info->vcpu_id){\r
+                return core;\r
+            }\r
+    }\r
+    return NULL;\r
+}\r
+\r
+\r
+/* \r
+ * delete_core_edf: Deletes a core from the red black tree, generally when it has \r
+ * consumed its time slice within the current period.\r
+ */\r
+\r
+static bool \r
+delete_core_edf( struct vm_core_edf_sched *core_edf  , struct vm_edf_rq *runqueue){\r
+\r
+    struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue);\r
+        if (core){ \r
+\r
+            v3_rb_erase(&core->node, &runqueue->vCPUs_tree);  \r
+           return true;\r
+        } \r
+       else{\r
+           PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core");\r
+            return false;        \r
+        }\r
+}\r
+\r
+\r
+/*\r
+ * deactivate_core: Take a core out of the run queue's red-black tree. When the\r
+ * core was actually removed, its utilization share and its slot in the vCPU\r
+ * count are released as well.\r
+ */\r
+\r
+static void \r
+deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){\r
+\r
+    if (!delete_core_edf(core, runqueue)) {\r
+        return;\r
+    }\r
+\r
+    runqueue->nr_vCPU--;\r
+    runqueue->cpu_u -= 100 * core->slice / core->period;\r
+}\r
+\r
+\r
+/*\r
+ * pick_next_core: Returns the next core to be scheduled from the red black tree\r
+ */\r
+\r
+static struct vm_core_edf_sched * \r
+pick_next_core(struct vm_edf_rq *runqueue){\r
+  \r
+  \r
+    /*\r
+     * Pick first earliest deadline core\r
+     */\r
+    struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);\r
+    struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node);\r
+ \r
+    /* \r
+     * Verify if the earliest deadline core has used its complete slice and return it if not\r
+     */\r
+\r
+    if (next_core->used_time < next_core->slice){\r
+        if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state))\r
+            next_core->miss_deadline++; \r
+        return next_core;\r
+    }\r
+    /*\r
+     * If slice used, pick the next core that has not used its complete slice    \r
+     */\r
+\r
+    else {  \r
+        while(next_core->used_time >= next_core->slice){\r
+            \r
+            if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){\r
+\r
+                deactivate_core(next_core,runqueue); \r
+                activate_core(next_core,runqueue);\r
+           \r
+            }            \r
+\r
+            node = v3_rb_next(node);\r
+            if(node){\r
+                next_core = container_of(node, struct vm_core_edf_sched, node);\r
+            }\r
+            else{   \r
+                node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one\r
+            return container_of(node, struct vm_core_edf_sched, node);\r
+            }   \r
+\r
+        }\r
+    }\r
+\r
+    return next_core;\r
+}\r
+\r
+\r
+static void \r
+adjust_slice(struct guest_info * info, int used_time, int extra_time)\r
+{\r
+    struct vm_core_edf_sched *core = info->core_sched.priv_data;\r
+    struct vm_edf_rq *runqueue = get_runqueue(info);\r
+\r
+    core->used_time = used_time;\r
+ \r
+    if (extra_time >= 0) {\r
+       core->used_time += extra_time;\r
+    }\r
+\r
+    if( core->used_time >= core->slice){     \r
+        deactivate_core(core,runqueue);\r
+        activate_core(core,runqueue);\r
+    }\r
+}\r
+\r
+\r
+/*\r
+ * run_next_core: Pick next core to be scheduled and wakeup it\r
+ */\r
+\r
+static void \r
+run_next_core(struct guest_info *info, int used_time, int usec)\r
+{\r
+    struct vm_core_edf_sched *core = info->core_sched.priv_data;\r
+    struct vm_core_edf_sched *next_core;\r
+    struct vm_edf_rq *runqueue = get_runqueue(info);\r
+   \r
+    /* The next core to be scheduled is choosen from the tree (Function pick_next_core). \r
+     * The selected core is the one with the earliest deadline and with available time \r
+     * to use within the current period (used_time < slice)   \r
+     */\r
+   \r
+     next_core = pick_next_core(runqueue); // Pick next core to schedule\r
+          \r
+     if (core != next_core){\r
+\r
+         // Wakeup next_core\r
+         wakeup_core(next_core->info);\r
+         core->total_time += used_time;\r
+\r
+        if (used_time > core->slice){\r
+            core->slice_overuse++;\r
+            core->extra_time_given += (used_time - core->slice);\r
+        }\r
+\r
+         // Sleep old core\r
+  \r
+         V3_Sleep(usec);\r
+       \r
+       }\r
+}\r
+\r
+\r
+/*\r
+ * edf_schedule: Scheduling function\r
+ */\r
+\r
+static void\r
+edf_schedule(struct guest_info * info, int usec){\r
+\r
+    uint64_t host_time = get_curr_host_time(&info->time_state);\r
+    struct vm_edf_rq *runqueue = get_runqueue(info);  \r
+    struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->core_sched.priv_data;\r
+\r
+    uint64_t used_time = 0;\r
+    if(core->last_wakeup_time != 0) \r
+        used_time =  host_time - core->last_wakeup_time;\r
+\r
+    if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled\r
+    adjust_slice(core->info, host_time - core->last_wakeup_time, usec);\r
+\r
+    run_next_core(core->info,used_time, usec);\r
+    return;\r
+\r
+}\r
+\r
+/*\r
+ * edf_sched_schedule: Scheduler entry point registered as the 'schedule'\r
+ * callback. Runs the common EDF scheduling path with no additional sleep time\r
+ * (usec = 0).\r
+ * It is called in the following cases:\r
+ *    A vCPU becomes runnable\r
+ *    The slice of the current vCPU was used\r
+ *    The period of a vCPU in the runqueue starts\r
+ *    Other case?? \r
+ * TODO Something to do with extra time?\r
+ * TODO Check the use of remaining_time\r
+ */\r
+\r
+void \r
+edf_sched_schedule(struct guest_info * info){\r
+\r
+    const int no_sleep_usec = 0;\r
+\r
+    edf_schedule(info, no_sleep_usec);\r
+}\r
+\r
+/*\r
+ * edf_sched_yield: Scheduler entry point registered as the 'yield' callback.\r
+ * Runs the common EDF scheduling path, passing usec on as the time to yield\r
+ * the logical cpu for.\r
+ */\r
+\r
+void \r
+edf_sched_yield(struct guest_info * info, int usec){\r
+\r
+    int yield_usec = usec;\r
+\r
+    edf_schedule(info, yield_usec);\r
+}\r
+\r
+/*\r
+ * edf_sched_deinit: Free the VM's scheduler pointer and its private data (the\r
+ * run queue array allocated by edf_sched_init). Always returns 0.\r
+ *\r
+ * NOTE(review): vm->sched.sched is not allocated anywhere in this file --\r
+ * confirm that this function owns it before freeing it here.\r
+ */\r
+\r
+\r
+int \r
+edf_sched_deinit(struct v3_vm_info *vm)\r
+{\r
+    struct vm_scheduler *vm_sched = vm->sched.sched;\r
+    void *rq_array = vm->sched.priv_data;\r
+\r
+    if (vm_sched != NULL) {\r
+        V3_Free(vm_sched);\r
+    }\r
+\r
+    if (rq_array != NULL) {\r
+        V3_Free(rq_array);\r
+    }\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * edf_sched_core_deinit: Free a virtual core's scheduler pointer and its private\r
+ * data (the EDF state allocated by edf_sched_core_init). Always returns 0.\r
+ *\r
+ * NOTE(review): core->core_sched.sched is not allocated anywhere in this file --\r
+ * confirm that this function owns it before freeing it here.\r
+ */\r
+\r
+int \r
+edf_sched_core_deinit(struct guest_info *core)\r
+{\r
+    struct vm_scheduler *core_sched = core->core_sched.sched;\r
+    void *edf_state = core->core_sched.priv_data;\r
+\r
+    if (core_sched != NULL) {\r
+        V3_Free(core_sched);\r
+    }\r
+\r
+    if (edf_state != NULL) {\r
+        V3_Free(edf_state);\r
+    }\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * edf_sched: callback table registered with the scheduling framework under the\r
+ * name "edf". Callbacks not listed here (vm_init, vm_deinit, admit, remap, dvfs)\r
+ * are left NULL.\r
+ *\r
+ * NOTE(review): edf_sched_init and edf_sched_deinit take a struct v3_vm_info *,\r
+ * which matches the vm_init/vm_deinit slots of struct vm_scheduler_impl, yet they\r
+ * are bound to init/deinit (declared without parameters) -- confirm how the\r
+ * framework invokes these slots.\r
+ */\r
+static struct vm_scheduler_impl edf_sched = {\r
+       .name = "edf",\r
+       .init = edf_sched_init,\r
+       .deinit = edf_sched_deinit,\r
+       .core_init = edf_sched_core_init,\r
+       .core_deinit = edf_sched_core_deinit,\r
+       .schedule = edf_sched_schedule,\r
+       .yield = edf_sched_yield\r
+};\r
+\r
+/*\r
+ * ext_sched_edf_init: Extension init hook. Registers the EDF callback table with\r
+ * the scheduling framework and returns the result of that registration.\r
+ */\r
+static int \r
+ext_sched_edf_init() {\r
+\r
+    int status;\r
+\r
+    PrintDebug(VM_NONE, VCORE_NONE,"Sched. Creating (%s) scheduler\n",edf_sched.name);\r
+\r
+    status = v3_register_scheduler(&edf_sched);\r
+    return status;\r
+}\r
+\r
+/*\r
+ * ext_sched_edf_vm_init: Extension per-VM init hook. Does nothing and returns 0.\r
+ */\r
+static int \r
+ext_sched_edf_vm_init() {\r
+    return 0;\r
+}\r
+\r
+/*\r
+ * sched_edf_impl: extension descriptor for the EDF scheduler. Only the init and\r
+ * vm_init hooks are provided; all other hooks are NULL.\r
+ */\r
+static struct v3_extension_impl sched_edf_impl = {\r
+       .name = "EDF Scheduler",\r
+       .init = ext_sched_edf_init,\r
+       .vm_init = ext_sched_edf_vm_init,\r
+        .vm_deinit = NULL,\r
+       .core_init = NULL,\r
+       .core_deinit = NULL,\r
+       .on_entry = NULL,\r
+       .on_exit = NULL\r
+};\r
+\r
+// Hand the descriptor to the extension framework\r
+register_extension(&sched_edf_impl);\r
diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile

index a26f8df..87e7951 100644 (file)
--- a/palacios/src/palacios/Makefile
+++ b/palacios/src/palacios/Makefile
@@ -24,6 +24,7 @@ obj-y := \
        vmm_queue.o \
        vmm_rbtree.o \
        vmm_ringbuffer.o \
+       vmm_scheduler.o \
        vmm_shadow_paging.o \
        vmm_sprintf.o \
        vmm_string.o \
diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c

index f60acda..448e5af 100644 (file)
--- a/palacios/src/palacios/svm.c
+++ b/palacios/src/palacios/svm.c
@@ -616,7 +616,7 @@ int v3_svm_enter(struct guest_info * info) {
     uint64_t guest_cycles = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info,-1);
+    v3_schedule(info);
 
     // Update timer devices after being in the VM before doing 
     // IRQ updates, so that any interrupts they raise get seen 
@@ -761,7 +761,7 @@ int v3_svm_enter(struct guest_info * info) {
     v3_stgi();
  
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info,-1);
+    v3_schedule(info);
 
     // This update timers is for time-dependent handlers
     // if we're slaved to host time
diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c

index 148b86e..53a1508 100644 (file)
--- a/palacios/src/palacios/vmm.c
+++ b/palacios/src/palacios/vmm.c
@@ -123,9 +123,15 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) {
     // Register all shadow paging handlers
     V3_init_shdw_paging();
 
+    // Initialize the scheduler framework (must be before extensions)
+    V3_init_scheduling();
+ 
     // Register all extensions
     V3_init_extensions();
 
+    // Enabling scheduler
+    V3_enable_scheduler();
+
 
 #ifdef V3_CONFIG_SYMMOD
     V3_init_symmod();
@@ -208,6 +214,15 @@ struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) {
     memset(vm->name, 0, 128);
     strncpy(vm->name, name, 127);
 
+    /*
+     * Creates scheduling hash table and register default scheduler (host scheduler)
+     */
+
+    //if(v3_scheduler_register_vm(vm) != -1) {
+    
+    //    PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n");
+   //  }
+
     return vm;
 }
 
@@ -218,6 +233,9 @@ static int start_core(void * p)
 {
     struct guest_info * core = (struct guest_info *)p;
 
+    if (v3_scheduler_register_core(core) == -1){
+        PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id);
+    }
 
     PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n", 
               core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip);
@@ -298,10 +316,10 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) {
     }
 
 
-    if (vm->num_cores > avail_cores) {
-       PrintError(vm, VCORE_NONE, "Attempted to start a VM with too many cores (vm->num_cores = %d, avail_cores = %d, MAX=%d)\n", 
-                  vm->num_cores, avail_cores, MAX_CORES);
-       return -1;
+    vm->avail_cores = avail_cores;
+ 
+    if (v3_scheduler_admit_vm(vm) != 0){
+        PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name);
     }
 
     vm->run_state = VM_RUNNING;
@@ -780,53 +798,6 @@ v3_cpu_mode_t v3_get_host_cpu_mode() {
 
 #endif 
 
-
-
-
-
-void v3_yield_cond(struct guest_info * info, int usec) {
-    uint64_t cur_cycle;
-    cur_cycle = v3_get_host_time(&info->time_state);
-
-    if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) {
-       //PrintDebug(info->vm_info, info, "Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n", 
-       //           (void *)cur_cycle, (void *)info->yield_start_cycle, 
-       //         (void *)info->yield_cycle_period);
-       
-       if (usec < 0) { 
-           V3_Yield();
-       } else {
-           V3_Sleep(usec);
-       }
-
-        info->yield_start_cycle +=  info->vm_info->yield_cycle_period;
-    }
-}
- 
-
-/* 
- * unconditional cpu yield 
- * if the yielding thread is a guest context, the guest quantum is reset on resumption 
- * Non guest context threads should call this function with a NULL argument
- *
- * usec <0  => the non-timed yield is used
- * usec >=0 => the timed yield is used, which also usually implies interruptible
- */ 
-void v3_yield(struct guest_info * info, int usec) {
-    if (usec < 0) { 
-       V3_Yield();
-    } else {
-       V3_Sleep(usec);
-    }
-
-    if (info) {
-        info->yield_start_cycle +=  info->vm_info->yield_cycle_period;
-    }
-}
-
-
-
-
 void v3_print_cond(const char * fmt, ...) {
     if (v3_dbg_enable == 1) {
        char buf[2048];
diff --git a/palacios/src/palacios/vmm_scheduler.c b/palacios/src/palacios/vmm_scheduler.c

new file mode 100644 (file)

index 0000000..36286cf
--- /dev/null
+++ b/palacios/src/palacios/vmm_scheduler.c
@@ -0,0 +1,239 @@
+/* 
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu> 
+ * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Oscar Mondragon <omondrag@cs.unm.edu>
+ *         Patrick G. Bridges <bridges@cs.unm.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <palacios/vm_guest.h>
+#include <palacios/vmm_scheduler.h>
+#include <palacios/vmm_hashtable.h>
+
+#ifndef V3_CONFIG_DEBUG_SCHEDULER
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+static char default_strategy[] = "host"; /* scheduler name looked up by V3_enable_scheduler() */
+static struct hashtable * master_scheduler_table = NULL; /* registered schedulers, keyed by name; created in V3_init_scheduling() */
+static int create_host_scheduler();
+
+static struct vm_scheduler_impl *scheduler = NULL; /* active scheduler; assigned in V3_enable_scheduler() */
+
+/* Hash callback for the scheduler table: hashes the NUL-terminated name that key points to. */
+static uint_t scheduler_hash_fn(addr_t key) {
+    return v3_hash_buffer((uint8_t *)key, strlen((char *)key));
+}
+
+/* Equality callback for the scheduler table: returns 1 when both keys point
+ * to identical NUL-terminated names, 0 otherwise. */
+static int scheduler_eq_fn(addr_t key1, addr_t key2) {
+    int differ = strcmp((char *)key1, (char *)key2);
+
+    return !differ;
+}
+
+/*
+ * Create the master scheduler table and register the built-in host scheduler.
+ *
+ * Returns -1 if the table could not be created; otherwise returns the result
+ * of create_host_scheduler().
+ */
+int V3_init_scheduling() {
+
+    PrintDebug(VM_NONE, VCORE_NONE,"Initializing scheduler\n");
+
+    master_scheduler_table = v3_create_htable(0, scheduler_hash_fn, scheduler_eq_fn);
+
+    /* Every later registration and lookup goes through this table, so fail
+     * here rather than hand a NULL table to the hashtable code. */
+    if (master_scheduler_table == NULL) {
+        PrintError(VM_NONE, VCORE_NONE, "Could not create scheduler table\n");
+        return -1;
+    }
+
+    return create_host_scheduler();
+}
+
+
+/*
+ * Add a scheduler implementation to the master scheduler table.
+ *
+ * The table entry uses the s->name pointer as key and the s pointer as value,
+ * exactly as passed in, so both are expected to stay valid while registered.
+ *
+ * Returns 0 on success. Returns -1 if s or s->name is NULL, if a scheduler
+ * with the same name is already registered, or if the insertion fails.
+ */
+int v3_register_scheduler(struct vm_scheduler_impl *s) {
+
+    /* The table's hash/equality callbacks call strlen()/strcmp() on the name,
+     * so reject anything that would make them dereference NULL. */
+    if ((s == NULL) || (s->name == NULL)) {
+        PrintError(VM_NONE, VCORE_NONE, "Cannot register a scheduler without a name\n");
+        return -1;
+    }
+
+    PrintDebug(VM_NONE, VCORE_NONE,"Registering Scheduler (%s)\n", s->name);
+
+    if (v3_htable_search(master_scheduler_table, (addr_t)(s->name))) {
+        PrintError(VM_NONE, VCORE_NONE, "Multiple instances of scheduler (%s)\n", s->name);
+        return -1;
+    }
+
+    /* A zero return from the insert is treated as failure. */
+    if (v3_htable_insert(master_scheduler_table,
+                         (addr_t)(s->name),
+                         (addr_t)(s)) == 0) {
+        PrintError(VM_NONE, VCORE_NONE, "Could not register scheduler (%s)\n", s->name);
+        return -1;
+    }
+
+    PrintDebug(VM_NONE, VCORE_NONE,"Scheduler registered\n");
+    return 0;
+}
+
+/* Find a registered scheduler by name in the master scheduler table.
+ * The search result is returned unchanged, cast to an implementation pointer;
+ * callers in this file treat a NULL result as "not registered". */
+struct vm_scheduler_impl *v3_scheduler_lookup(char *name)
+{
+    struct vm_scheduler_impl *impl;
+
+    impl = (struct vm_scheduler_impl *)v3_htable_search(master_scheduler_table, (addr_t)(name));
+    return impl;
+}
+
+/*
+ * Select the scheduler Palacios will use (currently always default_strategy)
+ * and run its optional init hook.
+ *
+ * Returns -1 if no scheduler with that name is registered, the init hook's
+ * return value if the scheduler has one, and 0 otherwise.
+ */
+int V3_enable_scheduler() {
+    /* XXX Lookup the specified scheduler to use for palacios and use it */
+    scheduler = v3_scheduler_lookup(default_strategy);
+
+    if (scheduler == NULL) {
+        PrintError(VM_NONE, VCORE_NONE,"Specified Palacios scheduler \"%s\" not found.\n", default_strategy);
+        return -1;
+    }
+
+    return (scheduler->init) ? scheduler->init() : 0;
+}
+
+/* Run the active scheduler's per-VM init hook for vm.
+ * Returns the hook's result, or 0 when the scheduler has no vm_init hook.
+ * Dereferences the active scheduler without a NULL check. */
+int v3_scheduler_register_vm(struct v3_vm_info *vm) {
+    if (scheduler->vm_init == NULL) {
+        return 0;
+    }
+
+    return scheduler->vm_init(vm);
+}
+/* Run the active scheduler's per-core init hook for core.
+ * Returns the hook's result, or 0 when the scheduler has no core_init hook.
+ * Dereferences the active scheduler without a NULL check. */
+int v3_scheduler_register_core(struct guest_info *core) {
+    if (scheduler->core_init == NULL) {
+        return 0;
+    }
+
+    return scheduler->core_init(core);
+}
+/* Ask the active scheduler to admit vm.
+ * Returns the admit hook's result, or 0 when the scheduler has no admit hook.
+ * Dereferences the active scheduler without a NULL check. */
+int v3_scheduler_admit_vm(struct v3_vm_info *vm) {
+    if (scheduler->admit == NULL) {
+        return 0;
+    }
+
+    return scheduler->admit(vm);
+}
+/* Forward a remap notification for vm to the active scheduler.
+ * Returns the remap hook's result, or 0 when the scheduler has no remap hook.
+ * Dereferences the active scheduler without a NULL check. */
+int v3_scheduler_notify_remap(struct v3_vm_info *vm) {
+    if (scheduler->remap == NULL) {
+        return 0;
+    }
+
+    return scheduler->remap(vm);
+}
+/* Forward a dvfs notification for vm to the active scheduler.
+ * Returns the dvfs hook's result, or 0 when the scheduler has no dvfs hook.
+ * Dereferences the active scheduler without a NULL check. */
+int v3_scheduler_notify_dvfs(struct v3_vm_info *vm) {
+    if (scheduler->dvfs == NULL) {
+        return 0;
+    }
+
+    return scheduler->dvfs(vm);
+}
+/* Invoke the active scheduler's schedule hook for core; does nothing when the
+ * scheduler has no schedule hook.
+ * Dereferences the active scheduler without a NULL check. */
+void v3_schedule(struct guest_info *core) {
+    if (scheduler->schedule == NULL) {
+        return;
+    }
+
+    scheduler->schedule(core);
+}
+/* Invoke the active scheduler's yield hook with core and usec unchanged; does
+ * nothing when the scheduler has no yield hook.
+ * Dereferences the active scheduler without a NULL check. */
+void v3_yield(struct guest_info *core, int usec) {
+    if (scheduler->yield == NULL) {
+        return;
+    }
+
+    scheduler->yield(core, usec);
+}
+
+/*
+ * Per-VM init hook of the host scheduler.
+ *
+ * Reads the optional "schedule_hz" configuration value (default 100) and
+ * stores the resulting time-slice length, in host CPU cycles, in
+ * vm->sched_priv_data. A configured value that atoi() converts to zero or to
+ * a negative number is rejected and the default is used instead, so the
+ * division below can never divide by zero.
+ *
+ * Always returns 0.
+ */
+int host_sched_vm_init(struct v3_vm_info *vm)
+{
+
+    PrintDebug(vm, VCORE_NONE,"Sched. host_sched_init\n"); 
+
+    char * schedule_hz_str = v3_cfg_val(vm->cfg_data->cfg, "schedule_hz");
+    uint32_t sched_hz = 100;   
+
+    if (schedule_hz_str) {
+        int cfg_hz = atoi(schedule_hz_str);
+
+        if (cfg_hz > 0) {
+            sched_hz = (uint32_t)cfg_hz;
+        } else {
+            PrintError(vm, VCORE_NONE, "Invalid schedule_hz \"%s\", using default of %u\n",
+                       schedule_hz_str, sched_hz);
+        }
+    }
+
+    PrintDebug(vm, VCORE_NONE,"CPU_KHZ = %d, schedule_freq=%p\n", V3_CPU_KHZ(), 
+              (void *)(addr_t)sched_hz);
+
+    /* Widen before the kHz -> Hz multiply so the product cannot wrap if
+     * V3_CPU_KHZ() yields a 32-bit value. */
+    uint64_t yield_cycle_period = ((uint64_t)V3_CPU_KHZ() * 1000) / sched_hz;
+
+    /* The period is stored directly in the pointer-sized private field. */
+    vm->sched_priv_data = (void *)(addr_t)yield_cycle_period; 
+
+    return 0;
+}
+
+/*
+ * Per-core init hook of the host scheduler: stores the current host time
+ * (from v3_get_host_time) in core->sched_priv_data, which
+ * host_sched_schedule() later uses as the start of the core's time slice.
+ *
+ * Always returns 0.
+ */
+int host_sched_core_init(struct guest_info *core)
+{
+    uint64_t slice_start;
+
+    PrintDebug(core->vm_info, core,"Sched. host_sched_core_init\n"); 
+
+    slice_start = v3_get_host_time(&core->time_state);
+    core->sched_priv_data = (void *)slice_start;
+
+    return 0;
+}
+
+/*
+ * Conditional yield for the host scheduler.
+ *
+ * core->sched_priv_data holds the start of the current time slice and
+ * core->vm_info->sched_priv_data holds the slice length, both as cycle counts
+ * stored in the pointer field. Once the host time has passed the end of the
+ * slice, the CPU is yielded and the slice start is advanced by one period.
+ */
+void host_sched_schedule(struct guest_info *core)
+{
+    uint64_t now = v3_get_host_time(&core->time_state);
+    uint64_t slice_end = (uint64_t)core->sched_priv_data
+                         + (uint64_t)core->vm_info->sched_priv_data;
+
+    if (now <= slice_end) {
+        return;    /* slice not used up yet */
+    }
+
+    V3_Yield();
+
+    /* Both fields are read again after the yield, as before. */
+    core->sched_priv_data = (void *)((uint64_t)core->sched_priv_data
+                                     + (uint64_t)core->vm_info->sched_priv_data);
+}
+
+/* 
+ * Unconditional CPU yield for the host scheduler.
+ *
+ * If the yielding thread is a guest context, its time slice is restarted on
+ * resumption by advancing core->sched_priv_data by one slice period
+ * (core->vm_info->sched_priv_data).
+ * Non guest context threads should call this function with a NULL argument;
+ * they carry no slice state, so only the yield/sleep is performed.
+ *
+ * usec <0  => the non-timed yield is used
+ * usec >=0 => the timed yield is used, which also usually implies interruptible
+ */
+void host_sched_yield(struct guest_info * core, int usec) {
+    uint64_t yield_start_cycle;
+
+    if (usec < 0) {
+        V3_Yield();
+    } else {
+        V3_Sleep(usec);
+    }
+
+    /* core is NULL for non-guest threads: nothing to update, and it must
+     * not be dereferenced. */
+    if (core == NULL) {
+        return;
+    }
+
+    yield_start_cycle = (uint64_t) core->sched_priv_data
+                        + (uint64_t)core->vm_info->sched_priv_data;
+    core->sched_priv_data = (void *)yield_start_cycle;
+}
+
+
+/* Admission hook of the host scheduler: every VM is accepted (always returns 0). */
+int host_sched_admit(struct v3_vm_info *vm){
+    (void)vm;   /* no per-VM admission decision is made */
+
+    return 0;
+}
+
+/* Hook table of the built-in "host" scheduler, registered by
+ * create_host_scheduler(). */
+static struct vm_scheduler_impl host_sched_impl = {
+    .name        = "host",
+
+    /* Hooks the host scheduler provides. */
+    .vm_init     = host_sched_vm_init,
+    .core_init   = host_sched_core_init,
+    .schedule    = host_sched_schedule,
+    .yield       = host_sched_yield,
+    .admit       = host_sched_admit,
+
+    /* Hooks the host scheduler does not provide. */
+    .init        = NULL,
+    .deinit      = NULL,
+    .vm_deinit   = NULL,
+    .core_deinit = NULL,
+    .remap       = NULL,
+    .dvfs        = NULL
+};
+
+/* Register the built-in "host" scheduler.
+ * Returns the result of v3_register_scheduler() (0 on success, -1 on failure)
+ * so a failed registration is reported to the caller rather than discarded. */
+static int create_host_scheduler()
+{
+    return v3_register_scheduler(&host_sched_impl);
+}
diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c

index d6471a1..c74607e 100644 (file)
--- a/palacios/src/palacios/vmx.c
+++ b/palacios/src/palacios/vmx.c
@@ -966,7 +966,7 @@ int v3_vmx_enter(struct guest_info * info) {
     uint64_t guest_cycles = 0;
 
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info,-1);
+    v3_schedule(info);
 
     // Update timer devices late after being in the VM so that as much 
     // of the time in the VM is accounted for as possible. Also do it before
@@ -1135,7 +1135,7 @@ int v3_vmx_enter(struct guest_info * info) {
     v3_enable_ints();
 
     // Conditionally yield the CPU if the timeslice has expired
-    v3_yield_cond(info,-1);
+    v3_schedule(info);
     v3_advance_time(info, NULL);
     v3_update_timers(info);
palacios/include/palacios/vm_guest.h		patch \| blob \| history
palacios/include/palacios/vmm_scheduler.h	[new file with mode: 0644]	patch \| blob
palacios/src/extensions/Kconfig		patch \| blob \| history
palacios/src/extensions/Makefile		patch \| blob \| history
palacios/src/extensions/ext_sched_edf.c	[new file with mode: 0644]	patch \| blob
palacios/src/palacios/Makefile		patch \| blob \| history
palacios/src/palacios/svm.c		patch \| blob \| history
palacios/src/palacios/vmm.c		patch \| blob \| history
palacios/src/palacios/vmm_scheduler.c	[new file with mode: 0644]	patch \| blob
palacios/src/palacios/vmx.c		patch \| blob \| history