From: Oscar Mondragon Date: Wed, 6 Feb 2013 17:50:52 +0000 (-0700) Subject: Initial commit of new scheduling infrastructure. The EDF scheduler does not X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=df510a069624b48dda1ad66368954f028757b1bc;p=palacios.releases.git Initial commit of new scheduling infrastructure. The EDF scheduler does not yet work, Linux/Kitten hooks still need to be added for sleep/wakeup, and we still need a mechanism to specify options to Palacios when it starts up. --- diff --git a/palacios/include/palacios/vm_guest.h b/palacios/include/palacios/vm_guest.h index 5e18244..66ac526 100644 --- a/palacios/include/palacios/vm_guest.h +++ b/palacios/include/palacios/vm_guest.h @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -73,6 +74,7 @@ struct guest_info { struct vm_core_time time_state; struct v3_core_timeouts timeouts; + void * sched_priv_data; v3_paging_mode_t shdw_pg_mode; struct v3_shdw_pg_state shdw_pg_state; @@ -159,6 +161,7 @@ struct v3_vm_info { struct v3_mem_hooks mem_hooks; struct v3_shdw_impl_state shdw_impl; + void * sched_priv_data; struct v3_io_map io_map; struct v3_msr_map msr_map; @@ -207,6 +210,8 @@ struct v3_vm_info { int num_cores; + int avail_cores; // Available logical cores + // JRL: This MUST be the last entry... struct guest_info cores[0]; }; diff --git a/palacios/include/palacios/vmm_scheduler.h b/palacios/include/palacios/vmm_scheduler.h new file mode 100644 index 0000000..efe47fa --- /dev/null +++ b/palacios/include/palacios/vmm_scheduler.h @@ -0,0 +1,64 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. 
You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Oscar Mondragon + * Patrick G. Bridges + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#ifndef __VMM_SCHEDULER_H__ +#define __VMM_SCHEDULER_H__ + +struct vm_scheduler_impl { + char *name; + int (*init)(); + int (*deinit)(); + int (*vm_init)(struct v3_vm_info *vm); + int (*vm_deinit)(struct v3_vm_info *vm); + int (*core_init)(struct guest_info *vm); + int (*core_deinit)(struct guest_info *vm); + void (*schedule)(struct guest_info *vm); + void (*yield)(struct guest_info *vm, int usec); + int (*admit)(struct v3_vm_info *vm); + int (*remap)(struct v3_vm_info *vm); + int (*dvfs)(struct v3_vm_info *vm); +}; + +struct vm_sched_state { + struct vm_scheduler *sched; + void *priv_data; +}; + +struct vm_core_sched_state { + struct vm_scheduler *sched; + void *priv_data; +}; + +void v3_schedule(struct guest_info *core); +void v3_yield(struct guest_info *core, int usec); + +int v3_scheduler_register_vm(struct v3_vm_info *vm); +int v3_scheduler_register_core(struct guest_info *vm); /* ? 
*/ +int v3_scheduler_admit_vm(struct v3_vm_info *vm); + +void v3_scheduler_remap_notify(struct v3_vm_info *vm); +void v3_scheduler_dvfs_notify(struct v3_vm_info *vm); + +int V3_init_scheduling(); +int v3_register_scheduler(struct vm_scheduler_impl *vm); +struct vm_scheduler_impl *v3_scheduler_lookup(char *name); +int V3_enable_scheduler(); + +#endif /* __VMM_SCHEDULER_H__ */ diff --git a/palacios/src/extensions/Kconfig b/palacios/src/extensions/Kconfig index 6b497b9..a8beae6 100644 --- a/palacios/src/extensions/Kconfig +++ b/palacios/src/extensions/Kconfig @@ -31,4 +31,10 @@ config EXT_VMWARE help Provides a VMWare persona to allow TSC calibration +config EXT_SCHED_EDF + bool "EDF Real-time Scheduler" + default n + help + Provides a full real-time EDF scheduler for VM cores + endmenu diff --git a/palacios/src/extensions/Makefile b/palacios/src/extensions/Makefile index 24d68c3..7f4b5cb 100644 --- a/palacios/src/extensions/Makefile +++ b/palacios/src/extensions/Makefile @@ -5,3 +5,4 @@ obj-$(V3_CONFIG_EXT_VTIME) += ext_vtime.o obj-$(V3_CONFIG_EXT_INSPECTOR) += ext_inspector.o obj-$(V3_CONFIG_EXT_MACH_CHECK) += ext_mcheck.o obj-$(V3_CONFIG_EXT_VMWARE) += ext_vmware.o +obj-$(V3_CONFIG_EXT_SCHED_EDF) += ext_sched_edf.o diff --git a/palacios/src/extensions/ext_sched_edf.c b/palacios/src/extensions/ext_sched_edf.c new file mode 100644 index 0000000..0c111f5 --- /dev/null +++ b/palacios/src/extensions/ext_sched_edf.c @@ -0,0 +1,694 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2012, The V3VEE Project + * All rights reserved. + * + * Author: Oscar Mondragon + * Patrick G. Bridges + * + * This is free software. 
You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + + +#include +#include +#include +#include +#include +#include +#include + + + +#ifndef V3_CONFIG_DEBUG_EDF_SCHED +#undef PrintDebug +#define PrintDebug(fmt, args...) +#endif + +/* Overview + * + * EDF Scheduling + * + * The EDF scheduler uses a dynamic calculated priority as scheduling criteria to choose + * what thread will be scheduled.That priority is calculated according with the relative + * deadline of the threads that are ready to run in the runqueue. This runqueue is a per-logical + * core data structure used to keep the runnable virtual cores (threads) allocated to that + * logical core.The threads with less time before its deadline will receive better priorities. + * The runqueue is sorted each time that a vCPU becomes runnable. At that time the vCPU is + * enqueue and a new scheduling decision is taken. Each time a vCPU is scheduled, the parameter + * slice used time is set to zero and the current deadline is calculated using its period. Once + * the vCPU uses the logical core for slice seconds, that vCPU sleeps until its next scheduling + * period (when is re-inserted in the runqueue) and yields the CPU to allow the scheduling + * of the vCPU with best priority in the runqueue. 
+ */ + +// Default configuration values for the EDF Scheduler +// time parameters in microseconds + +#define MAX_PERIOD 1000000000 +#define MIN_PERIOD 50000 +#define MAX_SLICE 1000000000 +#define MIN_SLICE 10000 +#define CPU_PERCENT 100 + + +/* + * init_edf_config: Initialize scheduler configuration + */ + +static void +init_edf_config(struct vm_edf_sched_config *edf_config){ + + edf_config->min_slice = MIN_SLICE; + edf_config->max_slice = MAX_SLICE; + edf_config->min_period = MIN_PERIOD; + edf_config->max_period = MAX_PERIOD; + edf_config->cpu_percent = CPU_PERCENT; +} + + +/* + * edf_sched_init: Initialize the run queue + */ + +int +edf_sched_init(struct v3_vm_info *vm){ + + PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing vm %s\n", vm->name); + + struct vm_sched_state *sched_state = &vm->sched; + sched_state->priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq)); + + if (!sched_state->priv_data) { + PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in edf_sched_init\n"); + return -1; + } + + int lcore = 0; + + PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Available cores %d\n", vm->avail_cores); + + for(lcore = 0; lcore < vm->avail_cores ; lcore++){ + + PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Initializing logical core %d\n", lcore); + + struct vm_edf_rq * edf_rq_list = (struct vm_edf_rq *) sched_state->priv_data; + struct vm_edf_rq * edf_rq = &edf_rq_list[lcore]; + + edf_rq->vCPUs_tree = RB_ROOT; + edf_rq->cpu_u=0; + edf_rq->nr_vCPU=0; + edf_rq->curr_vCPU=NULL; + edf_rq->rb_leftmost=NULL; + edf_rq->last_sched_time=0; + init_edf_config(&edf_rq->edf_config); + + } + + return 0; + +} + + +/* + * is_admissible_core: Decides if a core is admited to the red black tree according with + * the admisibility formula. 
+ */ + +static bool +is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){ + + int curr_utilization = runqueue->cpu_u; + int new_utilization = curr_utilization + (100 * new_sched_core->slice / new_sched_core->period); + int cpu_percent = (runqueue->edf_config).cpu_percent; + + if (new_utilization <= cpu_percent) + return true; + else + return false; + +} + + +/* + * count_cores: Function useful to count the number of cores in a runqueue (Not used for now) + * + */ + + +/*static int count_cores(struct vm_edf_rq *runqueue){ + + struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree); + struct vm_core_edf_sched *curr_core; + int number_cores = 0; + + while(node){ + + curr_core = container_of(node, struct vm_core_edf_sched, node); + node = v3_rb_next(node); + number_cores++; + } + + return number_cores; +}*/ + + + +/* + * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node + * and rebalaces the tree + */ + +static bool +insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){ + + struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node); + struct rb_node *parent = NULL; + struct vm_core_edf_sched *curr_core; + + // Find out place in the tree for the new core + while (*new_core) { + + curr_core = container_of(*new_core, struct vm_core_edf_sched, node); + parent = *new_core; + + if (core->current_deadline < curr_core->current_deadline) + new_core = &((*new_core)->rb_left); + else if (core->current_deadline > curr_core->current_deadline) + new_core = &((*new_core)->rb_right); + else // Is Possible to have same current deadlines in both cores! + return false; + } + // Add new node and rebalance tree. 
+ rb_link_node(&core->node, parent, new_core); + v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree); + + return true; + } + + +/* + * get_curr_host_time: Calculates the current host time (microseconds) + */ + +static uint64_t +get_curr_host_time(struct vm_core_time *core_time){ + + uint64_t cur_cycle = v3_get_host_time(core_time); + uint64_t cpu_khz = core_time->host_cpu_freq; + uint64_t curr_time_us = 1000 * cur_cycle / cpu_khz; + + return curr_time_us; + +} + + +/* + * next_start_period: Given the current host time and the period of a given vCPU, + * calculates the time in which its next period starts. + * + */ + +static uint64_t +next_start_period(uint64_t curr_time_us, uint64_t period_us){ + + uint64_t time_period_us = curr_time_us % period_us; + uint64_t remaining_time_us = period_us - time_period_us; + uint64_t next_start_us = curr_time_us + remaining_time_us; + + return next_start_us; + +} + +/* + * get_runqueue: Get the runqueue assigned to a virtual core. + */ + +struct vm_edf_rq * get_runqueue(struct guest_info *info){ + + struct vm_edf_rq *runqueue_list = (struct vm_edf_rq *) info->vm_info->sched.priv_data; + struct vm_edf_rq *runqueue = &runqueue_list[info->pcpu_id]; + return runqueue; +} + + +/* + * wakeup_core: Wakeup a given vCPU thread + */ + +static void +wakeup_core(struct guest_info *info){ + + struct vm_core_edf_sched *core = info->core_sched.priv_data; + struct vm_edf_rq *runqueue = get_runqueue(info); + + if (!info->core_thread) { + PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id); + } + else { + + PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. 
vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n", + core->info->vcpu_id, + core->info->pcpu_id, + core->total_time, + core->miss_deadline, + core->slice_overuse, + core->extra_time_given, + (struct task_struct *)info->core_thread); + + V3_Wakeup(info->core_thread); + core->last_wakeup_time = get_curr_host_time(&core->info->time_state); + runqueue->curr_vCPU = core; + + } + +} + + +/* + * activate_core - Moves a core to the red-black tree. + * used time is set to zero and current deadline is calculated + */ + +static void +activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){ + + if (is_admissible_core(core, runqueue)){ + + uint64_t curr_time_us = get_curr_host_time(&core->info->time_state); + uint64_t curr_deadline = next_start_period(curr_time_us, core->period); + + core->current_deadline = curr_deadline; + core->used_time=0; + core->remaining_time=core->slice; + + bool ins = insert_core_edf(core, runqueue); + /* + * If not inserted is possible that there is other core with the same deadline. + * Then, the deadline is modified and try again + */ + while(!ins){ + core->current_deadline ++; + ins = insert_core_edf(core, runqueue); + } + + runqueue->cpu_u += 100 * core->slice / core->period; + runqueue->nr_vCPU ++; + + /* + * If this is the first time to be activated pick first earliest deadline core to wakeup. + */ + + if(core->last_wakeup_time == 0){ + + struct vm_core_edf_sched *next_core; + + /* + * Pick first earliest deadline core + */ + struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree); + next_core = container_of(node, struct vm_core_edf_sched, node); + + // Wakeup next_core + wakeup_core(next_core->info); + + //Sleep old core + + V3_Sleep(0); + } + + } + else + PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. 
It is not admissible"); +} + + +/* + * edf_sched_core_init: Initializes per core data structure and + * calls activate function. + */ + +int +edf_sched_core_init(struct guest_info * info){ + + struct vm_edf_rq *runqueue = get_runqueue(info); + struct vm_core_edf_sched *core_edf; + + PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id); + + core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched)); + if (!core_edf) { + PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n"); + return -1; + } + info->core_sched.priv_data = core_edf; + + // Default configuration if not specified in configuration file + + core_edf->info = info; + core_edf->period = 500000; + core_edf->slice = 50000; + core_edf->used_time = 0; + core_edf->last_wakeup_time = 0; + core_edf->remaining_time = core_edf->slice; + core_edf->miss_deadline = 0; + core_edf->extra_time = true; + core_edf->total_time = 0; + core_edf->slice_overuse = 0; + core_edf->extra_time_given = 0; + + v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg; + v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core"); + + while (core){ + char *id = v3_cfg_val(core, "vcpu_id"); + char *period = v3_cfg_val(core, "period"); + char *slice = v3_cfg_val(core, "slice"); + char *extra_time = v3_cfg_val(core, "extra_time"); + + if (atoi(id) == core_edf->info->vcpu_id){ + + core_edf->period = atoi(period); + core_edf->slice = atoi(slice); + core_edf->remaining_time = core_edf->slice; + if (strcasecmp(extra_time, "true") == 0) + core_edf->extra_time = true; + else + core_edf->extra_time = false; + break; + } + core = v3_cfg_next_branch(core); + } + + activate_core(core_edf,runqueue); + return 0; +} + +/* + * search_core_edf: Searches a core in the red-black tree by using its vcpu_id + */ +static struct vm_core_edf_sched * +search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){ + + struct 
rb_node *node = runqueue->vCPUs_tree.rb_node; + + while (node) { + + struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node); + + if (core_edf->current_deadline < core->current_deadline) + node = node->rb_left; + else if (core_edf->current_deadline > core->current_deadline) + node = node->rb_right; + else + if(core->info->vcpu_id == core_edf->info->vcpu_id){ + return core; + } + } + return NULL; +} + + +/* + * delete_core_edf: Deletes a core from the red black tree, generally when it has + * consumed its time slice within the current period. + */ + +static bool +delete_core_edf( struct vm_core_edf_sched *core_edf , struct vm_edf_rq *runqueue){ + + struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue); + if (core){ + + v3_rb_erase(&core->node, &runqueue->vCPUs_tree); + return true; + } + else{ + PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core"); + return false; + } +} + + +/* + * deactivate_core - Removes a core from the red-black tree. 
+ */ + +static void +deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){ + + if(delete_core_edf(core, runqueue)){ + runqueue->cpu_u -= 100 * core->slice / core->period; + runqueue->nr_vCPU -- ; + } +} + + +/* + * pick_next_core: Returns the next core to be scheduled from the red black tree + */ + +static struct vm_core_edf_sched * +pick_next_core(struct vm_edf_rq *runqueue){ + + + /* + * Pick first earliest deadline core + */ + struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree); + struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node); + + /* + * Verify if the earliest deadline core has used its complete slice and return it if not + */ + + if (next_core->used_time < next_core->slice){ + if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state)) + next_core->miss_deadline++; + return next_core; + } + /* + * If slice used, pick the next core that has not used its complete slice + */ + + else { + while(next_core->used_time >= next_core->slice){ + + if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){ + + deactivate_core(next_core,runqueue); + activate_core(next_core,runqueue); + + } + + node = v3_rb_next(node); + if(node){ + next_core = container_of(node, struct vm_core_edf_sched, node); + } + else{ + node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one + return container_of(node, struct vm_core_edf_sched, node); + } + + } + } + + return next_core; +} + + +static void +adjust_slice(struct guest_info * info, int used_time, int extra_time) +{ + struct vm_core_edf_sched *core = info->core_sched.priv_data; + struct vm_edf_rq *runqueue = get_runqueue(info); + + core->used_time = used_time; + + if (extra_time >= 0) { + core->used_time += extra_time; + } + + if( core->used_time >= core->slice){ + deactivate_core(core,runqueue); + activate_core(core,runqueue); + } +} + + +/* + * 
run_next_core: Pick next core to be scheduled and wakeup it + */ + +static void +run_next_core(struct guest_info *info, int used_time, int usec) +{ + struct vm_core_edf_sched *core = info->core_sched.priv_data; + struct vm_core_edf_sched *next_core; + struct vm_edf_rq *runqueue = get_runqueue(info); + + /* The next core to be scheduled is choosen from the tree (Function pick_next_core). + * The selected core is the one with the earliest deadline and with available time + * to use within the current period (used_time < slice) + */ + + next_core = pick_next_core(runqueue); // Pick next core to schedule + + if (core != next_core){ + + // Wakeup next_core + wakeup_core(next_core->info); + core->total_time += used_time; + + if (used_time > core->slice){ + core->slice_overuse++; + core->extra_time_given += (used_time - core->slice); + } + + // Sleep old core + + V3_Sleep(usec); + + } +} + + +/* + * edf_schedule: Scheduling function + */ + +static void +edf_schedule(struct guest_info * info, int usec){ + + uint64_t host_time = get_curr_host_time(&info->time_state); + struct vm_edf_rq *runqueue = get_runqueue(info); + struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->core_sched.priv_data; + + uint64_t used_time = 0; + if(core->last_wakeup_time != 0) + used_time = host_time - core->last_wakeup_time; + + if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled + adjust_slice(core->info, host_time - core->last_wakeup_time, usec); + + run_next_core(core->info,used_time, usec); + return; + +} + +/* + * edf_sched_schedule: Main scheduling function. Computes amount of time in period left, + * recomputing the current core's deadline if it has expired, then runs + * scheduler + * It is called in the following cases: + * A vCPU becomes runnable + * The slice of the current vCPU was used + * The period of a vCPU in the runqueue starts + * Other case?? + * TODO Something to do with extra time? 
+ * TODO Check the use of remaining_time + */ + +void +edf_sched_schedule(struct guest_info * info){ + + edf_schedule(info, 0); + return; +} + +/* + * edf_sched_yield: Called when yielding the logical cpu for usec is needed + */ + +void +edf_sched_yield(struct guest_info * info, int usec){ + + edf_schedule(info, usec); + return; + +} + +/* + * edf_sched_deinit: Frees edf scheduler data structures + */ + + +int +edf_sched_deinit(struct v3_vm_info *vm) +{ + + struct vm_scheduler * sched = vm->sched.sched; + void *priv_data = vm->sched.priv_data; + + if (sched) + V3_Free(sched); + + if (priv_data) + V3_Free(priv_data); + + return 0; + +} + +/* + * edf_sched_deinit: Frees virtual core data structures + */ + +int +edf_sched_core_deinit(struct guest_info *core) +{ + + struct vm_scheduler * sched = core->core_sched.sched; + void *priv_data = core->core_sched.priv_data; + + if (sched) + V3_Free(sched); + + if (priv_data) + V3_Free(priv_data); + + return 0; +} + +static struct vm_scheduler_impl edf_sched = { + .name = "edf", + .init = edf_sched_init, + .deinit = edf_sched_deinit, + .core_init = edf_sched_core_init, + .core_deinit = edf_sched_core_deinit, + .schedule = edf_sched_schedule, + .yield = edf_sched_yield +}; + +static int +ext_sched_edf_init() { + + PrintDebug(VM_NONE, VCORE_NONE,"Sched. 
Creating (%s) scheduler\n",edf_sched.name); + return v3_register_scheduler(&edf_sched); +} + +static int +ext_sched_edf_vm_init() { + return 0; +} + +static struct v3_extension_impl sched_edf_impl = { + .name = "EDF Scheduler", + .init = ext_sched_edf_init, + .vm_init = ext_sched_edf_vm_init, + .vm_deinit = NULL, + .core_init = NULL, + .core_deinit = NULL, + .on_entry = NULL, + .on_exit = NULL +}; + +register_extension(&sched_edf_impl); diff --git a/palacios/src/palacios/Makefile b/palacios/src/palacios/Makefile index a26f8df..87e7951 100644 --- a/palacios/src/palacios/Makefile +++ b/palacios/src/palacios/Makefile @@ -24,6 +24,7 @@ obj-y := \ vmm_queue.o \ vmm_rbtree.o \ vmm_ringbuffer.o \ + vmm_scheduler.o \ vmm_shadow_paging.o \ vmm_sprintf.o \ vmm_string.o \ diff --git a/palacios/src/palacios/svm.c b/palacios/src/palacios/svm.c index f60acda..448e5af 100644 --- a/palacios/src/palacios/svm.c +++ b/palacios/src/palacios/svm.c @@ -616,7 +616,7 @@ int v3_svm_enter(struct guest_info * info) { uint64_t guest_cycles = 0; // Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); // Update timer devices after being in the VM before doing // IRQ updates, so that any interrupts they raise get seen @@ -761,7 +761,7 @@ int v3_svm_enter(struct guest_info * info) { v3_stgi(); // Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); // This update timers is for time-dependent handlers // if we're slaved to host time diff --git a/palacios/src/palacios/vmm.c b/palacios/src/palacios/vmm.c index 148b86e..53a1508 100644 --- a/palacios/src/palacios/vmm.c +++ b/palacios/src/palacios/vmm.c @@ -123,9 +123,15 @@ void Init_V3(struct v3_os_hooks * hooks, char * cpu_mask, int num_cpus) { // Register all shadow paging handlers V3_init_shdw_paging(); + // Initialize the scheduler framework (must be before extensions) + V3_init_scheduling(); + // Register all extensions V3_init_extensions(); + // 
Enabling scheduler + V3_enable_scheduler(); + #ifdef V3_CONFIG_SYMMOD V3_init_symmod(); @@ -208,6 +214,15 @@ struct v3_vm_info * v3_create_vm(void * cfg, void * priv_data, char * name) { memset(vm->name, 0, 128); strncpy(vm->name, name, 127); + /* + * Creates scheduling hash table and register default scheduler (host scheduler) + */ + + //if(v3_scheduler_register_vm(vm) != -1) { + + // PrintError(vm, VCORE_NONE,"Error registering VM with scheduler\n"); + // } + return vm; } @@ -218,6 +233,9 @@ static int start_core(void * p) { struct guest_info * core = (struct guest_info *)p; + if (v3_scheduler_register_core(core) == -1){ + PrintError(core->vm_info, core,"Error initializing scheduling in core %d\n", core->vcpu_id); + } PrintDebug(core->vm_info,core,"virtual core %u (on logical core %u): in start_core (RIP=%p)\n", core->vcpu_id, core->pcpu_id, (void *)(addr_t)core->rip); @@ -298,10 +316,10 @@ int v3_start_vm(struct v3_vm_info * vm, unsigned int cpu_mask) { } - if (vm->num_cores > avail_cores) { - PrintError(vm, VCORE_NONE, "Attempted to start a VM with too many cores (vm->num_cores = %d, avail_cores = %d, MAX=%d)\n", - vm->num_cores, avail_cores, MAX_CORES); - return -1; + vm->avail_cores = avail_cores; + + if (v3_scheduler_admit_vm(vm) != 0){ + PrintError(vm, VCORE_NONE,"Error admitting VM %s for scheduling", vm->name); } vm->run_state = VM_RUNNING; @@ -780,53 +798,6 @@ v3_cpu_mode_t v3_get_host_cpu_mode() { #endif - - - - -void v3_yield_cond(struct guest_info * info, int usec) { - uint64_t cur_cycle; - cur_cycle = v3_get_host_time(&info->time_state); - - if (cur_cycle > (info->yield_start_cycle + info->vm_info->yield_cycle_period)) { - //PrintDebug(info->vm_info, info, "Conditional Yield (cur_cyle=%p, start_cycle=%p, period=%p)\n", - // (void *)cur_cycle, (void *)info->yield_start_cycle, - // (void *)info->yield_cycle_period); - - if (usec < 0) { - V3_Yield(); - } else { - V3_Sleep(usec); - } - - info->yield_start_cycle += info->vm_info->yield_cycle_period; - } 
-} - - -/* - * unconditional cpu yield - * if the yielding thread is a guest context, the guest quantum is reset on resumption - * Non guest context threads should call this function with a NULL argument - * - * usec <0 => the non-timed yield is used - * usec >=0 => the timed yield is used, which also usually implies interruptible - */ -void v3_yield(struct guest_info * info, int usec) { - if (usec < 0) { - V3_Yield(); - } else { - V3_Sleep(usec); - } - - if (info) { - info->yield_start_cycle += info->vm_info->yield_cycle_period; - } -} - - - - void v3_print_cond(const char * fmt, ...) { if (v3_dbg_enable == 1) { char buf[2048]; diff --git a/palacios/src/palacios/vmm_scheduler.c b/palacios/src/palacios/vmm_scheduler.c new file mode 100644 index 0000000..36286cf --- /dev/null +++ b/palacios/src/palacios/vmm_scheduler.c @@ -0,0 +1,239 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Oscar Mondragon + * Patrick G. Bridges + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include +#include +#include + +#ifndef V3_CONFIG_DEBUG_SCHEDULER +#undef PrintDebug +#define PrintDebug(fmt, args...) 
+#endif + +static char default_strategy[] = "host"; +static struct hashtable * master_scheduler_table = NULL; +static int create_host_scheduler(); + +static struct vm_scheduler_impl *scheduler = NULL; + +static uint_t scheduler_hash_fn(addr_t key) { + char * name = (char *)key; + return v3_hash_buffer((uint8_t *)name, strlen(name)); +} + +static int scheduler_eq_fn(addr_t key1, addr_t key2) { + char * name1 = (char *)key1; + char * name2 = (char *)key2; + + return (strcmp(name1, name2) == 0); +} + +int V3_init_scheduling() { + + PrintDebug(VM_NONE, VCORE_NONE,"Initializing scheduler"); + + master_scheduler_table = v3_create_htable(0, scheduler_hash_fn, scheduler_eq_fn); + return create_host_scheduler(); +} + + +int v3_register_scheduler(struct vm_scheduler_impl *s) { + + PrintDebug(VM_NONE, VCORE_NONE,"Registering Scheduler (%s)\n", s->name); + + if (v3_htable_search(master_scheduler_table, (addr_t)(s->name))) { + PrintError(VM_NONE, VCORE_NONE, "Multiple instances of scheduler (%s)\n", s->name); + return -1; + } + PrintDebug(VM_NONE, VCORE_NONE,"Registering Scheduler (%s) 2\n", s->name); + + + if (v3_htable_insert(master_scheduler_table, + (addr_t)(s->name), + (addr_t)(s)) == 0) { + PrintError(VM_NONE, VCORE_NONE, "Could not register scheduler (%s)\n", s->name); + return -1; + } + + PrintDebug(VM_NONE, VCORE_NONE,"Scheduler registered\n"); + return 0; +} + +struct vm_scheduler_impl *v3_scheduler_lookup(char *name) +{ + return (struct vm_scheduler_impl *)v3_htable_search(master_scheduler_table, (addr_t)(name)); +} + +int V3_enable_scheduler() { + /* XXX Lookup the specified scheduler to use for palacios and use it */ + scheduler = v3_scheduler_lookup(default_strategy); + if (!scheduler) { + PrintError(VM_NONE, VCORE_NONE,"Specified Palacios scheduler \"%s\" not found.\n", default_strategy); + return -1; + } + if (scheduler->init) { + return scheduler->init(); + } else { + return 0; + } +} + +int v3_scheduler_register_vm(struct v3_vm_info *vm) { + if 
(scheduler->vm_init) { + return scheduler->vm_init(vm); + } else { + return 0; + } +} +int v3_scheduler_register_core(struct guest_info *core) { + if (scheduler->core_init) { + return scheduler->core_init(core); + } else { + return 0; + } +} +int v3_scheduler_admit_vm(struct v3_vm_info *vm) { + if (scheduler->admit) { + return scheduler->admit(vm); + } else { + return 0; + } +} +int v3_scheduler_notify_remap(struct v3_vm_info *vm) { + if (scheduler->remap) { + return scheduler->remap(vm); + } else { + return 0; + } +} +int v3_scheduler_notify_dvfs(struct v3_vm_info *vm) { + if (scheduler->dvfs) { + return scheduler->dvfs(vm); + } else { + return 0; + } +} +void v3_schedule(struct guest_info *core) { + if (scheduler->schedule) { + scheduler->schedule(core); + } + return; +} +void v3_yield(struct guest_info *core, int usec) { + if (scheduler->yield) { + scheduler->yield(core, usec); + } + return; +} + +int host_sched_vm_init(struct v3_vm_info *vm) +{ + + PrintDebug(vm, VCORE_NONE,"Sched. host_sched_init\n"); + + char * schedule_hz_str = v3_cfg_val(vm->cfg_data->cfg, "schedule_hz"); + uint32_t sched_hz = 100; + + + if (schedule_hz_str) { + sched_hz = atoi(schedule_hz_str); + } + + PrintDebug(vm, VCORE_NONE,"CPU_KHZ = %d, schedule_freq=%p\n", V3_CPU_KHZ(), + (void *)(addr_t)sched_hz); + + uint64_t yield_cycle_period = (V3_CPU_KHZ() * 1000) / sched_hz; + vm->sched_priv_data = (void *)yield_cycle_period; + + return 0; +} + +int host_sched_core_init(struct guest_info *core) +{ + PrintDebug(core->vm_info, core,"Sched. 
host_sched_core_init\n"); + + uint64_t t = v3_get_host_time(&core->time_state); + core->sched_priv_data = (void *)t; + + return 0; +} + +void host_sched_schedule(struct guest_info *core) +{ + uint64_t cur_cycle; + cur_cycle = v3_get_host_time(&core->time_state); + + if (cur_cycle > ( (uint64_t)core->sched_priv_data + (uint64_t)core->vm_info->sched_priv_data)) { + + V3_Yield(); + + uint64_t yield_start_cycle = (uint64_t) core->sched_priv_data; + yield_start_cycle += (uint64_t)core->vm_info->sched_priv_data; + core->sched_priv_data = (void *)yield_start_cycle; + + } +} + +/* + * unconditional cpu yield + * if the yielding thread is a guest context, the guest quantum is reset on resumption + * Non guest context threads should call this function with a NULL argument + * + * usec <0 => the non-timed yield is used + * usec >=0 => the timed yield is used, which also usually implies interruptible + */ +void host_sched_yield(struct guest_info * core, int usec) { + uint64_t yield_start_cycle; + if (usec < 0) { + V3_Yield(); + } else { + V3_Sleep(usec); + } + yield_start_cycle = (uint64_t) core->sched_priv_data + + (uint64_t)core->vm_info->sched_priv_data; + core->sched_priv_data = (void *)yield_start_cycle; +} + + +int host_sched_admit(struct v3_vm_info *vm){ + return 0; +} + +static struct vm_scheduler_impl host_sched_impl = { + .name = "host", + .init = NULL, + .deinit = NULL, + .vm_init = host_sched_vm_init, + .vm_deinit = NULL, + .core_init = host_sched_core_init, + .core_deinit = NULL, + .schedule = host_sched_schedule, + .yield = host_sched_yield, + .admit = host_sched_admit, + .remap = NULL, + .dvfs=NULL +}; + +static int create_host_scheduler() +{ + v3_register_scheduler(&host_sched_impl); + return 0; +} diff --git a/palacios/src/palacios/vmx.c b/palacios/src/palacios/vmx.c index d6471a1..c74607e 100644 --- a/palacios/src/palacios/vmx.c +++ b/palacios/src/palacios/vmx.c @@ -966,7 +966,7 @@ int v3_vmx_enter(struct guest_info * info) { uint64_t guest_cycles = 0; // 
Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); // Update timer devices late after being in the VM so that as much // of the time in the VM is accounted for as possible. Also do it before @@ -1135,7 +1135,7 @@ int v3_vmx_enter(struct guest_info * info) { v3_enable_ints(); // Conditionally yield the CPU if the timeslice has expired - v3_yield_cond(info,-1); + v3_schedule(info); v3_advance_time(info, NULL); v3_update_timers(info);