/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2013, Oscar Mondragon <omondrag@cs.unm.edu>
 * Copyright (c) 2013, Patrick G. Bridges <bridges@cs.unm.edu>
 * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Oscar Mondragon <omondrag@cs.unm.edu>
 *         Patrick G. Bridges <bridges@cs.unm.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
23 #include <palacios/vmm.h>
24 #include <palacios/vmm_time.h>
25 #include <palacios/vm_guest.h>
26 #include <palacios/vmm_hashtable.h>
27 #include <palacios/vmm_config.h>
28 #include <palacios/vmm_extensions.h>
29 #include <palacios/vmm_rbtree.h>
/*
 * When EDF scheduler debugging is not configured, compile PrintDebug away
 * to nothing. The original #undef/#endif lines were lost in this copy of
 * the file; without the #endif the rest of the file is conditionally
 * compiled out.
 */
#ifndef V3_CONFIG_DEBUG_EXT_SCHED_EDF
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/*
 * The EDF scheduler uses a dynamically calculated priority as its scheduling
 * criterion to choose which thread will be scheduled. That priority is
 * calculated according to the relative deadline of the threads that are ready
 * to run in the runqueue. The runqueue is a per-logical-core data structure
 * used to keep the runnable virtual cores (threads) allocated to that logical
 * core. Threads with less time remaining before their deadline receive better
 * priorities. The runqueue is sorted each time a vCPU becomes runnable: at
 * that time the vCPU is enqueued and a new scheduling decision is taken. Each
 * time a vCPU is scheduled, its used-time counter is reset to zero and its
 * current deadline is calculated from its period. Once the vCPU has used the
 * logical core for "slice" microseconds, it sleeps until its next scheduling
 * period (when it is re-inserted in the runqueue) and yields the CPU to allow
 * the scheduling of the vCPU with the best priority in the runqueue.
 */
// Default configuration values for the EDF Scheduler
// time parameters in microseconds

#define MAX_PERIOD 1000000000
#define MIN_PERIOD 50000
#define MAX_SLICE 1000000000
#define MIN_SLICE 10000
#define CPU_PERCENT 100

// All EDF time quantities (periods, slices, deadlines) are microseconds.
typedef uint64_t time_us;
65 * Per-core EDF Scheduling information
68 struct vm_core_edf_sched {
69 struct guest_info *info; // Core struct
70 struct rb_node node; // red-black tree node
71 time_us period; // Amount of time (us) during which the core may received a CPU allocation
72 time_us slice; // Minimum amount of time (us) received for the core during each period
73 time_us current_deadline; // Time (us) at which current core period ends
74 time_us used_time; // Amount of time (us) of the slice used whiting the current period
75 time_us last_wakeup_time; // Time at which the last wakeup started for this core
76 time_us remaining_time; // Remaining time (us) before current core period ends (before current deadline)
77 bool extra_time; // Specifies if the virtual core is eligible to receive extra CPU time
78 int miss_deadline; // Number of times the core has missed its deadline
79 time_us total_time; // Total scheduled time for this core. For now used for debugging purposes
80 int slice_overuse; // Statistical purposes
81 time_us extra_time_given; // Statistical
85 * Scheduler configuration
88 struct vm_edf_sched_config {
89 time_us min_slice; // Minimum allowed slice
90 time_us max_slice; // Maximum allowed slice
91 time_us min_period; // Minimum allowed period
92 time_us max_period; // Maximum allowed period
93 int cpu_percent; // Percentange of CPU utilization for the scheduler in each physical CPU (100 or less)
98 * Run queue structure. Per-logical core data structure used to keep the runnable virtual cores (threads) allocated to that logical core
99 * Contains a pointer to the red black tree, the structure of configuration options and other info
104 //int cpu_id; // Physical CPU id
105 int cpu_u; // CPU utilization (must be less or equal to the cpu_percent in vm_edf_sched_config)
106 struct rb_root vCPUs_tree; // Red-Black Tree
107 struct vm_edf_sched_config edf_config; // Scheduling configuration structure
108 int nr_vCPU; // Number of cores in the runqueue
109 struct vm_core_edf_sched *curr_vCPU; // Current running CPU
110 struct rb_node *rb_leftmost; // vCPU with the earliest deadline (leftmost in the tree)
111 time_us last_sched_time; // statistical purposes
/*
 * Basic functions for scheduling
 */

int v3_init_edf_scheduling();
124 * init_edf_config: Initialize scheduler configuration
128 init_edf_config(struct vm_edf_sched_config *edf_config){
130 edf_config->min_slice = MIN_SLICE;
131 edf_config->max_slice = MAX_SLICE;
132 edf_config->min_period = MIN_PERIOD;
133 edf_config->max_period = MAX_PERIOD;
134 edf_config->cpu_percent = CPU_PERCENT;
139 * priv_data_init: Initialize the run queue
143 priv_data_init(struct v3_vm_info *vm){
145 PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing EDF Scheduling \n");
147 vm->sched_priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq));
149 if (!vm->sched_priv_data) {
150 PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in priv_data_init\n");
156 PrintDebug(vm, VCORE_NONE,"EDF Sched. priv_data_init. Available cores %d\n", vm->avail_cores);
158 for(lcore = 0; lcore < vm->avail_cores ; lcore++){
160 PrintDebug(vm, VCORE_NONE,"EDF Sched. priv_data_init. Initializing logical core %d\n", lcore);
162 struct vm_edf_rq * edf_rq_list = (struct vm_edf_rq *)vm->sched_priv_data;
163 struct vm_edf_rq * edf_rq = &edf_rq_list[lcore];
165 edf_rq->vCPUs_tree = RB_ROOT;
168 edf_rq->curr_vCPU=NULL;
169 edf_rq->rb_leftmost=NULL;
170 edf_rq->last_sched_time=0;
171 init_edf_config(&edf_rq->edf_config);
180 * is_admissible_core: Decides if a core is admited to the red black tree according with
181 * the admisibility formula.
185 is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){
187 struct v3_vm_info * vm = new_sched_core->info->vm_info;
189 struct v3_time *vm_ts = &(vm->time_state);
190 int tdf = vm_ts->td_denom;
192 int curr_utilization = runqueue->cpu_u;
193 int new_utilization = curr_utilization + ((100/tdf) * new_sched_core->slice / new_sched_core->period);
194 int cpu_percent = (runqueue->edf_config).cpu_percent;
196 if (new_utilization <= cpu_percent)
/*
 * count_cores: Function useful to count the number of cores in a runqueue
 * (Not used for now; kept disabled.)
 *
 * static int count_cores(struct vm_edf_rq *runqueue){
 *
 *     struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
 *     struct vm_core_edf_sched *curr_core;
 *     int number_cores = 0;
 *
 *     while (node) {
 *         curr_core = container_of(node, struct vm_core_edf_sched, node);
 *         node = v3_rb_next(node);
 *         number_cores++;
 *     }
 *
 *     return number_cores;
 * }
 */
230 * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node
231 * and rebalaces the tree
235 insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){
237 struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node);
238 struct rb_node *parent = NULL;
239 struct vm_core_edf_sched *curr_core;
241 // Find out place in the tree for the new core
244 curr_core = container_of(*new_core, struct vm_core_edf_sched, node);
247 if (core->current_deadline < curr_core->current_deadline)
248 new_core = &((*new_core)->rb_left);
249 else if (core->current_deadline > curr_core->current_deadline)
250 new_core = &((*new_core)->rb_right);
251 else // Is Possible to have same current deadlines in both cores!
254 // Add new node and rebalance tree.
255 rb_link_node(&core->node, parent, new_core);
256 v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree);
263 * get_curr_host_time: Calculates the current host time (microseconds)
267 get_curr_host_time(struct vm_core_time *core_time){
269 uint64_t cur_cycle = v3_get_host_time(core_time);
270 uint64_t cpu_khz = core_time->host_cpu_freq;
271 uint64_t curr_time_us = 1000 * cur_cycle / cpu_khz;
/*
 * next_start_period: Given the current host time and the period of a given
 * vCPU, calculates the time at which its next period starts (the smallest
 * multiple of period_us strictly greater than curr_time_us).
 * period_us must be nonzero (enforced by scheduler configuration bounds).
 */
static uint64_t
next_start_period(uint64_t curr_time_us, uint64_t period_us){

    uint64_t time_period_us = curr_time_us % period_us;
    uint64_t remaining_time_us = period_us - time_period_us;
    uint64_t next_start_us = curr_time_us + remaining_time_us;

    return next_start_us;
}
296 * get_runqueue: Get the runqueue assigned to a virtual core.
299 struct vm_edf_rq * get_runqueue(struct guest_info *info){
301 struct vm_edf_rq *runqueue_list = (struct vm_edf_rq *) info->vm_info->sched_priv_data;
302 struct vm_edf_rq *runqueue = &runqueue_list[info->pcpu_id];
308 * wakeup_core: Wakeup a given vCPU thread
312 wakeup_core(struct guest_info *info){
314 struct vm_core_edf_sched *core = info->sched_priv_data;
315 struct vm_edf_rq *runqueue = get_runqueue(info);
317 if (!info->core_thread) {
318 PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id);
322 PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n",
328 core->extra_time_given,
329 (struct task_struct *)info->core_thread);
331 V3_Wakeup(info->core_thread);
332 core->last_wakeup_time = get_curr_host_time(&core->info->time_state);
333 runqueue->curr_vCPU = core;
341 * activate_core - Moves a core to the red-black tree.
342 * used time is set to zero and current deadline is calculated
346 activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
348 struct v3_vm_info * vm = core->info->vm_info;
350 struct v3_time *vm_ts = &(vm->time_state);
351 int tdf = vm_ts->td_denom;
353 if (is_admissible_core(core, runqueue)){
355 uint64_t curr_time_us = get_curr_host_time(&core->info->time_state);
356 uint64_t curr_deadline = next_start_period(curr_time_us, core->period);
358 core->current_deadline = curr_deadline;
360 core->remaining_time=core->slice;
362 bool ins = insert_core_edf(core, runqueue);
364 * If not inserted is possible that there is other core with the same deadline.
365 * Then, the deadline is modified and try again
368 core->current_deadline ++;
369 ins = insert_core_edf(core, runqueue);
372 runqueue->cpu_u += (100/tdf) * core->slice / core->period;
373 runqueue->nr_vCPU ++;
376 * If this is the first time to be activated pick first earliest deadline core to wakeup.
379 if(core->last_wakeup_time == 0){
381 struct vm_core_edf_sched *next_core;
384 * Pick first earliest deadline core
386 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
387 next_core = container_of(node, struct vm_core_edf_sched, node);
390 wakeup_core(next_core->info);
399 PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. It is not admissible");
404 * edf_sched_core_init: Initializes per core data structure and
405 * calls activate function.
409 edf_sched_core_init(struct guest_info * info){
411 struct vm_edf_rq *runqueue = get_runqueue(info);
412 struct vm_core_edf_sched *core_edf;
414 PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id);
416 core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched));
418 PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n");
421 info->sched_priv_data = core_edf;
423 // Default configuration if not specified in configuration file
425 core_edf->info = info;
426 core_edf->period = 500000;
427 core_edf->slice = 50000;
428 core_edf->used_time = 0;
429 core_edf->last_wakeup_time = 0;
430 core_edf->remaining_time = core_edf->slice;
431 core_edf->miss_deadline = 0;
432 core_edf->extra_time = true;
433 core_edf->total_time = 0;
434 core_edf->slice_overuse = 0;
435 core_edf->extra_time_given = 0;
437 v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg;
438 v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core");
441 char *id = v3_cfg_val(core, "vcpu_id");
442 char *period = v3_cfg_val(core, "period");
443 char *slice = v3_cfg_val(core, "slice");
444 char *extra_time = v3_cfg_val(core, "extra_time");
446 if (atoi(id) == core_edf->info->vcpu_id){
448 core_edf->period = atoi(period);
449 core_edf->slice = atoi(slice);
450 core_edf->remaining_time = core_edf->slice;
451 if (strcasecmp(extra_time, "true") == 0)
452 core_edf->extra_time = true;
454 core_edf->extra_time = false;
457 core = v3_cfg_next_branch(core);
460 activate_core(core_edf,runqueue);
465 * search_core_edf: Searches a core in the red-black tree by using its vcpu_id
467 static struct vm_core_edf_sched *
468 search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){
470 struct rb_node *node = runqueue->vCPUs_tree.rb_node;
474 struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node);
476 if (core_edf->current_deadline < core->current_deadline)
477 node = node->rb_left;
478 else if (core_edf->current_deadline > core->current_deadline)
479 node = node->rb_right;
481 if(core->info->vcpu_id == core_edf->info->vcpu_id){
490 * delete_core_edf: Deletes a core from the red black tree, generally when it has
491 * consumed its time slice within the current period.
495 delete_core_edf( struct vm_core_edf_sched *core_edf , struct vm_edf_rq *runqueue){
497 struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue);
500 v3_rb_erase(&core->node, &runqueue->vCPUs_tree);
504 PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core");
511 * deactivate_core - Removes a core from the red-black tree.
515 deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
517 if(delete_core_edf(core, runqueue)){
518 runqueue->cpu_u -= 100 * core->slice / core->period;
519 runqueue->nr_vCPU -- ;
525 * pick_next_core: Returns the next core to be scheduled from the red black tree
528 static struct vm_core_edf_sched *
529 pick_next_core(struct vm_edf_rq *runqueue){
533 * Pick first earliest deadline core
535 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
536 struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node);
539 * Verify if the earliest deadline core has used its complete slice and return it if not
542 if (next_core->used_time < next_core->slice){
543 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state))
544 next_core->miss_deadline++;
548 * If slice used, pick the next core that has not used its complete slice
552 while(next_core->used_time >= next_core->slice){
554 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){
556 deactivate_core(next_core,runqueue);
557 activate_core(next_core,runqueue);
561 node = v3_rb_next(node);
563 next_core = container_of(node, struct vm_core_edf_sched, node);
566 node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one
567 return container_of(node, struct vm_core_edf_sched, node);
578 adjust_slice(struct guest_info * info, int used_time, int extra_time)
580 struct vm_core_edf_sched *core = info->sched_priv_data;
581 struct vm_edf_rq *runqueue = get_runqueue(info);
583 core->used_time = used_time;
585 if (extra_time >= 0) {
586 core->used_time += extra_time;
589 if( core->used_time >= core->slice){
590 deactivate_core(core,runqueue);
591 activate_core(core,runqueue);
597 * run_next_core: Pick next core to be scheduled and wakeup it
601 run_next_core(struct guest_info *info, int used_time, int usec)
603 struct vm_core_edf_sched *core = info->sched_priv_data;
604 struct vm_core_edf_sched *next_core;
605 struct vm_edf_rq *runqueue = get_runqueue(info);
607 /* The next core to be scheduled is choosen from the tree (Function pick_next_core).
608 * The selected core is the one with the earliest deadline and with available time
609 * to use within the current period (used_time < slice)
612 next_core = pick_next_core(runqueue); // Pick next core to schedule
614 if (core != next_core){
617 wakeup_core(next_core->info);
618 core->total_time += used_time;
620 if (used_time > core->slice){
621 core->slice_overuse++;
622 core->extra_time_given += (used_time - core->slice);
634 * edf_schedule: Scheduling function
638 edf_schedule(struct guest_info * info, int usec){
640 uint64_t host_time = get_curr_host_time(&info->time_state);
641 struct vm_edf_rq *runqueue = get_runqueue(info);
642 struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->sched_priv_data;
644 uint64_t used_time = 0;
645 if(core->last_wakeup_time != 0)
646 used_time = host_time - core->last_wakeup_time;
648 if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled
649 adjust_slice(core->info, host_time - core->last_wakeup_time, usec);
651 run_next_core(core->info,used_time, usec);
/*
 * edf_sched_schedule: Main scheduling function. Computes amount of time in period left,
 * recomputing the current core's deadline if it has expired, then runs
 * the next selected core.
 * It is called in the following cases:
 *    A vCPU becomes runnable
 *    The slice of the current vCPU was used
 *    The period of a vCPU in the runqueue starts
 * TODO Something to do with extra time?
 * TODO Check the use of remaining_time
 */
static void
edf_sched_schedule(struct guest_info * info){

    // usec == 0 marks a full scheduling event (vs. a timed yield).
    edf_schedule(info, 0);
    return;
}
/*
 * edf_sched_yield: Called when yielding the logical cpu for usec is needed
 */
static void
edf_sched_yield(struct guest_info * info, int usec){

    edf_schedule(info, usec);
    return;
}
689 * edf_sched_deinit: Frees edf scheduler data structures
694 edf_sched_deinit(struct v3_vm_info *vm)
696 void *priv_data = vm->sched_priv_data;
706 * edf_sched_deinit: Frees virtual core data structures
710 edf_sched_core_deinit(struct guest_info *core)
712 void *priv_data = core->sched_priv_data;
720 int edf_sched_vm_init(struct v3_vm_info *vm){
/*
 * edf_sched_admit: Admission hook for a VM; performs the deferred runqueue
 * initialization.
 */
int edf_sched_admit(struct v3_vm_info *vm){

    /*
     * Initialize priv_data for the vm:
     * For EDF this is done here because we need the parameter
     * avail_core which is set in v3_start_vm before the
     * v3_scheduler_admit_vm function is called.
     */
    priv_data_init(vm);

    return 0;
}
741 static struct vm_scheduler_impl edf_sched = {
746 .vm_init = edf_sched_vm_init,
748 .core_init = edf_sched_core_init,
749 .core_deinit = edf_sched_core_deinit,
750 .schedule = edf_sched_schedule,
751 .yield = edf_sched_yield,
752 .admit = edf_sched_admit,
758 ext_sched_edf_init() {
759 PrintDebug(VM_NONE, VCORE_NONE,"Sched. Creating (%s) scheduler\n",edf_sched.name);
760 return v3_register_scheduler(&edf_sched);
764 ext_sched_edf_vm_init() {
768 static struct v3_extension_impl sched_edf_impl = {
769 .name = "EDF Scheduler",
770 .init = ext_sched_edf_init,
771 .vm_init = ext_sched_edf_vm_init,
779 register_extension(&sched_edf_impl);