/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2013, Oscar Mondragon <omondrag@cs.unm.edu> 
 * Copyright (c) 2013, Patrick G. Bridges <bridges@cs.unm.edu>
 * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Oscar Mondragon <omondrag@cs.unm.edu>
 *         Patrick G. Bridges <bridges@cs.unm.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
23 #include <palacios/vmm.h>
\r
24 #include <palacios/vmm_time.h>
\r
25 #include <palacios/vm_guest.h>
\r
26 #include <palacios/vmm_hashtable.h>
\r
27 #include <palacios/vmm_config.h>
\r
28 #include <palacios/vmm_extensions.h>
\r
29 #include <palacios/vmm_rbtree.h>
\r
// Compile out debug output unless EDF scheduler debugging is enabled in the build config.
#ifndef V3_CONFIG_DEBUG_EDF_SCHED
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* Overview
 *
 * EDF Scheduling
 *
 * The EDF scheduler uses a dynamically calculated priority as scheduling criteria to choose
 * what thread will be scheduled. That priority is calculated according to the relative
 * deadline of the threads that are ready to run in the runqueue. This runqueue is a per-logical
 * core data structure used to keep the runnable virtual cores (threads) allocated to that
 * logical core. The threads with less time before their deadline will receive better priorities.
 * The runqueue is sorted each time that a vCPU becomes runnable. At that time the vCPU is
 * enqueued and a new scheduling decision is taken. Each time a vCPU is scheduled, the parameter
 * slice used time is set to zero and the current deadline is calculated using its period. Once
 * the vCPU uses the logical core for slice seconds, that vCPU sleeps until its next scheduling
 * period (when it is re-inserted in the runqueue) and yields the CPU to allow the scheduling
 * of the vCPU with best priority in the runqueue.
 */
// Default configuration values for the EDF Scheduler
// time parameters in microseconds

#define MAX_PERIOD 1000000000
#define MIN_PERIOD 50000
#define MAX_SLICE 1000000000
#define MIN_SLICE 10000
#define CPU_PERCENT 100     // Default maximum CPU utilization per physical core (percent)

// Time values in this scheduler are expressed in microseconds.
typedef uint64_t time_us;
65 * Per-core EDF Scheduling information
\r
68 struct vm_core_edf_sched {
\r
69 struct guest_info *info; // Core struct
\r
70 struct rb_node node; // red-black tree node
\r
71 time_us period; // Amount of time (us) during which the core may received a CPU allocation
\r
72 time_us slice; // Minimum amount of time (us) received for the core during each period
\r
73 time_us current_deadline; // Time (us) at which current core period ends
\r
74 time_us used_time; // Amount of time (us) of the slice used whiting the current period
\r
75 time_us last_wakeup_time; // Time at which the last wakeup started for this core
\r
76 time_us remaining_time; // Remaining time (us) before current core period ends (before current deadline)
\r
77 bool extra_time; // Specifies if the virtual core is eligible to receive extra CPU time
\r
78 int miss_deadline; // Number of times the core has missed its deadline
\r
79 time_us total_time; // Total scheduled time for this core. For now used for debugging purposes
\r
80 int slice_overuse; // Statistical purposes
\r
81 time_us extra_time_given; // Statistical
\r
85 * Scheduler configuration
\r
88 struct vm_edf_sched_config {
\r
89 time_us min_slice; // Minimum allowed slice
\r
90 time_us max_slice; // Maximum allowed slice
\r
91 time_us min_period; // Minimum allowed period
\r
92 time_us max_period; // Maximum allowed period
\r
93 int cpu_percent; // Percentange of CPU utilization for the scheduler in each physical CPU (100 or less)
\r
98 * Run queue structure. Per-logical core data structure used to keep the runnable virtual cores (threads) allocated to that logical core
\r
99 * Contains a pointer to the red black tree, the structure of configuration options and other info
\r
104 //int cpu_id; // Physical CPU id
\r
105 int cpu_u; // CPU utilization (must be less or equal to the cpu_percent in vm_edf_sched_config)
\r
106 struct rb_root vCPUs_tree; // Red-Black Tree
\r
107 struct vm_edf_sched_config edf_config; // Scheduling configuration structure
\r
108 int nr_vCPU; // Number of cores in the runqueue
\r
109 struct vm_core_edf_sched *curr_vCPU; // Current running CPU
\r
110 struct rb_node *rb_leftmost; // vCPU with the earliest deadline (leftmost in the tree)
\r
111 time_us last_sched_time; // statistical purposes
\r
/*
 * Basic functions for scheduling
 */

int v3_init_edf_scheduling();
124 * init_edf_config: Initialize scheduler configuration
\r
128 init_edf_config(struct vm_edf_sched_config *edf_config){
\r
130 edf_config->min_slice = MIN_SLICE;
\r
131 edf_config->max_slice = MAX_SLICE;
\r
132 edf_config->min_period = MIN_PERIOD;
\r
133 edf_config->max_period = MAX_PERIOD;
\r
134 edf_config->cpu_percent = CPU_PERCENT;
\r
139 * priv_data_init: Initialize the run queue
\r
143 priv_data_init(struct v3_vm_info *vm){
\r
145 PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing EDF Scheduling \n");
\r
147 vm->sched_priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq));
\r
149 if (!vm->sched_priv_data) {
\r
150 PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in priv_data_init\n");
\r
156 PrintDebug(vm, VCORE_NONE,"EDF Sched. priv_data_init. Available cores %d\n", vm->avail_cores);
\r
158 for(lcore = 0; lcore < vm->avail_cores ; lcore++){
\r
160 PrintDebug(vm, VCORE_NONE,"EDF Sched. priv_data_init. Initializing logical core %d\n", lcore);
\r
162 struct vm_edf_rq * edf_rq_list = (struct vm_edf_rq *)vm->sched_priv_data;
\r
163 struct vm_edf_rq * edf_rq = &edf_rq_list[lcore];
\r
165 edf_rq->vCPUs_tree = RB_ROOT;
\r
168 edf_rq->curr_vCPU=NULL;
\r
169 edf_rq->rb_leftmost=NULL;
\r
170 edf_rq->last_sched_time=0;
\r
171 init_edf_config(&edf_rq->edf_config);
\r
180 * is_admissible_core: Decides if a core is admited to the red black tree according with
\r
181 * the admisibility formula.
\r
185 is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){
\r
187 int curr_utilization = runqueue->cpu_u;
\r
188 int new_utilization = curr_utilization + (100 * new_sched_core->slice / new_sched_core->period);
\r
189 int cpu_percent = (runqueue->edf_config).cpu_percent;
\r
191 if (new_utilization <= cpu_percent)
\r
201 * count_cores: Function useful to count the number of cores in a runqueue (Not used for now)
\r
206 /*static int count_cores(struct vm_edf_rq *runqueue){
\r
208 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
209 struct vm_core_edf_sched *curr_core;
\r
210 int number_cores = 0;
\r
214 curr_core = container_of(node, struct vm_core_edf_sched, node);
\r
215 node = v3_rb_next(node);
\r
219 return number_cores;
\r
225 * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node
\r
226 * and rebalaces the tree
\r
230 insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){
\r
232 struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node);
\r
233 struct rb_node *parent = NULL;
\r
234 struct vm_core_edf_sched *curr_core;
\r
236 // Find out place in the tree for the new core
\r
237 while (*new_core) {
\r
239 curr_core = container_of(*new_core, struct vm_core_edf_sched, node);
\r
240 parent = *new_core;
\r
242 if (core->current_deadline < curr_core->current_deadline)
\r
243 new_core = &((*new_core)->rb_left);
\r
244 else if (core->current_deadline > curr_core->current_deadline)
\r
245 new_core = &((*new_core)->rb_right);
\r
246 else // Is Possible to have same current deadlines in both cores!
\r
249 // Add new node and rebalance tree.
\r
250 rb_link_node(&core->node, parent, new_core);
\r
251 v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree);
\r
258 * get_curr_host_time: Calculates the current host time (microseconds)
\r
262 get_curr_host_time(struct vm_core_time *core_time){
\r
264 uint64_t cur_cycle = v3_get_host_time(core_time);
\r
265 uint64_t cpu_khz = core_time->host_cpu_freq;
\r
266 uint64_t curr_time_us = 1000 * cur_cycle / cpu_khz;
\r
268 return curr_time_us;
\r
/*
 * next_start_period: Given the current host time and the period of a given vCPU,
 * calculates the time at which its next period starts (the next multiple of
 * period_us strictly greater than curr_time_us).
 */

static uint64_t
next_start_period(uint64_t curr_time_us, uint64_t period_us){

    uint64_t time_period_us = curr_time_us % period_us;
    uint64_t remaining_time_us = period_us - time_period_us;
    uint64_t next_start_us = curr_time_us + remaining_time_us;

    return next_start_us;
}
291 * get_runqueue: Get the runqueue assigned to a virtual core.
\r
294 struct vm_edf_rq * get_runqueue(struct guest_info *info){
\r
296 struct vm_edf_rq *runqueue_list = (struct vm_edf_rq *) info->vm_info->sched_priv_data;
\r
297 struct vm_edf_rq *runqueue = &runqueue_list[info->pcpu_id];
\r
303 * wakeup_core: Wakeup a given vCPU thread
\r
307 wakeup_core(struct guest_info *info){
\r
309 struct vm_core_edf_sched *core = info->sched_priv_data;
\r
310 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
312 if (!info->core_thread) {
\r
313 PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id);
\r
317 PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n",
\r
318 core->info->vcpu_id,
\r
319 core->info->pcpu_id,
\r
321 core->miss_deadline,
\r
322 core->slice_overuse,
\r
323 core->extra_time_given,
\r
324 (struct task_struct *)info->core_thread);
\r
326 V3_Wakeup(info->core_thread);
\r
327 core->last_wakeup_time = get_curr_host_time(&core->info->time_state);
\r
328 runqueue->curr_vCPU = core;
\r
336 * activate_core - Moves a core to the red-black tree.
\r
337 * used time is set to zero and current deadline is calculated
\r
341 activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
\r
343 if (is_admissible_core(core, runqueue)){
\r
345 uint64_t curr_time_us = get_curr_host_time(&core->info->time_state);
\r
346 uint64_t curr_deadline = next_start_period(curr_time_us, core->period);
\r
348 core->current_deadline = curr_deadline;
\r
349 core->used_time=0;
\r
350 core->remaining_time=core->slice;
\r
352 bool ins = insert_core_edf(core, runqueue);
\r
354 * If not inserted is possible that there is other core with the same deadline.
\r
355 * Then, the deadline is modified and try again
\r
358 core->current_deadline ++;
\r
359 ins = insert_core_edf(core, runqueue);
\r
362 runqueue->cpu_u += 100 * core->slice / core->period;
\r
363 runqueue->nr_vCPU ++;
\r
366 * If this is the first time to be activated pick first earliest deadline core to wakeup.
\r
369 if(core->last_wakeup_time == 0){
\r
371 struct vm_core_edf_sched *next_core;
\r
374 * Pick first earliest deadline core
\r
376 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
377 next_core = container_of(node, struct vm_core_edf_sched, node);
\r
379 // Wakeup next_core
\r
380 wakeup_core(next_core->info);
\r
389 PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. It is not admissible");
\r
394 * edf_sched_core_init: Initializes per core data structure and
\r
395 * calls activate function.
\r
399 edf_sched_core_init(struct guest_info * info){
\r
401 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
402 struct vm_core_edf_sched *core_edf;
\r
404 PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id);
\r
406 core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched));
\r
408 PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n");
\r
411 info->sched_priv_data = core_edf;
\r
413 // Default configuration if not specified in configuration file
\r
415 core_edf->info = info;
\r
416 core_edf->period = 500000;
\r
417 core_edf->slice = 50000;
\r
418 core_edf->used_time = 0;
\r
419 core_edf->last_wakeup_time = 0;
\r
420 core_edf->remaining_time = core_edf->slice;
\r
421 core_edf->miss_deadline = 0;
\r
422 core_edf->extra_time = true;
\r
423 core_edf->total_time = 0;
\r
424 core_edf->slice_overuse = 0;
\r
425 core_edf->extra_time_given = 0;
\r
427 v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg;
\r
428 v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core");
\r
431 char *id = v3_cfg_val(core, "vcpu_id");
\r
432 char *period = v3_cfg_val(core, "period");
\r
433 char *slice = v3_cfg_val(core, "slice");
\r
434 char *extra_time = v3_cfg_val(core, "extra_time");
\r
436 if (atoi(id) == core_edf->info->vcpu_id){
\r
438 core_edf->period = atoi(period);
\r
439 core_edf->slice = atoi(slice);
\r
440 core_edf->remaining_time = core_edf->slice;
\r
441 if (strcasecmp(extra_time, "true") == 0)
\r
442 core_edf->extra_time = true;
\r
444 core_edf->extra_time = false;
\r
447 core = v3_cfg_next_branch(core);
\r
450 activate_core(core_edf,runqueue);
\r
455 * search_core_edf: Searches a core in the red-black tree by using its vcpu_id
\r
457 static struct vm_core_edf_sched *
\r
458 search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){
\r
460 struct rb_node *node = runqueue->vCPUs_tree.rb_node;
\r
464 struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node);
\r
466 if (core_edf->current_deadline < core->current_deadline)
\r
467 node = node->rb_left;
\r
468 else if (core_edf->current_deadline > core->current_deadline)
\r
469 node = node->rb_right;
\r
471 if(core->info->vcpu_id == core_edf->info->vcpu_id){
\r
480 * delete_core_edf: Deletes a core from the red black tree, generally when it has
\r
481 * consumed its time slice within the current period.
\r
485 delete_core_edf( struct vm_core_edf_sched *core_edf , struct vm_edf_rq *runqueue){
\r
487 struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue);
\r
490 v3_rb_erase(&core->node, &runqueue->vCPUs_tree);
\r
494 PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core");
\r
501 * deactivate_core - Removes a core from the red-black tree.
\r
505 deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
\r
507 if(delete_core_edf(core, runqueue)){
\r
508 runqueue->cpu_u -= 100 * core->slice / core->period;
\r
509 runqueue->nr_vCPU -- ;
\r
515 * pick_next_core: Returns the next core to be scheduled from the red black tree
\r
518 static struct vm_core_edf_sched *
\r
519 pick_next_core(struct vm_edf_rq *runqueue){
\r
523 * Pick first earliest deadline core
\r
525 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
526 struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node);
\r
529 * Verify if the earliest deadline core has used its complete slice and return it if not
\r
532 if (next_core->used_time < next_core->slice){
\r
533 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state))
\r
534 next_core->miss_deadline++;
\r
538 * If slice used, pick the next core that has not used its complete slice
\r
542 while(next_core->used_time >= next_core->slice){
\r
544 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){
\r
546 deactivate_core(next_core,runqueue);
\r
547 activate_core(next_core,runqueue);
\r
551 node = v3_rb_next(node);
\r
553 next_core = container_of(node, struct vm_core_edf_sched, node);
\r
556 node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one
\r
557 return container_of(node, struct vm_core_edf_sched, node);
\r
568 adjust_slice(struct guest_info * info, int used_time, int extra_time)
\r
570 struct vm_core_edf_sched *core = info->sched_priv_data;
\r
571 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
573 core->used_time = used_time;
\r
575 if (extra_time >= 0) {
\r
576 core->used_time += extra_time;
\r
579 if( core->used_time >= core->slice){
\r
580 deactivate_core(core,runqueue);
\r
581 activate_core(core,runqueue);
\r
587 * run_next_core: Pick next core to be scheduled and wakeup it
\r
591 run_next_core(struct guest_info *info, int used_time, int usec)
\r
593 struct vm_core_edf_sched *core = info->sched_priv_data;
\r
594 struct vm_core_edf_sched *next_core;
\r
595 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
597 /* The next core to be scheduled is choosen from the tree (Function pick_next_core).
\r
598 * The selected core is the one with the earliest deadline and with available time
\r
599 * to use within the current period (used_time < slice)
\r
602 next_core = pick_next_core(runqueue); // Pick next core to schedule
\r
604 if (core != next_core){
\r
606 // Wakeup next_core
\r
607 wakeup_core(next_core->info);
\r
608 core->total_time += used_time;
\r
610 if (used_time > core->slice){
\r
611 core->slice_overuse++;
\r
612 core->extra_time_given += (used_time - core->slice);
\r
624 * edf_schedule: Scheduling function
\r
628 edf_schedule(struct guest_info * info, int usec){
\r
630 uint64_t host_time = get_curr_host_time(&info->time_state);
\r
631 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
632 struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->sched_priv_data;
\r
634 uint64_t used_time = 0;
\r
635 if(core->last_wakeup_time != 0)
\r
636 used_time = host_time - core->last_wakeup_time;
\r
638 if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled
\r
639 adjust_slice(core->info, host_time - core->last_wakeup_time, usec);
\r
641 run_next_core(core->info,used_time, usec);
\r
/*
 * edf_sched_schedule: Main scheduling function. Computes amount of time in period left,
 * recomputing the current core's deadline if it has expired, then runs the scheduler.
 * It is called in the following cases:
 *    A vCPU becomes runnable
 *    The slice of the current vCPU was used
 *    The period of a vCPU in the runqueue starts
 * TODO Something to do with extra time?
 * TODO Check the use of remaining_time
 */

static void
edf_sched_schedule(struct guest_info * info){

    edf_schedule(info, 0);
    return;
}
/*
 * edf_sched_yield: Called when yielding the logical cpu for usec is needed
 */

static void
edf_sched_yield(struct guest_info * info, int usec){

    edf_schedule(info, usec);
    return;
}
679 * edf_sched_deinit: Frees edf scheduler data structures
\r
684 edf_sched_deinit(struct v3_vm_info *vm)
\r
686 void *priv_data = vm->sched_priv_data;
\r
689 V3_Free(priv_data);
\r
696 * edf_sched_deinit: Frees virtual core data structures
\r
700 edf_sched_core_deinit(struct guest_info *core)
\r
702 void *priv_data = core->sched_priv_data;
\r
705 V3_Free(priv_data);
\r
/*
 * edf_sched_vm_init: Per-VM init hook; EDF defers real initialization to
 * edf_sched_admit (which runs after avail_cores is known).
 */
int edf_sched_vm_init(struct v3_vm_info *vm){
    return 0;
}
int edf_sched_admit(struct v3_vm_info *vm){

    /*
     * Initialize priv_data for the vm:
     * For EDF this is done here because we need the parameter
     * avail_core which is set in v3_start_vm before the
     * v3_scheduler_admit_vm function is called.
     */

    priv_data_init(vm);

    return 0;
}
731 static struct vm_scheduler_impl edf_sched = {
\r
736 .vm_init = edf_sched_vm_init,
\r
738 .core_init = edf_sched_core_init,
\r
739 .core_deinit = edf_sched_core_deinit,
\r
740 .schedule = edf_sched_schedule,
\r
741 .yield = edf_sched_yield,
\r
742 .admit = edf_sched_admit,
\r
748 ext_sched_edf_init() {
\r
749 PrintDebug(VM_NONE, VCORE_NONE,"Sched. Creating (%s) scheduler\n",edf_sched.name);
\r
750 return v3_register_scheduler(&edf_sched);
\r
/* Per-VM extension init hook: nothing to do for EDF. */
static int
ext_sched_edf_vm_init() {
    return 0;
}
758 static struct v3_extension_impl sched_edf_impl = {
\r
759 .name = "EDF Scheduler",
\r
760 .init = ext_sched_edf_init,
\r
761 .vm_init = ext_sched_edf_vm_init,
\r
764 .core_deinit = NULL,
\r
769 register_extension(&sched_edf_impl);
\r