/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2013, Oscar Mondragon <omondrag@cs.unm.edu>
 * Copyright (c) 2013, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Oscar Mondragon <omondrag@cs.unm.edu>
 *         Patrick G. Bridges <bridges@cs.unm.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
22 #include <palacios/vmm.h>
\r
23 #include <palacios/vmm_time.h>
\r
24 #include <palacios/vm_guest.h>
\r
25 #include <palacios/vmm_hashtable.h>
\r
26 #include <palacios/vmm_config.h>
\r
27 #include <palacios/vmm_extensions.h>
\r
28 #include <palacios/vmm_edf_sched.h>
\r
/* Compile debug output away unless EDF scheduler debugging is enabled.
 * NOTE(review): the closing #endif was lost in the garbled source; it is
 * restored here — verify against the upstream file. */
#ifndef V3_CONFIG_DEBUG_EDF_SCHED
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
\r
/*
 * The EDF scheduler uses a dynamically calculated priority as its scheduling
 * criterion to choose which thread will be scheduled. That priority is calculated
 * according to the relative deadline of the threads that are ready to run in the
 * runqueue. The runqueue is a per-logical-core data structure used to keep the
 * runnable virtual cores (threads) allocated to that logical core. The threads
 * with less time before their deadline receive better priorities.
 * The runqueue is sorted each time that a vCPU becomes runnable. At that time the
 * vCPU is enqueued and a new scheduling decision is taken. Each time a vCPU is
 * scheduled, its "used time" within the slice is set to zero and the current
 * deadline is calculated using its period. Once the vCPU uses the logical core
 * for "slice" microseconds, that vCPU sleeps until its next scheduling period
 * (when it is re-inserted in the runqueue) and yields the CPU to allow the
 * scheduling of the vCPU with the best priority in the runqueue.
 */
\r
// Default configuration values for the EDF Scheduler
// time parameters in microseconds

#define MAX_PERIOD 1000000000
#define MIN_PERIOD 50000
#define MAX_SLICE 1000000000
#define MIN_SLICE 10000
#define CPU_PERCENT 100
\r
65 * init_edf_config: Initialize scheduler configuration
\r
69 init_edf_config(struct vm_edf_sched_config *edf_config){
\r
71 edf_config->min_slice = MIN_SLICE;
\r
72 edf_config->max_slice = MAX_SLICE;
\r
73 edf_config->min_period = MIN_PERIOD;
\r
74 edf_config->max_period = MAX_PERIOD;
\r
75 edf_config->cpu_percent = CPU_PERCENT;
\r
80 * edf_sched_init: Initialize the run queue
\r
84 edf_sched_init(struct v3_vm_info *vm){
\r
86 PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing vm %s\n", vm->name);
\r
88 struct vm_sched_state *sched_state = &vm->sched;
\r
89 sched_state->priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq));
\r
91 if (!sched_state->priv_data) {
\r
92 PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in edf_sched_init\n");
\r
98 PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Available cores %d\n", vm->avail_cores);
\r
100 for(lcore = 0; lcore < vm->avail_cores ; lcore++){
\r
102 PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Initializing logical core %d\n", lcore);
\r
104 struct vm_edf_rq * edf_rq_list = (struct vm_edf_rq *) sched_state->priv_data;
\r
105 struct vm_edf_rq * edf_rq = &edf_rq_list[lcore];
\r
107 edf_rq->vCPUs_tree = RB_ROOT;
\r
110 edf_rq->curr_vCPU=NULL;
\r
111 edf_rq->rb_leftmost=NULL;
\r
112 edf_rq->last_sched_time=0;
\r
113 init_edf_config(&edf_rq->edf_config);
\r
123 * is_admissible_core: Decides if a core is admited to the red black tree according with
\r
124 * the admisibility formula.
\r
128 is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){
\r
130 int curr_utilization = runqueue->cpu_u;
\r
131 int new_utilization = curr_utilization + (100 * new_sched_core->slice / new_sched_core->period);
\r
132 int cpu_percent = (runqueue->edf_config).cpu_percent;
\r
134 if (new_utilization <= cpu_percent)
\r
/*
 * count_cores: Function useful to count the number of cores in a runqueue (Not used for now)
 *
 * static int count_cores(struct vm_edf_rq *runqueue){
 *
 *     struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
 *     struct vm_core_edf_sched *curr_core;
 *     int number_cores = 0;
 *
 *     while (node) {
 *         curr_core = container_of(node, struct vm_core_edf_sched, node);
 *         node = v3_rb_next(node);
 *         number_cores++;
 *     }
 *
 *     return number_cores;
 * }
 */
\r
167 * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node
\r
168 * and rebalaces the tree
\r
172 insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){
\r
174 struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node);
\r
175 struct rb_node *parent = NULL;
\r
176 struct vm_core_edf_sched *curr_core;
\r
178 // Find out place in the tree for the new core
\r
179 while (*new_core) {
\r
181 curr_core = container_of(*new_core, struct vm_core_edf_sched, node);
\r
182 parent = *new_core;
\r
184 if (core->current_deadline < curr_core->current_deadline)
\r
185 new_core = &((*new_core)->rb_left);
\r
186 else if (core->current_deadline > curr_core->current_deadline)
\r
187 new_core = &((*new_core)->rb_right);
\r
188 else // Is Possible to have same current deadlines in both cores!
\r
191 // Add new node and rebalance tree.
\r
192 rb_link_node(&core->node, parent, new_core);
\r
193 v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree);
\r
200 * get_curr_host_time: Calculates the current host time (microseconds)
\r
204 get_curr_host_time(struct vm_core_time *core_time){
\r
206 uint64_t cur_cycle = v3_get_host_time(core_time);
\r
207 uint64_t cpu_khz = core_time->host_cpu_freq;
\r
208 uint64_t curr_time_us = 1000 * cur_cycle / cpu_khz;
\r
210 return curr_time_us;
\r
/*
 * next_start_period: Given the current host time and the period of a given
 * vCPU (both in microseconds), calculates the time at which its next period
 * starts, i.e. the next multiple of period_us strictly greater than
 * curr_time_us. Note: when curr_time_us is exactly on a period boundary the
 * result is curr_time_us + period_us (the remaining time is a full period).
 */
static uint64_t
next_start_period(uint64_t curr_time_us, uint64_t period_us){

    uint64_t time_period_us = curr_time_us % period_us;
    uint64_t remaining_time_us = period_us - time_period_us;
    uint64_t next_start_us = curr_time_us + remaining_time_us;

    return next_start_us;
}
\r
233 * get_runqueue: Get the runqueue assigned to a virtual core.
\r
236 struct vm_edf_rq * get_runqueue(struct guest_info *info){
\r
238 struct vm_edf_rq *runqueue_list = (struct vm_edf_rq *) info->vm_info->sched.priv_data;
\r
239 struct vm_edf_rq *runqueue = &runqueue_list[info->pcpu_id];
\r
245 * wakeup_core: Wakeup a given vCPU thread
\r
249 wakeup_core(struct guest_info *info){
\r
251 struct vm_core_edf_sched *core = info->core_sched.priv_data;
\r
252 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
254 if (!info->core_thread) {
\r
255 PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id);
\r
259 PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n",
\r
260 core->info->vcpu_id,
\r
261 core->info->pcpu_id,
\r
263 core->miss_deadline,
\r
264 core->slice_overuse,
\r
265 core->extra_time_given,
\r
266 (struct task_struct *)info->core_thread);
\r
268 V3_Wakeup(info->core_thread);
\r
269 core->last_wakeup_time = get_curr_host_time(&core->info->time_state);
\r
270 runqueue->curr_vCPU = core;
\r
278 * activate_core - Moves a core to the red-black tree.
\r
279 * used time is set to zero and current deadline is calculated
\r
283 activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
\r
285 if (is_admissible_core(core, runqueue)){
\r
287 uint64_t curr_time_us = get_curr_host_time(&core->info->time_state);
\r
288 uint64_t curr_deadline = next_start_period(curr_time_us, core->period);
\r
290 core->current_deadline = curr_deadline;
\r
291 core->used_time=0;
\r
292 core->remaining_time=core->slice;
\r
294 bool ins = insert_core_edf(core, runqueue);
\r
296 * If not inserted is possible that there is other core with the same deadline.
\r
297 * Then, the deadline is modified and try again
\r
300 core->current_deadline ++;
\r
301 ins = insert_core_edf(core, runqueue);
\r
304 runqueue->cpu_u += 100 * core->slice / core->period;
\r
305 runqueue->nr_vCPU ++;
\r
308 * If this is the first time to be activated pick first earliest deadline core to wakeup.
\r
311 if(core->last_wakeup_time == 0){
\r
313 struct vm_core_edf_sched *next_core;
\r
316 * Pick first earliest deadline core
\r
318 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
319 next_core = container_of(node, struct vm_core_edf_sched, node);
\r
321 // Wakeup next_core
\r
322 wakeup_core(next_core->info);
\r
331 PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. It is not admissible");
\r
336 * edf_sched_core_init: Initializes per core data structure and
\r
337 * calls activate function.
\r
341 edf_sched_core_init(struct guest_info * info){
\r
343 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
344 struct vm_core_edf_sched *core_edf;
\r
346 PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id);
\r
348 core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched));
\r
350 PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n");
\r
353 info->core_sched.priv_data = core_edf;
\r
355 // Default configuration if not specified in configuration file
\r
357 core_edf->info = info;
\r
358 core_edf->period = 500000;
\r
359 core_edf->slice = 50000;
\r
360 core_edf->used_time = 0;
\r
361 core_edf->last_wakeup_time = 0;
\r
362 core_edf->remaining_time = core_edf->slice;
\r
363 core_edf->miss_deadline = 0;
\r
364 core_edf->extra_time = true;
\r
365 core_edf->total_time = 0;
\r
366 core_edf->slice_overuse = 0;
\r
367 core_edf->extra_time_given = 0;
\r
369 v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg;
\r
370 v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core");
\r
373 char *id = v3_cfg_val(core, "vcpu_id");
\r
374 char *period = v3_cfg_val(core, "period");
\r
375 char *slice = v3_cfg_val(core, "slice");
\r
376 char *extra_time = v3_cfg_val(core, "extra_time");
\r
378 if (atoi(id) == core_edf->info->vcpu_id){
\r
380 core_edf->period = atoi(period);
\r
381 core_edf->slice = atoi(slice);
\r
382 core_edf->remaining_time = core_edf->slice;
\r
383 if (strcasecmp(extra_time, "true") == 0)
\r
384 core_edf->extra_time = true;
\r
386 core_edf->extra_time = false;
\r
389 core = v3_cfg_next_branch(core);
\r
392 activate_core(core_edf,runqueue);
\r
397 * search_core_edf: Searches a core in the red-black tree by using its vcpu_id
\r
399 static struct vm_core_edf_sched *
\r
400 search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){
\r
402 struct rb_node *node = runqueue->vCPUs_tree.rb_node;
\r
406 struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node);
\r
408 if (core_edf->current_deadline < core->current_deadline)
\r
409 node = node->rb_left;
\r
410 else if (core_edf->current_deadline > core->current_deadline)
\r
411 node = node->rb_right;
\r
413 if(core->info->vcpu_id == core_edf->info->vcpu_id){
\r
422 * delete_core_edf: Deletes a core from the red black tree, generally when it has
\r
423 * consumed its time slice within the current period.
\r
427 delete_core_edf( struct vm_core_edf_sched *core_edf , struct vm_edf_rq *runqueue){
\r
429 struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue);
\r
432 v3_rb_erase(&core->node, &runqueue->vCPUs_tree);
\r
436 PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core");
\r
443 * deactivate_core - Removes a core from the red-black tree.
\r
447 deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
\r
449 if(delete_core_edf(core, runqueue)){
\r
450 runqueue->cpu_u -= 100 * core->slice / core->period;
\r
451 runqueue->nr_vCPU -- ;
\r
457 * pick_next_core: Returns the next core to be scheduled from the red black tree
\r
460 static struct vm_core_edf_sched *
\r
461 pick_next_core(struct vm_edf_rq *runqueue){
\r
465 * Pick first earliest deadline core
\r
467 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
468 struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node);
\r
471 * Verify if the earliest deadline core has used its complete slice and return it if not
\r
474 if (next_core->used_time < next_core->slice){
\r
475 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state))
\r
476 next_core->miss_deadline++;
\r
480 * If slice used, pick the next core that has not used its complete slice
\r
484 while(next_core->used_time >= next_core->slice){
\r
486 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){
\r
488 deactivate_core(next_core,runqueue);
\r
489 activate_core(next_core,runqueue);
\r
493 node = v3_rb_next(node);
\r
495 next_core = container_of(node, struct vm_core_edf_sched, node);
\r
498 node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one
\r
499 return container_of(node, struct vm_core_edf_sched, node);
\r
510 adjust_slice(struct guest_info * info, int used_time, int extra_time)
\r
512 struct vm_core_edf_sched *core = info->core_sched.priv_data;
\r
513 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
515 core->used_time = used_time;
\r
517 if (extra_time >= 0) {
\r
518 core->used_time += extra_time;
\r
521 if( core->used_time >= core->slice){
\r
522 deactivate_core(core,runqueue);
\r
523 activate_core(core,runqueue);
\r
529 * run_next_core: Pick next core to be scheduled and wakeup it
\r
533 run_next_core(struct guest_info *info, int used_time, int usec)
\r
535 struct vm_core_edf_sched *core = info->core_sched.priv_data;
\r
536 struct vm_core_edf_sched *next_core;
\r
537 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
539 /* The next core to be scheduled is choosen from the tree (Function pick_next_core).
\r
540 * The selected core is the one with the earliest deadline and with available time
\r
541 * to use within the current period (used_time < slice)
\r
544 next_core = pick_next_core(runqueue); // Pick next core to schedule
\r
546 if (core != next_core){
\r
548 // Wakeup next_core
\r
549 wakeup_core(next_core->info);
\r
550 core->total_time += used_time;
\r
552 if (used_time > core->slice){
\r
553 core->slice_overuse++;
\r
554 core->extra_time_given += (used_time - core->slice);
\r
566 * edf_schedule: Scheduling function
\r
570 edf_schedule(struct guest_info * info, int usec){
\r
572 uint64_t host_time = get_curr_host_time(&info->time_state);
\r
573 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
574 struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->core_sched.priv_data;
\r
576 uint64_t used_time = 0;
\r
577 if(core->last_wakeup_time != 0)
\r
578 used_time = host_time - core->last_wakeup_time;
\r
580 if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled
\r
581 adjust_slice(core->info, host_time - core->last_wakeup_time, usec);
\r
583 run_next_core(core->info,used_time, usec);
\r
/*
 * edf_sched_schedule: Main scheduling entry point. It is called in the
 * following cases:
 *   - A vCPU becomes runnable
 *   - The slice of the current vCPU was used
 *   - The period of a vCPU in the runqueue starts
 *
 * TODO Something to do with extra time?
 * TODO Check the use of remaining_time
 */
int
edf_sched_schedule(struct guest_info * info){

    edf_schedule(info, 0);
    return 0;
}
\r
/*
 * edf_sched_yield: Called when yielding the logical cpu for usec microseconds
 * is needed; delegates to edf_schedule with the requested sleep time.
 */
int
edf_sched_yield(struct guest_info * info, int usec){

    edf_schedule(info, usec);
    return 0;
}
\r
621 * edf_sched_deinit: Frees edf scheduler data structures
\r
626 edf_sched_deinit(struct v3_vm_info *vm)
\r
629 struct vm_scheduler * sched = vm->sched.sched;
\r
630 void *priv_data = vm->sched.priv_data;
\r
636 V3_Free(priv_data);
\r
643 * edf_sched_deinit: Frees virtual core data structures
\r
647 edf_sched_core_deinit(struct guest_info *core)
\r
650 struct vm_scheduler * sched = core->core_sched.sched;
\r
651 void *priv_data = core->core_sched.priv_data;
\r
657 V3_Free(priv_data);
\r
662 static struct vm_scheduler_impl edf_sched = {
\r
664 .init = edf_sched_init,
\r
665 .deinit = edf_sched_deinit,
\r
666 .core_init = edf_sched_core_init,
\r
667 .core_deinit = edf_sched_core_deinit,
\r
668 .schedule = edf_sched_schedule,
\r
669 .yield = edf_sched_yield
\r
673 ext_sched_edf_init() {
\r
675 PrintDebug(VM_NONE, VCORE_NONE,"Sched. Creating (%s) scheduler\n",edf_sched.name);
\r
676 return v3_register_scheduler(&edf_sched);
\r
/* Per-VM extension hook: nothing to do for the EDF scheduler. */
static int
ext_sched_edf_vm_init() {
    return 0;
}
\r
684 static struct v3_extension_impl sched_edf_impl = {
\r
685 .name = "EDF Scheduler",
\r
686 .init = ext_sched_edf_init,
\r
687 .vm_init = ext_sched_edf_vm_init,
\r
690 .core_deinit = NULL,
\r
695 register_extension(&sched_edf_impl);
\r