2 * This file is part of the Palacios Virtual Machine Monitor developed
\r
3 * by the V3VEE Project with funding from the United States National
\r
4 * Science Foundation and the Department of Energy.
\r
6 * The V3VEE Project is a joint project between Northwestern University
\r
7 * and the University of New Mexico. You can find out more at
\r
8 * http://www.v3vee.org
\r
10 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
\r
11 * All rights reserved.
\r
13 * Author: Oscar Mondragon <omondrag@cs.unm.edu>
\r
14 * Patrick G. Bridges <bridges@cs.unm.edu>
\r
16 * This is free software. You are permitted to use,
\r
17 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
\r
21 #include <palacios/vmm.h>
\r
22 #include <palacios/vmm_time.h>
\r
23 #include <palacios/vm_guest.h>
\r
24 #include <palacios/vmm_hashtable.h>
\r
25 #include <palacios/vmm_config.h>
\r
26 #include <palacios/vmm_extensions.h>
\r
27 #include <palacios/vmm_edf_sched.h>
\r
/* Compile out debug output unless EDF scheduler debugging is enabled
 * in the build configuration. */
#ifndef V3_CONFIG_DEBUG_EDF_SCHED
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
\r
40 * The EDF scheduler uses a dynamic calculated priority as scheduling criteria to choose
\r
 * which thread will be scheduled. That priority is calculated according to the relative
\r
42 * deadline of the threads that are ready to run in the runqueue. This runqueue is a per-logical
\r
43 * core data structure used to keep the runnable virtual cores (threads) allocated to that
\r
 * logical core. The threads with less time before their deadlines will receive better priorities.
\r
45 * The runqueue is sorted each time that a vCPU becomes runnable. At that time the vCPU is
\r
46 * enqueue and a new scheduling decision is taken. Each time a vCPU is scheduled, the parameter
\r
47 * slice used time is set to zero and the current deadline is calculated using its period. Once
\r
48 * the vCPU uses the logical core for slice seconds, that vCPU sleeps until its next scheduling
\r
 * period (when it is re-inserted in the runqueue) and yields the CPU to allow the scheduling
\r
50 * of the vCPU with best priority in the runqueue.
\r
// Default configuration values for the EDF Scheduler
// time parameters in microseconds

#define MAX_PERIOD 1000000000
#define MIN_PERIOD 50000
#define MAX_SLICE 1000000000
#define MIN_SLICE 10000
#define CPU_PERCENT 100
\r
64 * init_edf_config: Initialize scheduler configuration
\r
68 init_edf_config(struct vm_edf_sched_config *edf_config){
\r
70 edf_config->min_slice = MIN_SLICE;
\r
71 edf_config->max_slice = MAX_SLICE;
\r
72 edf_config->min_period = MIN_PERIOD;
\r
73 edf_config->max_period = MAX_PERIOD;
\r
74 edf_config->cpu_percent = CPU_PERCENT;
\r
79 * edf_sched_init: Initialize the run queue
\r
83 edf_sched_init(struct v3_vm_info *vm){
\r
85 PrintDebug(vm, VCORE_NONE,"EDF Sched. Initializing vm %s\n", vm->name);
\r
87 struct vm_sched_state *sched_state = &vm->sched;
\r
88 sched_state->priv_data = V3_Malloc( vm->avail_cores * sizeof(struct vm_edf_rq));
\r
90 if (!sched_state->priv_data) {
\r
91 PrintError(vm, VCORE_NONE,"Cannot allocate in priv_data in edf_sched_init\n");
\r
97 PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Available cores %d\n", vm->avail_cores);
\r
99 for(lcore = 0; lcore < vm->avail_cores ; lcore++){
\r
101 PrintDebug(vm, VCORE_NONE,"EDF Sched. edf_sched_init. Initializing logical core %d\n", lcore);
\r
103 struct vm_edf_rq * edf_rq_list = (struct vm_edf_rq *) sched_state->priv_data;
\r
104 struct vm_edf_rq * edf_rq = &edf_rq_list[lcore];
\r
106 edf_rq->vCPUs_tree = RB_ROOT;
\r
109 edf_rq->curr_vCPU=NULL;
\r
110 edf_rq->rb_leftmost=NULL;
\r
111 edf_rq->last_sched_time=0;
\r
112 init_edf_config(&edf_rq->edf_config);
\r
122 * is_admissible_core: Decides if a core is admited to the red black tree according with
\r
123 * the admisibility formula.
\r
127 is_admissible_core(struct vm_core_edf_sched * new_sched_core, struct vm_edf_rq *runqueue){
\r
129 int curr_utilization = runqueue->cpu_u;
\r
130 int new_utilization = curr_utilization + (100 * new_sched_core->slice / new_sched_core->period);
\r
131 int cpu_percent = (runqueue->edf_config).cpu_percent;
\r
133 if (new_utilization <= cpu_percent)
\r
/*
 * count_cores: counts the number of cores in a runqueue (currently unused,
 * kept for debugging).
 */

/*static int count_cores(struct vm_edf_rq *runqueue){

    struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
    struct vm_core_edf_sched *curr_core;
    int number_cores = 0;

    while(node){
        curr_core = container_of(node, struct vm_core_edf_sched, node);
        node = v3_rb_next(node);
        number_cores++;
    }

    return number_cores;
}*/
\r
166 * insert_core_edf: Finds a place in the tree for a newly activated core, adds the node
\r
167 * and rebalaces the tree
\r
171 insert_core_edf(struct vm_core_edf_sched *core, struct vm_edf_rq *runqueue){
\r
173 struct rb_node **new_core = &(runqueue->vCPUs_tree.rb_node);
\r
174 struct rb_node *parent = NULL;
\r
175 struct vm_core_edf_sched *curr_core;
\r
177 // Find out place in the tree for the new core
\r
178 while (*new_core) {
\r
180 curr_core = container_of(*new_core, struct vm_core_edf_sched, node);
\r
181 parent = *new_core;
\r
183 if (core->current_deadline < curr_core->current_deadline)
\r
184 new_core = &((*new_core)->rb_left);
\r
185 else if (core->current_deadline > curr_core->current_deadline)
\r
186 new_core = &((*new_core)->rb_right);
\r
187 else // Is Possible to have same current deadlines in both cores!
\r
190 // Add new node and rebalance tree.
\r
191 rb_link_node(&core->node, parent, new_core);
\r
192 v3_rb_insert_color(&core->node, &runqueue->vCPUs_tree);
\r
199 * get_curr_host_time: Calculates the current host time (microseconds)
\r
203 get_curr_host_time(struct vm_core_time *core_time){
\r
205 uint64_t cur_cycle = v3_get_host_time(core_time);
\r
206 uint64_t cpu_khz = core_time->host_cpu_freq;
\r
207 uint64_t curr_time_us = 1000 * cur_cycle / cpu_khz;
\r
209 return curr_time_us;
\r
/*
 * next_start_period: Given the current host time and the period of a given
 * vCPU, calculates the time (us) at which its next period starts. If the
 * current time is exactly on a period boundary, the result is one full
 * period later (never equal to curr_time_us).
 */
static uint64_t
next_start_period(uint64_t curr_time_us, uint64_t period_us){

    uint64_t time_period_us = curr_time_us % period_us;     // time elapsed within the current period
    uint64_t remaining_time_us = period_us - time_period_us; // time left until the period boundary
    uint64_t next_start_us = curr_time_us + remaining_time_us;

    return next_start_us;
}
\r
232 * get_runqueue: Get the runqueue assigned to a virtual core.
\r
235 struct vm_edf_rq * get_runqueue(struct guest_info *info){
\r
237 struct vm_edf_rq *runqueue_list = (struct vm_edf_rq *) info->vm_info->sched.priv_data;
\r
238 struct vm_edf_rq *runqueue = &runqueue_list[info->pcpu_id];
\r
244 * wakeup_core: Wakeup a given vCPU thread
\r
248 wakeup_core(struct guest_info *info){
\r
250 struct vm_core_edf_sched *core = info->core_sched.priv_data;
\r
251 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
253 if (!info->core_thread) {
\r
254 PrintError(info->vm_info, info,"ERROR: Tried to wakeup non-existent core thread vCPU_id %d \n",info->vcpu_id);
\r
258 PrintDebug(info->vm_info, info,"EDF Sched. run_next_core. vcpu_id %d, logical id %d, Total time %llu, Miss_deadlines %d, slice_overuses %d extra_time %llu, thread (%p)\n",
\r
259 core->info->vcpu_id,
\r
260 core->info->pcpu_id,
\r
262 core->miss_deadline,
\r
263 core->slice_overuse,
\r
264 core->extra_time_given,
\r
265 (struct task_struct *)info->core_thread);
\r
267 V3_Wakeup(info->core_thread);
\r
268 core->last_wakeup_time = get_curr_host_time(&core->info->time_state);
\r
269 runqueue->curr_vCPU = core;
\r
277 * activate_core - Moves a core to the red-black tree.
\r
278 * used time is set to zero and current deadline is calculated
\r
282 activate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
\r
284 if (is_admissible_core(core, runqueue)){
\r
286 uint64_t curr_time_us = get_curr_host_time(&core->info->time_state);
\r
287 uint64_t curr_deadline = next_start_period(curr_time_us, core->period);
\r
289 core->current_deadline = curr_deadline;
\r
290 core->used_time=0;
\r
291 core->remaining_time=core->slice;
\r
293 bool ins = insert_core_edf(core, runqueue);
\r
295 * If not inserted is possible that there is other core with the same deadline.
\r
296 * Then, the deadline is modified and try again
\r
299 core->current_deadline ++;
\r
300 ins = insert_core_edf(core, runqueue);
\r
303 runqueue->cpu_u += 100 * core->slice / core->period;
\r
304 runqueue->nr_vCPU ++;
\r
307 * If this is the first time to be activated pick first earliest deadline core to wakeup.
\r
310 if(core->last_wakeup_time == 0){
\r
312 struct vm_core_edf_sched *next_core;
\r
315 * Pick first earliest deadline core
\r
317 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
318 next_core = container_of(node, struct vm_core_edf_sched, node);
\r
320 // Wakeup next_core
\r
321 wakeup_core(next_core->info);
\r
330 PrintError(core->info->vm_info, core->info,"EDF Sched. activate_core. CPU cannot activate the core. It is not admissible");
\r
335 * edf_sched_core_init: Initializes per core data structure and
\r
336 * calls activate function.
\r
340 edf_sched_core_init(struct guest_info * info){
\r
342 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
343 struct vm_core_edf_sched *core_edf;
\r
345 PrintDebug(info->vm_info, info,"EDF Sched. Initializing vcore %d\n", info->vcpu_id);
\r
347 core_edf = (struct vm_core_edf_sched *) V3_Malloc(sizeof (struct vm_core_edf_sched));
\r
349 PrintError(info->vm_info, info,"Cannot allocate private_data in edf_sched_core_init\n");
\r
352 info->core_sched.priv_data = core_edf;
\r
354 // Default configuration if not specified in configuration file
\r
356 core_edf->info = info;
\r
357 core_edf->period = 500000;
\r
358 core_edf->slice = 50000;
\r
359 core_edf->used_time = 0;
\r
360 core_edf->last_wakeup_time = 0;
\r
361 core_edf->remaining_time = core_edf->slice;
\r
362 core_edf->miss_deadline = 0;
\r
363 core_edf->extra_time = true;
\r
364 core_edf->total_time = 0;
\r
365 core_edf->slice_overuse = 0;
\r
366 core_edf->extra_time_given = 0;
\r
368 v3_cfg_tree_t * cfg_tree = core_edf->info->vm_info->cfg_data->cfg;
\r
369 v3_cfg_tree_t * core = v3_cfg_subtree(v3_cfg_subtree(cfg_tree, "cores"), "core");
\r
372 char *id = v3_cfg_val(core, "vcpu_id");
\r
373 char *period = v3_cfg_val(core, "period");
\r
374 char *slice = v3_cfg_val(core, "slice");
\r
375 char *extra_time = v3_cfg_val(core, "extra_time");
\r
377 if (atoi(id) == core_edf->info->vcpu_id){
\r
379 core_edf->period = atoi(period);
\r
380 core_edf->slice = atoi(slice);
\r
381 core_edf->remaining_time = core_edf->slice;
\r
382 if (strcasecmp(extra_time, "true") == 0)
\r
383 core_edf->extra_time = true;
\r
385 core_edf->extra_time = false;
\r
388 core = v3_cfg_next_branch(core);
\r
391 activate_core(core_edf,runqueue);
\r
396 * search_core_edf: Searches a core in the red-black tree by using its vcpu_id
\r
398 static struct vm_core_edf_sched *
\r
399 search_core_edf(struct vm_core_edf_sched *core_edf, struct vm_edf_rq *runqueue){
\r
401 struct rb_node *node = runqueue->vCPUs_tree.rb_node;
\r
405 struct vm_core_edf_sched *core = container_of(node, struct vm_core_edf_sched, node);
\r
407 if (core_edf->current_deadline < core->current_deadline)
\r
408 node = node->rb_left;
\r
409 else if (core_edf->current_deadline > core->current_deadline)
\r
410 node = node->rb_right;
\r
412 if(core->info->vcpu_id == core_edf->info->vcpu_id){
\r
421 * delete_core_edf: Deletes a core from the red black tree, generally when it has
\r
422 * consumed its time slice within the current period.
\r
426 delete_core_edf( struct vm_core_edf_sched *core_edf , struct vm_edf_rq *runqueue){
\r
428 struct vm_core_edf_sched *core = search_core_edf(core_edf, runqueue);
\r
431 v3_rb_erase(&core->node, &runqueue->vCPUs_tree);
\r
435 PrintError(core->info->vm_info, core->info,"EDF Sched. delete_core_edf.Attempted to erase unexisting core");
\r
442 * deactivate_core - Removes a core from the red-black tree.
\r
446 deactivate_core(struct vm_core_edf_sched * core, struct vm_edf_rq *runqueue){
\r
448 if(delete_core_edf(core, runqueue)){
\r
449 runqueue->cpu_u -= 100 * core->slice / core->period;
\r
450 runqueue->nr_vCPU -- ;
\r
456 * pick_next_core: Returns the next core to be scheduled from the red black tree
\r
459 static struct vm_core_edf_sched *
\r
460 pick_next_core(struct vm_edf_rq *runqueue){
\r
464 * Pick first earliest deadline core
\r
466 struct rb_node *node = v3_rb_first(&runqueue->vCPUs_tree);
\r
467 struct vm_core_edf_sched *next_core = container_of(node, struct vm_core_edf_sched, node);
\r
470 * Verify if the earliest deadline core has used its complete slice and return it if not
\r
473 if (next_core->used_time < next_core->slice){
\r
474 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state))
\r
475 next_core->miss_deadline++;
\r
479 * If slice used, pick the next core that has not used its complete slice
\r
483 while(next_core->used_time >= next_core->slice){
\r
485 if(next_core->current_deadline < get_curr_host_time(&next_core->info->time_state) || !next_core->extra_time ){
\r
487 deactivate_core(next_core,runqueue);
\r
488 activate_core(next_core,runqueue);
\r
492 node = v3_rb_next(node);
\r
494 next_core = container_of(node, struct vm_core_edf_sched, node);
\r
497 node = v3_rb_first(&runqueue->vCPUs_tree); // If all cores have used its slice return the first one
\r
498 return container_of(node, struct vm_core_edf_sched, node);
\r
509 adjust_slice(struct guest_info * info, int used_time, int extra_time)
\r
511 struct vm_core_edf_sched *core = info->core_sched.priv_data;
\r
512 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
514 core->used_time = used_time;
\r
516 if (extra_time >= 0) {
\r
517 core->used_time += extra_time;
\r
520 if( core->used_time >= core->slice){
\r
521 deactivate_core(core,runqueue);
\r
522 activate_core(core,runqueue);
\r
528 * run_next_core: Pick next core to be scheduled and wakeup it
\r
532 run_next_core(struct guest_info *info, int used_time, int usec)
\r
534 struct vm_core_edf_sched *core = info->core_sched.priv_data;
\r
535 struct vm_core_edf_sched *next_core;
\r
536 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
538 /* The next core to be scheduled is choosen from the tree (Function pick_next_core).
\r
539 * The selected core is the one with the earliest deadline and with available time
\r
540 * to use within the current period (used_time < slice)
\r
543 next_core = pick_next_core(runqueue); // Pick next core to schedule
\r
545 if (core != next_core){
\r
547 // Wakeup next_core
\r
548 wakeup_core(next_core->info);
\r
549 core->total_time += used_time;
\r
551 if (used_time > core->slice){
\r
552 core->slice_overuse++;
\r
553 core->extra_time_given += (used_time - core->slice);
\r
565 * edf_schedule: Scheduling function
\r
569 edf_schedule(struct guest_info * info, int usec){
\r
571 uint64_t host_time = get_curr_host_time(&info->time_state);
\r
572 struct vm_edf_rq *runqueue = get_runqueue(info);
\r
573 struct vm_core_edf_sched *core = (struct vm_core_edf_sched *) info->core_sched.priv_data;
\r
575 uint64_t used_time = 0;
\r
576 if(core->last_wakeup_time != 0)
\r
577 used_time = host_time - core->last_wakeup_time;
\r
579 if(usec == 0) runqueue->last_sched_time = host_time; // Called from edf_sched_scheduled
\r
580 adjust_slice(core->info, host_time - core->last_wakeup_time, usec);
\r
582 run_next_core(core->info,used_time, usec);
\r
/*
 * edf_sched_schedule: Main scheduling entry point. Computes the amount of
 * time left in the period, recomputing the current core's deadline if it
 * has expired, then runs the next core.
 * It is called in the following cases:
 *   - A vCPU becomes runnable
 *   - The slice of the current vCPU was used
 *   - The period of a vCPU in the runqueue starts
 * TODO Something to do with extra time?
 * TODO Check the use of remaining_time
 */
static void
edf_sched_schedule(struct guest_info * info){

    edf_schedule(info, 0);
    return;
}
\r
/*
 * edf_sched_yield: Called when the logical CPU must be yielded for
 * usec microseconds; delegates to the common scheduling path.
 */
static void
edf_sched_yield(struct guest_info * info, int usec){

    edf_schedule(info, usec);
    return;
}
\r
620 * edf_sched_deinit: Frees edf scheduler data structures
\r
625 edf_sched_deinit(struct v3_vm_info *vm)
\r
628 struct vm_scheduler * sched = vm->sched.sched;
\r
629 void *priv_data = vm->sched.priv_data;
\r
635 V3_Free(priv_data);
\r
642 * edf_sched_deinit: Frees virtual core data structures
\r
646 edf_sched_core_deinit(struct guest_info *core)
\r
649 struct vm_scheduler * sched = core->core_sched.sched;
\r
650 void *priv_data = core->core_sched.priv_data;
\r
656 V3_Free(priv_data);
\r
661 static struct vm_scheduler_impl edf_sched = {
\r
663 .init = edf_sched_init,
\r
664 .deinit = edf_sched_deinit,
\r
665 .core_init = edf_sched_core_init,
\r
666 .core_deinit = edf_sched_core_deinit,
\r
667 .schedule = edf_sched_schedule,
\r
668 .yield = edf_sched_yield
\r
672 ext_sched_edf_init() {
\r
674 PrintDebug(VM_NONE, VCORE_NONE,"Sched. Creating (%s) scheduler\n",edf_sched.name);
\r
675 return v3_register_scheduler(&edf_sched);
\r
/* ext_sched_edf_vm_init: Per-VM extension hook — nothing to do here;
 * per-VM state is created by edf_sched_init. */
static int
ext_sched_edf_vm_init() {
    return 0;
}
\r
683 static struct v3_extension_impl sched_edf_impl = {
\r
684 .name = "EDF Scheduler",
\r
685 .init = ext_sched_edf_init,
\r
686 .vm_init = ext_sched_edf_vm_init,
\r
689 .core_deinit = NULL,
\r
694 register_extension(&sched_edf_impl);
\r