2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2014, the V3VEE Project <http://www.v3vee.org>
11 * all rights reserved.
13 * Author: Kyle C. Hale <kh@u.northwestern.edu>
14 * Shiva Rao <shiva.rao.717@gmail.com>
15 * Peter Dinda <pdinda@northwestern.edu>
17 * This is free software. you are permitted to use,
18 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #include <linux/uaccess.h>
22 #include <linux/seq_file.h>
23 #include <linux/proc_fs.h>
24 #include <linux/cpufreq.h>
25 #include <linux/kernel.h>
26 #include <linux/kmod.h>
27 #include <linux/module.h>
28 #include <linux/string.h>
29 #include <linux/interrupt.h>
30 #include <asm/processor.h>
32 #include <asm/msr-index.h>
34 // Used to determine the appropriate pstates values on Intel
35 #include <linux/acpi.h>
36 #include <acpi/processor.h>
38 #include <interfaces/vmm_pstate_ctrl.h>
41 #include "iface-pstate-ctrl.h"
43 #include "linux-exts.h"
46 This P-STATE control implementation includes the following modes.
47 You can switch between modes at any time.
49 - Internal control of processor states in Palacios (handoff from Linux)
50 When Palacios acquires this control, this module disables Linux cpufreq control
51 and allows code within Palacios unfettered access to the DVFS hardware.
52 - Direct control of Intel and AMD processor pstates using code in this module
53 When you acquire this control, this module disables Linux cpufreq control
54 and directly programs the processor itself in response to your requests
55 - External control of processor states via Linux
56 When you acquire this control, this module uses the Linux cpufreq control
57 to program the processor on your behalf
58 - Host control of processor states
59 This is the normal mode of DVFS control (e.g., Linux cpufreq)
61 Additionally, it provides a user-space interface for manipulating
62 p-state regardless of the host's functionality. This includes
63 an ioctl for commanding the implementation and a /proc file for
64 showing current status and capabilities. From user space, you can
65 use the Direct, External, and Host modes.
67 What we mean by "p-state" here is the processor's internal
68 configuration. For AMD, this is defined as being the same as
69 the ACPI-defined p-state. For Intel, it is not. There, it is the
70 contents of the perf ctl MSR, which is opaque. We try hard to
71 provide "p-states" that go from 0...max, by analogy or equivalence
77 #define PALACIOS_GOVNAME "v3vee"
78 #define MAX_PATH_LEN 128
79 #define MAX_GOV_NAME_LEN 16
82 struct pstate_core_info {
83 // Here we have the notion of host control
84 #define V3_PSTATE_HOST_CONTROL 0
85 // and all the modes from the Palacios interface:
86 // V3_PSTATE_EXTERNAL_CONTROL
87 // V3_PSTATE_DIRECT_CONTROL
88 // V3_PSTATE_INTERNAL_CONTROL
91 // Apply if we are under the DIRECT state
96 uint64_t cur_hw_pstate;
98 // Apply if we are under the EXTERNAL state
99 uint64_t set_freq_khz; // this is the frequency we're hoping to get
100 uint64_t cur_freq_khz;
101 uint64_t max_freq_khz;
102 uint64_t min_freq_khz;
105 uint8_t prior_speedstep;
106 uint8_t turbo_disabled;
111 // This is where we stash Linux's governor when we make a mode switch
112 char * linux_governor;
113 // We have this so we can restore the original frequency when we started
114 uint64_t original_hz;
119 static DEFINE_PER_CPU(struct pstate_core_info, core_state);
/*
 * Architecture-specific operations used to assert DIRECT control over the
 * core p-states.  One table exists per supported vendor and the active one
 * is reached through machine_state.funcs.
 */
struct pstate_core_funcs {
    /* Called when a core enters DIRECT control. */
    void     (*arch_init)(void);
    /* Called when a core leaves DIRECT control. */
    void     (*arch_deinit)(void);
    /* Smallest p-state number usable on the calling core. */
    uint64_t (*get_min_pstate)(void);
    /* Largest p-state number usable on the calling core. */
    uint64_t (*get_max_pstate)(void);
    /* Current p-state of the calling core. */
    uint64_t (*get_pstate)(void);
    /* Request the given p-state on the calling core. */
    void     (*set_pstate)(uint64_t pstate);
};
133 struct pstate_machine_info {
134 enum {INTEL, AMD, OTHER } arch;
135 int supports_pstates;
145 int have_opportunistic; // this means "Turbo Boost" or "IDA"
146 int have_policy_hint;
147 int have_hwp; // hardware-controlled performance states
148 int have_hdc; // hardware duty cycling
149 int have_mwait_ext; // mwait power extensions
150 int have_mwait_int; // mwait wakes on interrupt
153 int have_pstate_hw_coord; // mperf/aperf
155 // used for DIRECT control
156 struct pstate_core_funcs *funcs;
160 static struct pstate_machine_info machine_state;
163 /****************************************************
165 ***************************************************/
167 /* AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557 */
168 #define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061
169 #define MSR_PSTATE_CTL_REG_AMD 0xc0010062
170 #define MSR_PSTATE_STAT_REG_AMD 0xc0010063
172 struct p_state_limit_reg_amd {
176 uint8_t pstate_limit : 4; /* lowest P-state value (highest perf.) supported currently (this can change at runtime) */
177 uint8_t pstate_max : 4; /* highest P-state value supported (lowest perf) */
180 } __attribute__((packed));
181 } __attribute__((packed));
184 struct p_state_stat_reg_amd {
191 } __attribute__((packed));
192 } __attribute__((packed));
195 struct p_state_ctl_reg_amd {
202 } __attribute__((packed));
203 } __attribute__((packed));
206 /* CPUID Fn8000_0007_EDX[HwPstate(7)] = 1 */
207 static uint8_t supports_pstates_amd (void)
213 uint32_t eax, ebx, ecx, edx;
215 cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
216 machine_state.have_pstate = !!(edx & (1 << 7));
217 machine_state.have_coreboost = !!(edx & (1<<9));
218 machine_state.have_feedback = !!(edx & (1<<11));
220 cpuid(0x6, &eax, &ebx, &ecx, &edx);
221 machine_state.have_pstate_hw_coord = !!(ecx & 1);
223 INFO("P-State: AMD: Pstates=%d Coreboost=%d Feedback=%d PstateHWCoord=%d\n",
224 machine_state.have_pstate,
225 machine_state.have_coreboost,
226 machine_state.have_feedback,
227 machine_state.have_pstate_hw_coord);
229 amd_num_pstates = get_cpu_var(processors)->performance->state_count;
230 if (amd_num_pstates) {
231 for (i=0;i<amd_num_pstates;i++) {
232 INFO("P-State: %u: freq=%llu ctrl=%llx%s\n",
234 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
235 get_cpu_var(processors)->performance->states[i].control,
236 get_cpu_var(processors)->performance->states[i].control != i ? (mapwrong=1, " ALERT - CTRL MAPPING NOT 1:1") : "");
240 ERROR("P-State: AMD: mapping of pstate and control is not 1:1 on this processor - we will probably not work corrrectly\n");
243 return machine_state.have_pstate;
/*
 * AMD hook run when a core enters DIRECT control.
 * No per-core hardware setup is performed here.
 */
static void init_arch_amd(void)
{
    /* KCH: nothing to do here */
}
/*
 * AMD hook run when a core leaves DIRECT control.
 * No per-core hardware teardown is performed here.
 */
static void deinit_arch_amd(void)
{
    /* KCH: nothing to do here */
}
261 static uint64_t get_pstate_amd(void)
263 struct p_state_stat_reg_amd pstat;
265 rdmsrl(MSR_PSTATE_STAT_REG_AMD, pstat.val);
267 get_cpu_var(core_state).cur_pstate=pstat.reg.pstate;
268 put_cpu_var(core_state);
270 return pstat.reg.pstate;
274 static void set_pstate_amd(uint64_t p)
276 struct p_state_ctl_reg_amd pctl;
278 if (p>get_cpu_var(core_state).max_pstate) {
279 p=get_cpu_var(core_state).max_pstate;
281 put_cpu_var(core_state);
286 wrmsrl(MSR_PSTATE_CTL_REG_AMD, pctl.val);
288 get_cpu_var(core_state).cur_pstate=p;
289 put_cpu_var(core_state);
294 * NOTE: HW may change this value at runtime
296 static uint64_t get_max_pstate_amd(void)
298 struct p_state_limit_reg_amd plimits;
300 rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
302 return plimits.reg.pstate_max;
306 static uint64_t get_min_pstate_amd(void)
308 struct p_state_limit_reg_amd plimits;
310 rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
312 return plimits.reg.pstate_limit;
316 static struct pstate_core_funcs amd_funcs =
318 .arch_init = init_arch_amd,
319 .arch_deinit = deinit_arch_amd,
320 .get_pstate = get_pstate_amd,
321 .set_pstate = set_pstate_amd,
322 .get_max_pstate = get_max_pstate_amd,
323 .get_min_pstate = get_min_pstate_amd,
328 /***********************************************************
330 **********************************************************/
334 This implementation uses SpeedStep, but does check
335 to see if the other features (MPERF/APERF, Turbo/IDA, HWP)
339 /* Intel System Programmer's Manual Vol. 3B, 14-2 */
340 #define MSR_MPERF_IA32 0x000000e7
341 #define MSR_APERF_IA32 0x000000e8
342 #define MSR_MISC_ENABLE_IA32 0x000001a0
343 #define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
344 #define MSR_PLATFORM_INFO_IA32 0x000000ce
345 #define MSR_PERF_CTL_IA32 0x00000199
346 #define MSR_PERF_STAT_IA32 0x00000198
347 #define MSR_ENERY_PERF_BIAS_IA32 0x000001b0
350 /* Note that the actual meaning of the pstate
351 in the control and status registers is actually
352 implementation dependent, unlike AMD. The "official"
353 way to figure out the mapping from pstate to
354 these values is via ACPI. What is written in the register
355 is an "id" of an operation point
357 "Often", the 16 bit field consists of a high order byte
358 which is the frequency (the multiplier) and the low order
361 // MSR_PERF_CTL_IA32 r/w
362 struct perf_ctl_reg_intel {
366 // This is the target
367 // Note, not the ACPI pstate, but
368 // Intel's notion of pstate is that it's opaque
369 // for lots of implementations it seems to be
370 // frequency_id : voltage_id
371 // where frequency_id is typically the multiplier
372 uint16_t pstate : 16;
373 uint16_t reserved : 16;
374 // set to 1 to *disengage* dynamic acceleration
375 // Note that "IDA" and "Turbo" use the same interface
376 uint16_t dynamic_accel_disable : 1;
377 uint32_t reserved2 : 31;
379 } __attribute__((packed));
380 } __attribute__((packed));
382 // MSR_PERF_STAT_IA32 r
383 struct perf_stat_reg_intel {
387 // this is the current
388 uint16_t pstate : 16;
389 uint64_t reserved : 48;
391 } __attribute__((packed));
392 } __attribute__((packed));
394 // MSR_ENERGY_PERF_BIAS_IA32 r/w
395 struct enery_perf_bias_reg_intel {
399 // this is the current
400 uint8_t policy_hint : 4;
401 uint64_t reserved : 60;
403 } __attribute__((packed));
404 } __attribute__((packed));
407 struct turbo_mode_info_reg_intel {
412 uint8_t max_noturbo_ratio : 8;
414 uint8_t ppin_cap : 1;
416 uint8_t ratio_limit : 1;
417 uint8_t tdc_tdp_limit : 1;
419 uint8_t min_ratio : 8;
422 } __attribute__((packed));
423 } __attribute__((packed));
// This replicates the critical information in Linux's struct acpi_processor_px
// to make it easier to port to other OSes.
// One entry describes one "pstate" index on Intel.
struct intel_pstate_info {
    uint64_t freq;  // KHz
    uint64_t ctrl;  // What to write into the _CTL MSR to get this
};
// The internal array will be used if we cannot build the table locally
// (it is the fallback chosen when no ACPI performance data is available).
static struct intel_pstate_info *intel_pstate_to_ctrl_internal = NULL;
static int intel_num_pstates_internal = 0;

// These will either point to the internal array or to a constructed array;
// the table is indexed by "pstate" and holds intel_num_pstates entries.
static struct intel_pstate_info *intel_pstate_to_ctrl = NULL;
static int intel_num_pstates = 0;
441 /* CPUID.01:ECX.EIST(7) - Enhanced Intel SpeedStep Technology */
442 static uint8_t supports_pstates_intel(void)
444 /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H).
446 uint32_t eax, ebx, ecx, edx;
448 cpuid(0x1, &eax, &ebx, &ecx, &edx);
449 machine_state.have_speedstep = !!(ecx & (1 << 7));
451 cpuid(0x6, &eax, &ebx, &ecx, &edx);
452 machine_state.have_pstate_hw_coord = !!(ecx & 1); // ?
453 machine_state.have_opportunistic = !!(eax & 1<<1);
454 machine_state.have_policy_hint = !!(ecx & 1<<3);
455 machine_state.have_hwp = !!(eax & 1<<7);
456 machine_state.have_hdc = !!(eax & 1<<13);
458 cpuid(0x5, &eax, &ebx, &ecx, &edx);
459 machine_state.have_mwait_ext = !!(ecx & 1);
460 machine_state.have_mwait_int = !!(ecx & 1<<1);
463 // Note we test all the available hardware features documented as of August 2014
464 // We are only currently using speed_step, however.
466 INFO("P-State: Intel: Speedstep=%d, PstateHWCoord=%d, Opportunistic=%d PolicyHint=%d HWP=%d HDC=%d, MwaitExt=%d MwaitInt=%d \n",
467 machine_state.have_speedstep,
468 machine_state.have_pstate_hw_coord,
469 machine_state.have_opportunistic,
470 machine_state.have_policy_hint,
471 machine_state.have_hwp,
472 machine_state.have_hdc,
473 machine_state.have_mwait_ext,
474 machine_state.have_mwait_int );
477 if (machine_state.have_speedstep) {
479 // Build mapping table (from "pstate" (0..) to ctrl value for MSR
480 if (!(get_cpu_var(processors)) || !(get_cpu_var(processors)->performance) ) {
481 put_cpu_var(processors);
482 // no acpi... revert to internal table
483 intel_pstate_to_ctrl=intel_pstate_to_ctrl_internal;
484 intel_num_pstates=intel_num_pstates_internal;
486 intel_num_pstates = get_cpu_var(processors)->performance->state_count;
487 if (intel_num_pstates) {
488 intel_pstate_to_ctrl = palacios_alloc(sizeof(struct intel_pstate_info)*intel_num_pstates);
489 if (!intel_pstate_to_ctrl) {
490 ERROR("P-State: Cannot allocate space for mapping...\n");
493 for (i=0;i<intel_num_pstates;i++) {
494 intel_pstate_to_ctrl[i].freq = get_cpu_var(processors)->performance->states[i].core_frequency*1000;
495 intel_pstate_to_ctrl[i].ctrl = get_cpu_var(processors)->performance->states[i].control;
499 ERROR("P-State: Strange, machine has ACPI DVFS but no states...\n");
502 put_cpu_var(processors);
503 INFO("P-State: Intel - State Mapping (%u states) follows\n",intel_num_pstates);
504 for (i=0;i<intel_num_pstates;i++) {
505 INFO("P-State: Intel Mapping %u: freq=%llu ctrl=%llx\n",
506 i, intel_pstate_to_ctrl[i].freq,intel_pstate_to_ctrl[i].ctrl);
509 INFO("P-State: Intel: No speedstep here\n");
513 return machine_state.have_speedstep;
517 static void init_arch_intel(void)
521 rdmsrl(MSR_MISC_ENABLE_IA32, val);
523 //INFO("P-State: prior ENABLE=%llx\n",val);
525 // store prior speedstep setting
526 get_cpu_var(core_state).prior_speedstep=(val >> 16) & 0x1;
527 put_cpu_var(core_state);
529 // enable speedstep (probably already on)
531 wrmsrl(MSR_MISC_ENABLE_IA32, val);
533 //INFO("P-State: write ENABLE=%llx\n",val);
537 static void deinit_arch_intel(void)
541 rdmsrl(MSR_MISC_ENABLE_IA32, val);
543 //INFO("P-State: deinit: ENABLE=%llx\n",val);
545 val &= ~(1ULL << 16);
546 val |= get_cpu_var(core_state).prior_speedstep << 16;
547 put_cpu_var(core_state);
549 wrmsrl(MSR_MISC_ENABLE_IA32, val);
551 //INFO("P-state: deinit ENABLE=%llx\n",val);
555 /* TODO: Intel P-states require sampling at intervals... */
556 static uint64_t get_pstate_intel(void)
560 rdmsrl(MSR_PERF_STAT_IA32,val);
562 //INFO("P-State: Get: 0x%llx\n", val);
564 // should check if turbo is active, in which case
565 // this value is not the whole story
570 static void set_pstate_intel(uint64_t p)
575 if (intel_num_pstates==0) {
578 if (p>=intel_num_pstates) {
579 p=intel_num_pstates-1;
583 ctrl=intel_pstate_to_ctrl[p].ctrl;
585 /* ...Intel IDA (dynamic acceleration)
586 if (c->no_turbo && !c->turbo_disabled) {
590 // leave all bits alone except for the likely
593 rdmsrl(MSR_PERF_CTL_IA32, val);
594 //INFO("P-State: Pre-Set: 0x%llx\n", val);
597 val |= ctrl & 0xffffULL;
599 //INFO("P-State: Set: 0x%llx\n", val);
601 wrmsrl(MSR_PERF_CTL_IA32, val);
603 get_cpu_var(core_state).cur_pstate = p;
604 put_cpu_var(core_state);
608 static uint64_t get_min_pstate_intel(void)
615 static uint64_t get_max_pstate_intel (void)
617 if (intel_num_pstates==0) {
620 return intel_num_pstates-1;
624 static struct pstate_core_funcs intel_funcs =
626 .arch_init = init_arch_intel,
627 .arch_deinit = deinit_arch_intel,
628 .get_pstate = get_pstate_intel,
629 .set_pstate = set_pstate_intel,
630 .get_max_pstate = get_max_pstate_intel,
631 .get_min_pstate = get_min_pstate_intel,
636 /***********************************************
637 Arch determination and setup
638 ***********************************************/
640 static inline void cpuid_string (uint32_t id, uint32_t dest[4])
643 :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
648 static int get_cpu_vendor (char name[13])
653 cpuid_string(0,dest);
655 ((uint32_t*)name)[0]=dest[1];
656 ((uint32_t*)name)[1]=dest[3];
657 ((uint32_t*)name)[2]=dest[2];
664 static int is_intel (void)
667 get_cpu_vendor(name);
668 return !strcmp(name,"GenuineIntel");
672 static int is_amd (void)
675 get_cpu_vendor(name);
676 return !strcmp(name,"AuthenticAMD");
679 static int pstate_arch_setup(void)
683 machine_state.arch = AMD;
684 machine_state.funcs = &amd_funcs;
685 machine_state.supports_pstates = supports_pstates_amd();
686 INFO("PSTATE: P-State initialized for AMD\n");
687 } else if (is_intel()) {
688 machine_state.arch = INTEL;
689 machine_state.funcs = &intel_funcs;
690 machine_state.supports_pstates = supports_pstates_intel();
691 INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n");
695 machine_state.arch = OTHER;
696 machine_state.funcs = NULL;
697 machine_state.supports_pstates = 0;
698 INFO("PSTATE: P-state control: No support for direct control on this architecture\n");
707 /******************************************************************
709 *****************************************************************/
711 static unsigned cpus_using_v3_governor;
712 static DEFINE_MUTEX(v3_governor_mutex);
714 /* KCH: this will tell us when there is an actual frequency transition */
715 static int v3_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
718 struct cpufreq_freqs *freq = data;
720 if (per_cpu(core_state, freq->cpu).mode != V3_PSTATE_EXTERNAL_CONTROL) {
724 if (val == CPUFREQ_POSTCHANGE) {
725 DEBUG("P-State: frequency change took effect on cpu %u (now %u kHz)\n",
726 freq->cpu, freq->new);
727 per_cpu(core_state, freq->cpu).cur_freq_khz = freq->new;
735 static struct notifier_block v3_cpufreq_notifier_block = {
736 .notifier_call = v3_cpufreq_notifier
741 * This stub governor is simply a placeholder for preventing
742 * frequency changes from the Linux side. For now, we simply leave
743 * the frequency as is when we acquire control.
745 static int governor_run(struct cpufreq_policy *policy, unsigned int event)
747 unsigned cpu = policy->cpu;
750 /* we can't use cpufreq_driver_target here as it can result
751 * in a circular dependency, so we'll keep the current frequency as is
753 case CPUFREQ_GOV_START:
754 BUG_ON(!policy->cur);
756 mutex_lock(&v3_governor_mutex);
758 if (cpus_using_v3_governor == 0) {
759 cpufreq_register_notifier(&v3_cpufreq_notifier_block,
760 CPUFREQ_TRANSITION_NOTIFIER);
763 cpus_using_v3_governor++;
765 per_cpu(core_state, cpu).set_freq_khz = policy->cur;
766 per_cpu(core_state, cpu).cur_freq_khz = policy->cur;
767 per_cpu(core_state, cpu).max_freq_khz = policy->max;
768 per_cpu(core_state, cpu).min_freq_khz = policy->min;
770 mutex_unlock(&v3_governor_mutex);
772 case CPUFREQ_GOV_STOP:
773 mutex_lock(&v3_governor_mutex);
775 cpus_using_v3_governor--;
777 if (cpus_using_v3_governor == 0) {
778 cpufreq_unregister_notifier(
779 &v3_cpufreq_notifier_block,
780 CPUFREQ_TRANSITION_NOTIFIER);
783 per_cpu(core_state, cpu).set_freq_khz = 0;
784 per_cpu(core_state, cpu).cur_freq_khz = 0;
785 per_cpu(core_state, cpu).max_freq_khz = 0;
786 per_cpu(core_state, cpu).min_freq_khz = 0;
788 mutex_unlock(&v3_governor_mutex);
790 case CPUFREQ_GOV_LIMITS:
794 ERROR("Undefined governor command (%u)\n", event);
802 static struct cpufreq_governor stub_governor =
804 .name = PALACIOS_GOVNAME,
805 .governor = governor_run,
806 .owner = THIS_MODULE,
810 static struct workqueue_struct *pstate_wq;
813 struct work_struct work;
819 static inline void pstate_register_linux_governor(void)
821 cpufreq_register_governor(&stub_governor);
825 static inline void pstate_unregister_linux_governor(void)
827 cpufreq_unregister_governor(&stub_governor);
831 static int pstate_linux_init(void)
833 pstate_register_linux_governor();
834 pstate_wq = create_workqueue("v3vee_pstate_wq");
836 ERROR("Could not create work queue\n");
843 pstate_unregister_linux_governor();
848 static void pstate_linux_deinit(void)
850 pstate_unregister_linux_governor();
851 flush_workqueue(pstate_wq);
852 destroy_workqueue(pstate_wq);
856 static int get_current_governor(char **buf, unsigned int cpu)
858 struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy));
859 char * govname = NULL;
862 ERROR("could not allocate cpufreq_policy\n");
866 if (cpufreq_get_policy(policy, cpu) != 0) {
867 ERROR("Could not get current cpufreq policy\n");
871 /* We're in interrupt context, should probably not wait here */
872 govname = palacios_alloc(MAX_GOV_NAME_LEN);
874 ERROR("Could not allocate space for governor name\n");
878 strncpy(govname, policy->governor->name, MAX_GOV_NAME_LEN);
879 govname[MAX_GOV_NAME_LEN-1] = 0;
881 get_cpu_var(core_state).linux_governor = govname;
882 put_cpu_var(core_state);
886 palacios_free(policy);
891 palacios_free(policy);
896 /* passed to the userspacehelper interface for cleanup */
897 static void gov_switch_cleanup(struct subprocess_info * s)
899 palacios_free(s->argv[2]);
900 palacios_free(s->argv);
906 * @s - the governor to switch to
907 * TODO: this should probably be submitted to a work queue
908 * so we don't have to run it in interrupt context
910 static int governor_switch(char * s, unsigned int cpu)
912 char * path_str = NULL;
915 static char * envp[] = {
918 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
921 argv = palacios_alloc(4*sizeof(char*));
923 ERROR("Couldn't allocate argv struct\n");
927 path_str = palacios_alloc(MAX_PATH_LEN);
929 ERROR("Couldn't allocate path string\n");
932 memset(path_str, 0, MAX_PATH_LEN);
934 snprintf(path_str, MAX_PATH_LEN, "echo %s > /sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor", s, cpu);
941 /* KCH: we can't wait here to actually see if we succeeded, we're in interrupt context */
943 #if LINUX_VERSION_CODE <= KERNEL_VERSION(3,9,0)
944 return call_usermodehelper_fns("/bin/sh", argv, envp, UMH_NO_WAIT, NULL, gov_switch_cleanup, NULL);
947 struct subprocess_info *sp;
949 sp = call_usermodehelper_setup("/bin/sh", argv, envp, GFP_ATOMIC, NULL, gov_switch_cleanup, NULL);
954 return call_usermodehelper_exec(sp,0);
964 static inline void free_linux_governor(void)
966 palacios_free(get_cpu_var(core_state).linux_governor);
967 put_cpu_var(core_state);
971 static int linux_setup_palacios_governor(void)
974 unsigned int cpu = get_cpu();
977 /* KCH: we assume the v3vee governor is already
978 * registered with kernel by this point
981 if (get_current_governor(&gov, cpu) < 0) {
982 ERROR("Could not get current governor\n");
986 DEBUG("saving current governor (%s)\n", gov);
988 get_cpu_var(core_state).linux_governor = gov;
989 put_cpu_var(core_state);
991 DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME);
993 /* set the new one to ours */
995 if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) {
996 ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME);
1005 static uint64_t linux_get_pstate(void)
1007 struct cpufreq_policy * policy = NULL;
1008 struct cpufreq_frequency_table *table;
1010 unsigned int count = 0;
1011 unsigned int cpu = get_cpu();
1015 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1017 ERROR("Could not allocate policy struct\n");
1021 cpufreq_get_policy(policy, cpu);
1022 table = cpufreq_frequency_get_table(cpu);
1024 for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
1026 if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
1030 if (table[i].frequency == policy->cur) {
1037 palacios_free(policy);
1044 static uint64_t linux_get_freq(void)
1047 struct cpufreq_policy * policy = NULL;
1048 unsigned int cpu = get_cpu();
1051 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1053 ERROR("Could not allocate policy struct\n");
1057 if (cpufreq_get_policy(policy, cpu)) {
1058 ERROR("Could not get current policy\n");
1064 palacios_free(policy);
1070 pstate_switch_workfn (struct work_struct *work)
1072 pstate_work_t * pwork = (pstate_work_t*)work;
1073 struct cpufreq_policy * policy = NULL;
1075 unsigned int cpu = get_cpu();
1078 mutex_lock(&v3_governor_mutex);
1080 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1082 ERROR("Could not allocate space for cpufreq policy\n");
1086 if (cpufreq_get_policy(policy, cpu) != 0) {
1087 ERROR("Could not get cpufreq policy\n");
1092 get_cpu_var(core_state).set_freq_khz = freq;
1094 if (freq < get_cpu_var(core_state).min_freq_khz) {
1095 freq = get_cpu_var(core_state).min_freq_khz;
1097 if (freq > get_cpu_var(core_state).max_freq_khz) {
1098 freq = get_cpu_var(core_state).max_freq_khz;
1100 put_cpu_var(core_state);
1102 INFO("P-state: requesting frequency change on core %u to %llu\n", cpu, freq);
1103 __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
1106 palacios_free(policy);
1108 palacios_free(work);
1109 mutex_unlock(&v3_governor_mutex);
1113 static int linux_set_pstate(uint64_t p)
1115 struct cpufreq_policy * policy = NULL;
1116 struct cpufreq_frequency_table *table;
1117 pstate_work_t * work = NULL;
1119 unsigned int count = 0;
1122 unsigned int cpu = get_cpu();
1125 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1127 ERROR("Could not allocate policy struct\n");
1131 work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
1133 ERROR("Could not allocate work struct\n");
1137 if (cpufreq_get_policy(policy, cpu)) {
1138 ERROR("Could not get current policy\n");
1141 table = cpufreq_frequency_get_table(cpu);
1143 for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
1145 if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
1151 INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1152 work->freq = table[i].frequency;
1153 queue_work(pstate_wq, (struct work_struct*)work);
1163 /* we need to deal with the case in which we get a number > max pstate */
1165 INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1166 work->freq = table[last_valid].frequency;
1167 queue_work(pstate_wq, (struct work_struct*)work);
1170 palacios_free(policy);
1174 palacios_free(work);
1176 palacios_free(policy);
1181 static int linux_set_freq(uint64_t f)
1183 struct cpufreq_policy * policy = NULL;
1184 pstate_work_t * work = NULL;
1186 unsigned int cpu = get_cpu();
1189 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1191 ERROR("Could not allocate policy struct\n");
1195 work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
1197 ERROR("Could not allocate work struct\n");
1201 if (cpufreq_get_policy(policy, cpu) != 0) {
1202 ERROR("Could not get cpufreq policy\n");
1206 if (f < policy->min) {
1208 } else if (f > policy->max) {
1214 INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1216 queue_work(pstate_wq, (struct work_struct*)work);
1218 palacios_free(policy);
1222 palacios_free(work);
1224 palacios_free(policy);
1229 static int linux_restore_defaults(void)
1232 unsigned int cpu = get_cpu();
1235 gov = get_cpu_var(core_state).linux_governor;
1236 put_cpu_var(core_state);
1238 DEBUG("restoring previous governor (%s)\n", gov);
1240 if (governor_switch(gov, cpu) < 0) {
1241 ERROR("Could not restore governor to (%s)\n", gov);
1245 free_linux_governor();
1249 free_linux_governor();
1255 /******************************************************************
1256 Generic Interface as provided to Palacios and to the rest of the
1258 ******************************************************************/
1260 static void init_core(void)
1263 struct cpufreq_policy *p;
1266 //DEBUG("P-State Core Init\n");
1268 get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1269 get_cpu_var(core_state).cur_pstate = 0;
1271 if (machine_state.funcs) {
1272 get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate();
1273 get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate();
1275 get_cpu_var(core_state).min_pstate = 0;
1276 get_cpu_var(core_state).max_pstate = 0;
1280 cpu = get_cpu(); put_cpu();
1282 p = cpufreq_cpu_get(cpu);
1285 get_cpu_var(core_state).have_cpufreq = 0;
1286 get_cpu_var(core_state).min_freq_khz=0;
1287 get_cpu_var(core_state).max_freq_khz=0;
1288 get_cpu_var(core_state).cur_freq_khz=0;
1290 get_cpu_var(core_state).have_cpufreq = 1;
1291 get_cpu_var(core_state).min_freq_khz=p->min;
1292 get_cpu_var(core_state).max_freq_khz=p->max;
1293 get_cpu_var(core_state).cur_freq_khz=p->cur; } cpufreq_cpu_put(p);
1294 put_cpu_var(core_state);
1297 for (i=0;i<get_cpu_var(processors)->performance->state_count; i++) {
1298 INFO("P-State: %u: freq=%llu ctrl=%llx",
1300 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1301 get_cpu_var(processors)->performance->states[i].control);
1303 put_cpu_var(processors);
/* Defined later in this file; needed by deinit_core(). */
void palacios_pstate_ctrl_release(void);

/*
 * Per-core teardown: hand any p-state control held on the calling core back
 * via palacios_pstate_ctrl_release().
 */
static void deinit_core(void)
{
    DEBUG("P-State Core Deinit\n");
    palacios_pstate_ctrl_release();
}
1320 void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c)
1322 memset(c,0,sizeof(struct v3_cpu_pstate_chars));
1325 c->features = V3_PSTATE_INTERNAL_CONTROL;
1327 if (get_cpu_var(core_state).have_cpufreq) {
1328 c->features |= V3_PSTATE_EXTERNAL_CONTROL;
1331 if (machine_state.arch==AMD || machine_state.arch==INTEL) {
1332 c->features |= V3_PSTATE_DIRECT_CONTROL;
1334 c->cur_mode = get_cpu_var(core_state).mode;
1335 c->min_pstate = get_cpu_var(core_state).min_pstate;
1336 c->max_pstate = get_cpu_var(core_state).max_pstate;
1337 c->cur_pstate = get_cpu_var(core_state).cur_pstate;
1338 c->min_freq_khz = get_cpu_var(core_state).min_freq_khz;
1339 c->max_freq_khz = get_cpu_var(core_state).max_freq_khz;
1340 c->cur_freq_khz = get_cpu_var(core_state).cur_freq_khz;
1342 put_cpu_var(core_state);
1349 uint64_t palacios_pstate_ctrl_get_pstate(void)
1351 if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) {
1352 put_cpu_var(core_state);
1353 return machine_state.funcs->get_pstate();
1354 } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1355 put_cpu_var(core_state);
1356 return linux_get_pstate();
1358 put_cpu_var(core_state);
1364 void palacios_pstate_ctrl_set_pstate(uint64_t p)
1366 if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) {
1367 put_cpu_var(core_state);
1368 machine_state.funcs->set_pstate(p);
1369 } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1370 put_cpu_var(core_state);
1371 linux_set_pstate(p);
1373 put_cpu_var(core_state);
/*
 * void*-argument adapter around palacios_pstate_ctrl_set_pstate().
 *
 * @p - the requested p-state carried in a pointer-sized value.  Only the
 *      low 8 bits are forwarded: the value is narrowed to uint8_t before
 *      the call.
 */
void palacios_pstate_ctrl_set_pstate_wrapper(void *p)
{
    palacios_pstate_ctrl_set_pstate((uint8_t)(uint64_t)p);
}
1384 uint64_t palacios_pstate_ctrl_get_freq(void)
1386 if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1387 put_cpu_var(core_state);
1388 return linux_get_freq();
1390 put_cpu_var(core_state);
1396 void palacios_pstate_ctrl_set_freq(uint64_t p)
1398 if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1399 put_cpu_var(core_state);
1402 put_cpu_var(core_state);
1407 static int switch_to_external(void)
1409 DEBUG("switch from host control to external\n");
1411 if (!(get_cpu_var(core_state).have_cpufreq)) {
1412 put_cpu_var(core_state);
1413 ERROR("No cpufreq - cannot switch to external...\n");
1416 put_cpu_var(core_state);
1418 linux_setup_palacios_governor();
1420 get_cpu_var(core_state).mode=V3_PSTATE_EXTERNAL_CONTROL;
1421 put_cpu_var(core_state);
1427 static int switch_to_direct(void)
1429 DEBUG("switch from host control to direct\n");
1431 if (get_cpu_var(core_state).have_cpufreq) {
1432 put_cpu_var(core_state);
1433 DEBUG("switch to direct from cpufreq\n");
1435 // The implementation would set the policy and governor to peg cpu
1436 // regardless of load
1437 linux_setup_palacios_governor();
1439 put_cpu_var(core_state);
1442 if (machine_state.funcs && machine_state.funcs->arch_init) {
1443 get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL;
1445 machine_state.funcs->arch_init();
1447 put_cpu_var(core_state);
1454 static int switch_to_internal(void)
1456 DEBUG("switch from host control to internal\n");
1458 if (get_cpu_var(core_state).have_cpufreq) {
1459 put_cpu_var(core_state);
1460 DEBUG("switch to internal on machine with cpu freq\n");
1461 linux_setup_palacios_governor();
1463 put_cpu_var(core_state);
1466 get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL;
1468 put_cpu_var(core_state);
1474 static int switch_from_external(void)
1476 if (!(get_cpu_var(core_state).have_cpufreq)) {
1477 put_cpu_var(core_state);
1478 ERROR("No cpufreq - how did we get here... external...\n");
1481 put_cpu_var(core_state);
1483 DEBUG("Switching back to host control from external\n");
1485 if (get_cpu_var(core_state).have_cpufreq) {
1486 put_cpu_var(core_state);
1487 linux_restore_defaults();
1489 put_cpu_var(core_state);
1492 get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1493 put_cpu_var(core_state);
1499 static int switch_from_direct(void)
1502 DEBUG("Switching back to host control from direct\n");
1504 // Set maximum performance, just in case there is no host control
1505 machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate);
1506 machine_state.funcs->arch_deinit();
1508 if (get_cpu_var(core_state).have_cpufreq) {
1509 put_cpu_var(core_state);
1510 linux_restore_defaults();
1512 put_cpu_var(core_state);
1515 get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1517 put_cpu_var(core_state);
1523 static int switch_from_internal(void)
1525 DEBUG("Switching back to host control from internal\n");
1527 if (get_cpu_var(core_state).have_cpufreq) {
1528 put_cpu_var(core_state);
1529 linux_restore_defaults();
1531 put_cpu_var(core_state);
1534 get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1536 put_cpu_var(core_state);
1543 void palacios_pstate_ctrl_acquire(uint32_t type)
1545 if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) {
1546 put_cpu_var(core_state);
1547 palacios_pstate_ctrl_release();
1549 put_cpu_var(core_state);
1553 case V3_PSTATE_EXTERNAL_CONTROL:
1554 switch_to_external();
1556 case V3_PSTATE_DIRECT_CONTROL:
1559 case V3_PSTATE_INTERNAL_CONTROL:
1560 switch_to_internal();
1563 ERROR("Unknown pstate control type %u\n",type);
1569 // Wrappers for xcalls
1570 static void palacios_pstate_ctrl_acquire_external(void)
1572 palacios_pstate_ctrl_acquire(V3_PSTATE_EXTERNAL_CONTROL);
1575 static void palacios_pstate_ctrl_acquire_direct(void)
1577 palacios_pstate_ctrl_acquire(V3_PSTATE_DIRECT_CONTROL);
1581 void palacios_pstate_ctrl_release(void)
1583 if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) {
1584 put_cpu_var(core_state);
1587 put_cpu_var(core_state);
1589 switch (get_cpu_var(core_state).mode) {
1590 case V3_PSTATE_EXTERNAL_CONTROL:
1591 put_cpu_var(core_state);
1592 switch_from_external();
1594 case V3_PSTATE_DIRECT_CONTROL:
1595 put_cpu_var(core_state);
1596 switch_from_direct();
1598 case V3_PSTATE_INTERNAL_CONTROL:
1599 put_cpu_var(core_state);
1600 switch_from_internal();
1603 put_cpu_var(core_state);
1604 ERROR("Unknown pstate control type %u\n",core_state.mode);
1610 static void update_hw_pstate(void *arg)
1612 if (machine_state.funcs && machine_state.funcs->get_pstate) {
1613 get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate();
1614 put_cpu_var(core_state);
1616 get_cpu_var(core_state).cur_hw_pstate = 0;
1617 put_cpu_var(core_state);
1622 /***************************************************************************
1623 PROC Interface to expose state
1624 ***************************************************************************/
1626 static int pstate_show(struct seq_file * file, void * v)
1629 unsigned int numcpus = num_online_cpus();
1631 seq_printf(file, "V3VEE DVFS Status\n\n");
1633 for (cpu=0;cpu<numcpus;cpu++) {
1634 palacios_xcall(cpu,update_hw_pstate,0);
1637 for (cpu=0;cpu<numcpus;cpu++) {
1638 struct pstate_core_info *s = &per_cpu(core_state,cpu);
1639 seq_printf(file,"pcore %u: hw pstate 0x%llx mode %s ",cpu,
1641 s->mode==V3_PSTATE_HOST_CONTROL ? "host" :
1642 s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" :
1643 s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" :
1644 s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN");
1645 if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) {
1646 seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz);
1648 if (s->mode==V3_PSTATE_DIRECT_CONTROL) {
1649 seq_printf(file,"(min=%llu max=%llu cur=%llu) ",s->min_pstate, s->max_pstate, s->cur_pstate);
1651 seq_printf(file,"\n");
1656 static int pstate_open(struct inode * inode, struct file * file)
1658 return single_open(file, pstate_show, NULL);
1662 static struct file_operations pstate_fops = {
1663 .owner = THIS_MODULE,
1664 .open = pstate_open,
1666 .llseek = seq_lseek,
1667 .release = seq_release
1670 static int pstate_hw_show(struct seq_file * file, void * v)
1674 seq_printf(file, "V3VEE DVFS Hardware Info\n(all logical cores assumed identical)\n\n");
1676 seq_printf(file, "Arch: \t%s\n"
1678 machine_state.arch==INTEL ? "Intel" :
1679 machine_state.arch==AMD ? "AMD" : "Other",
1680 machine_state.supports_pstates ? "Yes" : "No");
1683 #define YN(x) ((x) ? "Y" : "N")
1685 if (machine_state.arch==INTEL) {
1686 seq_printf(file,"SpeedStep: \t%s\n",YN(machine_state.have_speedstep));
1687 seq_printf(file,"APERF/MPERF: \t%s\n",YN(machine_state.have_pstate_hw_coord));
1688 seq_printf(file,"IDA or TurboCore: \t%s\n",YN(machine_state.have_opportunistic));
1689 seq_printf(file,"Policy Hint: \t%s\n",YN(machine_state.have_policy_hint));
1690 seq_printf(file,"Hardware Policy: \t%s\n",YN(machine_state.have_hwp));
1691 seq_printf(file,"Hardware Duty Cycle: \t%s\n",YN(machine_state.have_hdc));
1692 seq_printf(file,"MWAIT extensions: \t%s\n",YN(machine_state.have_mwait_ext));
1693 seq_printf(file,"MWAIT wake on intr: \t%s\n",YN(machine_state.have_mwait_int));
1696 if (machine_state.arch==AMD) {
1697 seq_printf(file,"PState: \t%s\n",YN(machine_state.have_pstate));
1698 seq_printf(file,"APERF/MPERF: \t%s\n",YN(machine_state.have_pstate_hw_coord));
1699 seq_printf(file,"CoreBoost: \t%s\n",YN(machine_state.have_coreboost));
1700 seq_printf(file,"Feedback: \t%s\n",YN(machine_state.have_feedback));
1704 seq_printf(file,"\nPstate\tCtrl\tKHz\tmW\tuS(X)\tuS(B)\n");
1705 numstates = get_cpu_var(processors)->performance->state_count;
1707 seq_printf(file,"UNKNOWN\n");
1710 for (i=0;i<numstates;i++) {
1712 "%u\t%llx\t%llu\t%llu\t%llu\t%llu\n",
1714 get_cpu_var(processors)->performance->states[i].control,
1715 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1716 get_cpu_var(processors)->performance->states[i].power,
1717 get_cpu_var(processors)->performance->states[i].transition_latency,
1718 get_cpu_var(processors)->performance->states[i].bus_master_latency);
1721 put_cpu_var(processors);
1723 seq_printf(file,"\nAvailable Modes:");
1724 seq_printf(file," host");
1725 if (get_cpu_var(core_state).have_cpufreq) {
1726 seq_printf(file," external");
1728 put_cpu_var(core_state);
1729 if (machine_state.supports_pstates) {
1730 seq_printf(file," direct");
1732 seq_printf(file," internal\n");
1737 static int pstate_hw_open(struct inode * inode, struct file * file)
1739 return single_open(file, pstate_hw_show, NULL);
1743 static struct file_operations pstate_hw_fops = {
1744 .owner = THIS_MODULE,
1745 .open = pstate_hw_open,
1747 .llseek = seq_lseek,
1748 .release = seq_release
1752 int pstate_proc_setup(void)
1754 struct proc_dir_entry *proc;
1755 struct proc_dir_entry *prochw;
1757 PAL_PROC_CREATE(proc,"v3-dvfs",0444,palacios_get_procdir(),&pstate_fops);
1760 ERROR("Failed to create proc entry for p-state control\n");
1764 INFO("/proc/v3vee/v3-dvfs successfully created\n");
1766 PAL_PROC_CREATE(prochw,"v3-dvfs-hw",0444,palacios_get_procdir(),&pstate_hw_fops);
1769 ERROR("Failed to create proc entry for p-state hw info\n");
1773 INFO("/proc/v3vee/v3-dvfs-hw successfully created\n");
/* Remove both proc entries of this module, hardware-info entry first. */
void pstate_proc_teardown(void)
{
    static const char * const hw_entry = "v3-dvfs-hw";
    static const char * const status_entry = "v3-dvfs";

    remove_proc_entry(hw_entry,palacios_get_procdir());
    remove_proc_entry(status_entry,palacios_get_procdir());
}
1784 /********************************************************************
1785 User interface (ioctls)
1786 ********************************************************************/
1788 static int dvfs_ctrl(unsigned int cmd, unsigned long arg)
1790 struct v3_dvfs_ctrl_request r;
1792 if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) {
1793 ERROR("Failed to copy DVFS request from user\n");
1797 if (r.pcore >= num_online_cpus()) {
1798 ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore);
1803 case V3_DVFS_ACQUIRE: {
1804 switch (r.acq_type) {
1805 case V3_DVFS_EXTERNAL:
1806 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external, NULL);
1809 case V3_DVFS_DIRECT:
1810 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct, NULL);
1814 ERROR("Unknown DVFS acquire type %u\n",r.acq_type);
1819 case V3_DVFS_RELEASE: {
1820 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release, NULL);
1824 case V3_DVFS_SETFREQ: {
1825 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz);
1829 case V3_DVFS_SETPSTATE: {
1830 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate);
1834 ERROR("Unknown DVFS command %u\n",r.cmd);
1842 void pstate_user_setup(void)
1844 add_global_ctrl(V3_DVFS_CTRL, dvfs_ctrl);
1848 void pstate_user_teardown(void)
1850 remove_global_ctrl(V3_DVFS_CTRL);
1853 static struct v3_host_pstate_ctrl_iface hooks = {
1854 .get_chars = palacios_pstate_ctrl_get_chars,
1855 .acquire = palacios_pstate_ctrl_acquire,
1856 .release = palacios_pstate_ctrl_release,
1857 .set_pstate = palacios_pstate_ctrl_set_pstate,
1858 .get_pstate = palacios_pstate_ctrl_get_pstate,
1859 .set_freq = palacios_pstate_ctrl_set_freq,
1860 .get_freq = palacios_pstate_ctrl_get_freq,
1865 static int pstate_ctrl_init(void)
1868 unsigned int numcpus = num_online_cpus();
1870 pstate_arch_setup();
1872 for (cpu=0;cpu<numcpus;cpu++) {
1873 palacios_xcall(cpu,(void ((*)(void*)))init_core,0);
1876 V3_Init_Pstate_Ctrl(&hooks);
1878 if (pstate_proc_setup()) {
1879 ERROR("Unable to initialize P-State Control\n");
1883 pstate_user_setup();
1885 pstate_linux_init();
1887 INFO("P-State Control Initialized\n");
1892 static int pstate_ctrl_deinit(void)
1895 unsigned int numcpus=num_online_cpus();
1897 pstate_linux_deinit();
1899 pstate_user_teardown();
1901 pstate_proc_teardown();
1903 // release pstate control if we have it, and we need to do this on each processor
1904 for (cpu=0;cpu<numcpus;cpu++) {
1905 palacios_xcall(cpu,(void (*)(void *))deinit_core,0);
1909 // Free any mapping table we built for Intel
1910 if (intel_pstate_to_ctrl && intel_pstate_to_ctrl != intel_pstate_to_ctrl_internal) {
1911 palacios_free(intel_pstate_to_ctrl);
1919 static struct linux_ext pstate_ext = {
1920 .name = "PSTATE_CTRL",
1921 .init = pstate_ctrl_init,
1922 .deinit = pstate_ctrl_deinit,
1924 .guest_deinit = NULL,
1928 register_extension(&pstate_ext);