2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2014, the V3VEE Project <http://www.v3vee.org>
11 * all rights reserved.
13 * Author: Kyle C. Hale <kh@u.northwestern.edu>
14 * Shiva Rao <shiva.rao.717@gmail.com>
15 * Peter Dinda <pdinda@northwestern.edu>
17 * This is free software. You are permitted to use,
18 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #include <linux/uaccess.h>
22 #include <linux/seq_file.h>
23 #include <linux/proc_fs.h>
24 #include <linux/cpufreq.h>
25 #include <linux/kernel.h>
26 #include <linux/kmod.h>
27 #include <linux/module.h>
28 #include <linux/string.h>
29 #include <linux/interrupt.h>
30 #include <asm/processor.h>
32 #include <asm/msr-index.h>
34 // Used to determine the appropriate pstates values on Intel
35 #include <linux/acpi.h>
36 #include <acpi/processor.h>
38 #include <interfaces/vmm_pstate_ctrl.h>
41 #include "iface-pstate-ctrl.h"
43 #include "linux-exts.h"
46 This P-STATE control implementation includes the following modes.
47 You can switch between modes at any time.
49 - Internal control of processor states in Palacios (handoff from Linux)
50 When Palacios acquires this control, this module disables Linux cpufreq control
51 and allows code within Palacios unfettered access to the DVFS hardware.
52 - Direct control of Intel and AMD processor pstates using code in this module
53 When you acquire this control, this module disables Linux cpufreq control
54 and directly programs the processor itself in response to your requests
55 - External control of processor states via Linux
56 When you acquire this control, this module uses the Linux cpufreq control
57 to program the processor on your behalf
58 - Host control of processor states
59 This is the normal mode of DVFS control (e.g., Linux cpufreq)
61 Additionally, it provides a user-space interface for manipulating
62 p-state regardless of the host's functionality. This includes
63 an ioctl for commanding the implementation and a /proc file for
64 showing current status and capabilities. From user space, you can
65 use the Direct, External, and Host modes.
67 What we mean by "p-state" here is the processor's internal
68 configuration. For AMD, this is defined as being the same as
69 the ACPI-defined p-state. For Intel, it is not. There, it is the
70 contents of the perf ctl MSR, which is opaque. We try hard to
71 provide "p-states" that go from 0...max, by analogy or equivalence
77 #define PALACIOS_GOVNAME "v3vee"
78 #define MAX_PATH_LEN 128
79 #define MAX_GOV_NAME_LEN 16
// Per-core DVFS state. One instance per CPU (see DEFINE_PER_CPU below);
// accessed via get_cpu_var()/put_cpu_var() throughout this file.
82 struct pstate_core_info {
83     // Here we have the notion of host control
84 #define V3_PSTATE_HOST_CONTROL 0
85     // and all the modes from the Palacios interface:
86     // V3_PSTATE_EXTERNAL_CONTROL
87     // V3_PSTATE_DIRECT_CONTROL
88     // V3_PSTATE_INTERNAL_CONTROL
91     // Apply if we are under the DIRECT state
96     uint64_t cur_hw_pstate;
98     // Apply if we are under the EXTERNAL state
99     uint64_t set_freq_khz; // this is the frequency we're hoping to get
100     uint64_t cur_freq_khz;
101     uint64_t max_freq_khz;
102     uint64_t min_freq_khz;
// Intel-only: MISC_ENABLE speedstep bit saved by init_arch_intel() and
// restored by deinit_arch_intel().
105     uint8_t prior_speedstep;
106     uint8_t turbo_disabled;
111     // This is where we stash Linux's governor when we make a mode switch
112     char * linux_governor;
113     // We have this so we can restore the original frequency when we started
114     uint64_t original_hz;
// Per-CPU instance of the state above.
119 static DEFINE_PER_CPU(struct pstate_core_info, core_state);
// Architecture-specific function table used when we have DIRECT control of
// the hardware p-state (filled with amd_funcs or intel_funcs below).
123 // These are used to assert DIRECT control over the core pstates
124 struct pstate_core_funcs {
125     void (*arch_init)(void);
126     void (*arch_deinit)(void);
127     uint64_t (*get_min_pstate)(void);
128     uint64_t (*get_max_pstate)(void);
129     uint64_t (*get_pstate)(void);
130     void (*set_pstate)(uint64_t pstate);
// Machine-wide capability summary, populated once by pstate_arch_setup()
// via supports_pstates_amd()/supports_pstates_intel().
133 struct pstate_machine_info {
134     enum {INTEL, AMD, OTHER } arch;
135     int supports_pstates;
145     int have_opportunistic; // this means "Turbo Boost" or "IDA"
146     int have_policy_hint;
147     int have_hwp; // hardware-controlled performance states
148     int have_hdc; // hardware duty cycling
149     int have_mwait_ext; // mwait power extensions
150     int have_mwait_int; // mwait wakes on interrupt
153     int have_pstate_hw_coord; // mperf/aperf
155     // used for DIRECT control
156     struct pstate_core_funcs *funcs;
// Single global capability record for this machine.
160 static struct pstate_machine_info machine_state;
163 /****************************************************
165 ***************************************************/
// AMD p-state MSRs and their register overlays. Each struct below is a
// packed overlay accessed elsewhere via .val (whole 64-bit MSR image for
// rdmsrl/wrmsrl) and .reg (bitfields) -- see get_pstate_amd() et al.
167 /* AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557 */
168 #define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061
169 #define MSR_PSTATE_CTL_REG_AMD 0xc0010062
170 #define MSR_PSTATE_STAT_REG_AMD 0xc0010063
// P-State Current Limit (read-only): current floor/ceiling of the p-state range.
172 struct p_state_limit_reg_amd {
176         uint8_t pstate_limit : 4; /* lowest P-state value (highest perf.) supported currently (this can change at runtime) */
177         uint8_t pstate_max : 4; /* highest P-state value supported (lowest perf) */
180     } __attribute__((packed));
181 } __attribute__((packed));
// P-State Status: reports the p-state the core is currently in.
184 struct p_state_stat_reg_amd {
191     } __attribute__((packed));
192 } __attribute__((packed));
// P-State Control: writing a p-state number here requests a transition.
195 struct p_state_ctl_reg_amd {
202     } __attribute__((packed));
203 } __attribute__((packed));
// Probe AMD p-state support via CPUID, fill in machine_state feature bits,
// and sanity-check the ACPI p-state table. Returns nonzero if hardware
// p-states are available (CPUID Fn8000_0007 EDX bit 7).
206 /* CPUID Fn8000_0007_EDX[HwPstate(7)] = 1 */
207 static uint8_t supports_pstates_amd (void)
213     uint32_t eax, ebx, ecx, edx;
215     cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
216     machine_state.have_pstate = !!(edx & (1 << 7));
217     machine_state.have_coreboost = !!(edx & (1<<9));
218     machine_state.have_feedback = !!(edx & (1<<11));
220     cpuid(0x6, &eax, &ebx, &ecx, &edx);
221     machine_state.have_pstate_hw_coord = !!(ecx & 1);
223     INFO("P-State: AMD: Pstates=%d Coreboost=%d Feedback=%d PstateHWCoord=%d\n",
224     machine_state.have_pstate,
225     machine_state.have_coreboost,
226     machine_state.have_feedback,
227     machine_state.have_pstate_hw_coord);
// Walk the ACPI-reported p-state table; the AMD direct-control code assumes
// ACPI control values map 1:1 to p-state indices and warns if they do not.
229     amd_num_pstates = get_cpu_var(processors)->performance->state_count;
230     if (amd_num_pstates) {
231         for (i=0;i<amd_num_pstates;i++) {
232             INFO("P-State: %u: freq=%llu ctrl=%llx%s\n",
234             get_cpu_var(processors)->performance->states[i].core_frequency*1000,
235             get_cpu_var(processors)->performance->states[i].control,
236             get_cpu_var(processors)->performance->states[i].control != i ? (mapwrong=1, " ALERT - CTRL MAPPING NOT 1:1") : "");
240         ERROR("P-State: AMD: mapping of pstate and control is not 1:1 on this processor - we will probably not work corrrectly\n");
243     return machine_state.have_pstate;
// --- AMD DIRECT-control primitives -------------------------------------
// AMD needs no per-core setup/teardown for direct control.
249 static void init_arch_amd(void)
251     /* KCH: nothing to do here */
255 static void deinit_arch_amd(void)
257     /* KCH: nothing to do here */
// Read the current p-state from the status MSR, cache it in the per-cpu
// state, and return it. Runs on the calling core's MSRs.
261 static uint64_t get_pstate_amd(void)
263     struct p_state_stat_reg_amd pstat;
265     rdmsrl(MSR_PSTATE_STAT_REG_AMD, pstat.val);
267     get_cpu_var(core_state).cur_pstate=pstat.reg.pstate;
268     put_cpu_var(core_state);
270     return pstat.reg.pstate;
// Request p-state p on this core; clamps to the per-cpu max_pstate, writes
// the control MSR, and records the requested value in cur_pstate.
274 static void set_pstate_amd(uint64_t p)
276     struct p_state_ctl_reg_amd pctl;
278     if (p>get_cpu_var(core_state).max_pstate) {
279         p=get_cpu_var(core_state).max_pstate;
281     put_cpu_var(core_state);
286     wrmsrl(MSR_PSTATE_CTL_REG_AMD, pctl.val);
288     get_cpu_var(core_state).cur_pstate=p;
289     put_cpu_var(core_state);
// Highest p-state number (= lowest performance) from the limit MSR.
294  * NOTE: HW may change this value at runtime
296 static uint64_t get_max_pstate_amd(void)
298     struct p_state_limit_reg_amd plimits;
300     rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
302     return plimits.reg.pstate_max;
// Lowest p-state number (= highest performance) currently permitted.
306 static uint64_t get_min_pstate_amd(void)
308     struct p_state_limit_reg_amd plimits;
310     rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
312     return plimits.reg.pstate_limit;
// Dispatch table installed into machine_state.funcs by pstate_arch_setup().
316 static struct pstate_core_funcs amd_funcs =
318     .arch_init = init_arch_amd,
319     .arch_deinit = deinit_arch_amd,
320     .get_pstate = get_pstate_amd,
321     .set_pstate = set_pstate_amd,
322     .get_max_pstate = get_max_pstate_amd,
323     .get_min_pstate = get_min_pstate_amd,
328 /***********************************************************
330 **********************************************************/
334 This implementation uses SpeedStep, but does check
335 to see if the other features (MPERF/APERF, Turbo/IDA, HWP)
// Intel MSR numbers used by the direct-control path below.
339 /* Intel System Programmer's Manual Vol. 3B, 14-2 */
340 #define MSR_MPERF_IA32 0x000000e7
341 #define MSR_APERF_IA32 0x000000e8
342 #define MSR_MISC_ENABLE_IA32 0x000001a0
343 #define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
344 #define MSR_PLATFORM_INFO_IA32 0x000000ce
345 #define MSR_PERF_CTL_IA32 0x00000199
346 #define MSR_PERF_STAT_IA32 0x00000198
// NOTE(review): "ENERY" is a long-standing typo for "ENERGY"; the macro name
// is kept as-is since other code may reference it.
347 #define MSR_ENERY_PERF_BIAS_IA32 0x000001b0
350 /* Note that the actual meaning of the pstate
351 in the control and status registers is actually
352 implementation dependent, unlike AMD. The "official"
353 way to figure it out the mapping from pstate to
354 these values is via ACPI. What is written in the register
355 is an "id" of an operation point
357 "Often", the 16 bit field consists of a high order byte
358 which is the frequency (the multiplier) and the low order
// Packed overlay of IA32_PERF_CTL; written via .val in set_pstate_intel().
361 // MSR_PERF_CTL_IA32 r/w
362 struct perf_ctl_reg_intel {
366 // This is the target
367 // Note, not the ACPI pstate, but
368 // Intel's notion of pstate is that it's opaque
369 // for lots of implementations it seems to be
370 // frequency_id : voltage_id
371 // where frequency_id is typically the multiplier
372 uint16_t pstate : 16;
373 uint16_t reserved : 16;
374 // set to 1 to *disengage* dynamic acceleration
375 // Note that "IDA" and "Turbo" use the same interface
376 uint16_t dynamic_accel_disable : 1;
377 uint32_t reserved2 : 31;
379 } __attribute__((packed));
380 } __attribute__((packed));
// Packed overlay of IA32_PERF_STATUS (read-only current operating point).
382 // MSR_PERF_STAT_IA32 r
383 struct perf_stat_reg_intel {
387 // this is the current
388 uint16_t pstate : 16;
389 uint64_t reserved : 48;
391 } __attribute__((packed));
392 } __attribute__((packed));
// Packed overlay of IA32_ENERGY_PERF_BIAS (policy hint, 0..15).
394 // MSR_ENERGY_PERF_BIAS_IA32 r/w
395 struct enery_perf_bias_reg_intel {
399 // this is the current
400 uint8_t policy_hint : 4;
401 uint64_t reserved : 60;
403 } __attribute__((packed));
404 } __attribute__((packed));
// Packed overlay of MSR_PLATFORM_INFO (ratios/capability bits).
407 struct turbo_mode_info_reg_intel {
412 uint8_t max_noturbo_ratio : 8;
414 uint8_t ppin_cap : 1;
416 uint8_t ratio_limit : 1;
417 uint8_t tdc_tdp_limit : 1;
419 uint8_t min_ratio : 8;
422 } __attribute__((packed));
423 } __attribute__((packed));
425 // This replicates the critical information in Linux's struct acpi_processor_px
426 // To make it easier to port to other OSes.
427 struct intel_pstate_info {
428     uint64_t freq; // KHz
429     uint64_t ctrl; // What to write into the _CTL MSR to get this
432 // The internal array will be used if we cannot build the table locally
433 static struct intel_pstate_info *intel_pstate_to_ctrl_internal=0;
434 static int intel_num_pstates_internal=0;
436 // These will either point to the internal array or to a constructed array
437 static struct intel_pstate_info *intel_pstate_to_ctrl=0;
438 static int intel_num_pstates=0;
// Probe Intel p-state (SpeedStep) support via CPUID, record feature bits in
// machine_state, and build the pstate-index -> PERF_CTL value mapping table
// from ACPI (falling back to the internal table when ACPI is unavailable).
// Returns nonzero iff SpeedStep (CPUID.01:ECX bit 7) is present.
441 /* CPUID.01:ECX.AES(7) */
442 static uint8_t supports_pstates_intel(void)
444     /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H).
446     uint32_t eax, ebx, ecx, edx;
448     cpuid(0x1, &eax, &ebx, &ecx, &edx);
449     machine_state.have_speedstep = !!(ecx & (1 << 7));
451     cpuid(0x6, &eax, &ebx, &ecx, &edx);
452     machine_state.have_pstate_hw_coord = !!(ecx & 1); // ?
453     machine_state.have_opportunistic = !!(eax & 1<<1);
454     machine_state.have_policy_hint = !!(ecx & 1<<3);
455     machine_state.have_hwp = !!(eax & 1<<7);
456     machine_state.have_hdc = !!(eax & 1<<13);
458     cpuid(0x5, &eax, &ebx, &ecx, &edx);
459     machine_state.have_mwait_ext = !!(ecx & 1);
460     machine_state.have_mwait_int = !!(ecx & 1<<1);
463     // Note we test all the available hardware features documented as of August 2014
464     // We are only currently using speed_step, however.
466     INFO("P-State: Intel: Speedstep=%d, PstateHWCoord=%d, Opportunistic=%d PolicyHint=%d HWP=%d HDC=%d, MwaitExt=%d MwaitInt=%d \n",
467     machine_state.have_speedstep,
468     machine_state.have_pstate_hw_coord,
469     machine_state.have_opportunistic,
470     machine_state.have_policy_hint,
471     machine_state.have_hwp,
472     machine_state.have_hdc,
473     machine_state.have_mwait_ext,
474     machine_state.have_mwait_int );
477     if (machine_state.have_speedstep) {
479         // Build mapping table (from "pstate" (0..) to ctrl value for MSR
480         if (!(get_cpu_var(processors)) || !(get_cpu_var(processors)->performance) ) {
481             put_cpu_var(processors);
482             // no acpi... revert to internal table
483             intel_pstate_to_ctrl=intel_pstate_to_ctrl_internal;
484             intel_num_pstates=intel_num_pstates_internal;
486             intel_num_pstates = get_cpu_var(processors)->performance->state_count;
487             if (intel_num_pstates) {
488                 intel_pstate_to_ctrl = palacios_alloc(sizeof(struct intel_pstate_info)*intel_num_pstates);
489                 if (!intel_pstate_to_ctrl) {
490                     ERROR("P-State: Cannot allocate space for mapping...\n");
493                 for (i=0;i<intel_num_pstates;i++) {
494                     intel_pstate_to_ctrl[i].freq = get_cpu_var(processors)->performance->states[i].core_frequency*1000;
495                     intel_pstate_to_ctrl[i].ctrl = get_cpu_var(processors)->performance->states[i].control;
499                 ERROR("P-State: Strange, machine has ACPI DVFS but no states...\n");
502         put_cpu_var(processors);
503         INFO("P-State: Intel - State Mapping (%u states) follows\n",intel_num_pstates);
504         for (i=0;i<intel_num_pstates;i++) {
505             INFO("P-State: Intel Mapping %u: freq=%llu ctrl=%llx\n",
506             i, intel_pstate_to_ctrl[i].freq,intel_pstate_to_ctrl[i].ctrl);
509         INFO("P-State: Intel: No speedstep here\n");
513     return machine_state.have_speedstep;
// --- Intel DIRECT-control primitives -----------------------------------
// Entering direct control: save the current SpeedStep enable bit
// (MISC_ENABLE bit 16) into per-cpu state, then enable SpeedStep.
517 static void init_arch_intel(void)
521     rdmsrl(MSR_MISC_ENABLE_IA32, val);
523     //INFO("P-State: prior ENABLE=%llx\n",val);
525     // store prior speedstep setting
526     get_cpu_var(core_state).prior_speedstep=(val >> 16) & 0x1;
527     put_cpu_var(core_state);
529     // enable speedstep (probably already on)
531     wrmsrl(MSR_MISC_ENABLE_IA32, val);
533     //INFO("P-State: write ENABLE=%llx\n",val);
// Leaving direct control: restore the saved SpeedStep enable bit.
537 static void deinit_arch_intel(void)
541     rdmsrl(MSR_MISC_ENABLE_IA32, val);
543     //INFO("P-State: deinit: ENABLE=%llx\n",val);
545     val &= ~(1ULL << 16);
546     val |= get_cpu_var(core_state).prior_speedstep << 16;
547     put_cpu_var(core_state);
549     wrmsrl(MSR_MISC_ENABLE_IA32, val);
551     //INFO("P-state: deinit ENABLE=%llx\n",val);
// Read the current operating point from IA32_PERF_STATUS. Note this is the
// raw opaque id, not an index into intel_pstate_to_ctrl.
555 /* TODO: Intel P-states require sampling at intervals... */
556 static uint64_t get_pstate_intel(void)
560     rdmsrl(MSR_PERF_STAT_IA32,val);
562     //INFO("P-State: Get: 0x%llx\n", val);
564     // should check if turbo is active, in which case
565     // this value is not the whole story
// Set p-state index p: clamp to the mapping table, look up the PERF_CTL
// control value, and write only the low 16 bits of IA32_PERF_CTL, keeping
// the other bits intact.
570 static void set_pstate_intel(uint64_t p)
575     if (intel_num_pstates==0) {
578     if (p>=intel_num_pstates) {
579         p=intel_num_pstates-1;
583     ctrl=intel_pstate_to_ctrl[p].ctrl;
585     /* ...Intel IDA (dynamic acceleration)
586     if (c->no_turbo && !c->turbo_disabled) {
590     // leave all bits alone except for the likely
593     rdmsrl(MSR_PERF_CTL_IA32, val);
594     //INFO("P-State: Pre-Set: 0x%llx\n", val);
597     val |= ctrl & 0xffffULL;
599     //INFO("P-State: Set: 0x%llx\n", val);
601     wrmsrl(MSR_PERF_CTL_IA32, val);
603     get_cpu_var(core_state).cur_pstate = p;
604     put_cpu_var(core_state);
// Min/max here are indices into the mapping table (0..num_pstates-1),
// unlike the AMD versions which read hardware limit registers.
608 static uint64_t get_min_pstate_intel(void)
615 static uint64_t get_max_pstate_intel (void)
617     if (intel_num_pstates==0) {
620     return intel_num_pstates-1;
// Dispatch table installed into machine_state.funcs by pstate_arch_setup().
624 static struct pstate_core_funcs intel_funcs =
626     .arch_init = init_arch_intel,
627     .arch_deinit = deinit_arch_intel,
628     .get_pstate = get_pstate_intel,
629     .set_pstate = set_pstate_intel,
630     .get_max_pstate = get_max_pstate_intel,
631     .get_min_pstate = get_min_pstate_intel,
636 /***********************************************
637 Arch determination and setup
638 ***********************************************/
// Run CPUID leaf `id` and store EAX/EBX/ECX/EDX into dest[0..3].
640 static inline void cpuid_string (uint32_t id, uint32_t dest[4])
643     :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
// Fetch the 12-byte CPU vendor string (CPUID leaf 0: EBX,EDX,ECX order).
648 static int get_cpu_vendor (char name[13])
653     cpuid_string(0,dest);
655     ((uint32_t*)name)[0]=dest[1];
656     ((uint32_t*)name)[1]=dest[3];
657     ((uint32_t*)name)[2]=dest[2];
664 static int is_intel (void)
667     get_cpu_vendor(name);
668     return !strcmp(name,"GenuineIntel");
672 static int is_amd (void)
675     get_cpu_vendor(name);
676     return !strcmp(name,"AuthenticAMD");
// Detect the CPU vendor and install the matching direct-control function
// table and capability bits into the global machine_state.
679 static int pstate_arch_setup(void)
683         machine_state.arch = AMD;
684         machine_state.funcs = &amd_funcs;
685         machine_state.supports_pstates = supports_pstates_amd();
686         INFO("PSTATE: P-State initialized for AMD\n");
687     } else if (is_intel()) {
688         machine_state.arch = INTEL;
689         machine_state.funcs = &intel_funcs;
690         machine_state.supports_pstates = supports_pstates_intel();
691         INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n");
695         machine_state.arch = OTHER;
696         machine_state.funcs = NULL;
697         machine_state.supports_pstates = 0;
698         INFO("PSTATE: P-state control: No support for direct control on this architecture\n");
707 /******************************************************************
709 *****************************************************************/
// Count of CPUs currently running the v3vee stub governor; the transition
// notifier is registered only while this is nonzero. Both protected by
// v3_governor_mutex.
711 static unsigned cpus_using_v3_governor;
712 static DEFINE_MUTEX(v3_governor_mutex);
// cpufreq transition notifier: on POSTCHANGE, record the new frequency in
// the per-cpu state, but only for cores under EXTERNAL control.
714 /* KCH: this will tell us when there is an actual frequency transition */
715 static int v3_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
718     struct cpufreq_freqs *freq = data;
720     if (per_cpu(core_state, freq->cpu).mode != V3_PSTATE_EXTERNAL_CONTROL) {
724     if (val == CPUFREQ_POSTCHANGE) {
725         DEBUG("P-State: frequency change took effect on cpu %u (now %u kHz)\n",
726         freq->cpu, freq->new);
727         per_cpu(core_state, freq->cpu).cur_freq_khz = freq->new;
735 static struct notifier_block v3_cpufreq_notifier_block = {
736     .notifier_call = v3_cpufreq_notifier
// Stub cpufreq governor callback. START registers the notifier (first user
// only) and snapshots the policy's cur/min/max into per-cpu state; STOP
// reverses both. LIMITS is deliberately a no-op.
741 * This stub governor is simply a placeholder for preventing
742 * frequency changes from the Linux side. For now, we simply leave
743 * the frequency as is when we acquire control.
745 static int governor_run(struct cpufreq_policy *policy, unsigned int event)
747     unsigned cpu = policy->cpu;
750     /* we can't use cpufreq_driver_target here as it can result
751     * in a circular dependency, so we'll keep the current frequency as is
753     case CPUFREQ_GOV_START:
754         BUG_ON(!policy->cur);
756         mutex_lock(&v3_governor_mutex);
758         if (cpus_using_v3_governor == 0) {
759             cpufreq_register_notifier(&v3_cpufreq_notifier_block,
760             CPUFREQ_TRANSITION_NOTIFIER);
763         cpus_using_v3_governor++;
765         per_cpu(core_state, cpu).set_freq_khz = policy->cur;
766         per_cpu(core_state, cpu).cur_freq_khz = policy->cur;
767         per_cpu(core_state, cpu).max_freq_khz = policy->max;
768         per_cpu(core_state, cpu).min_freq_khz = policy->min;
770         mutex_unlock(&v3_governor_mutex);
772     case CPUFREQ_GOV_STOP:
773         mutex_lock(&v3_governor_mutex);
775         cpus_using_v3_governor--;
777         if (cpus_using_v3_governor == 0) {
778             cpufreq_unregister_notifier(
779             &v3_cpufreq_notifier_block,
780             CPUFREQ_TRANSITION_NOTIFIER);
783         per_cpu(core_state, cpu).set_freq_khz = 0;
784         per_cpu(core_state, cpu).cur_freq_khz = 0;
785         per_cpu(core_state, cpu).max_freq_khz = 0;
786         per_cpu(core_state, cpu).min_freq_khz = 0;
788         mutex_unlock(&v3_governor_mutex);
790     case CPUFREQ_GOV_LIMITS:
794         ERROR("Undefined governor command (%u)\n", event);
// The governor object registered with the kernel cpufreq core.
802 static struct cpufreq_governor stub_governor =
804     .name = PALACIOS_GOVNAME,
805     .governor = governor_run,
806     .owner = THIS_MODULE,
// Workqueue used to defer frequency changes out of interrupt context
// (see pstate_switch_workfn); work items carry the target frequency.
810 static struct workqueue_struct *pstate_wq;
813     struct work_struct work;
// Register/unregister the stub governor with the cpufreq core.
819 static inline void pstate_register_linux_governor(void)
821     cpufreq_register_governor(&stub_governor);
825 static inline void pstate_unregister_linux_governor(void)
827     cpufreq_unregister_governor(&stub_governor);
// Module-level init: register the governor and create the workqueue;
// unregisters the governor again if workqueue creation fails.
831 static int pstate_linux_init(void)
833     pstate_register_linux_governor();
834     pstate_wq = create_workqueue("v3vee_pstate_wq");
836         ERROR("Could not create work queue\n");
843         pstate_unregister_linux_governor();
// Module-level teardown: drop the governor, then drain and destroy the
// workqueue so no deferred frequency changes outlive the module.
848 static void pstate_linux_deinit(void)
850     pstate_unregister_linux_governor();
851     flush_workqueue(pstate_wq);
852     destroy_workqueue(pstate_wq);
// Look up the cpufreq governor currently active on `cpu`, copy its name
// into a freshly allocated buffer, and stash it in the per-cpu
// linux_governor slot (freed later by free_linux_governor()).
856 static int get_current_governor(char **buf, unsigned int cpu)
858     struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy));
859     char * govname = NULL;
862         ERROR("could not allocate cpufreq_policy\n");
866     if (cpufreq_get_policy(policy, cpu) != 0) {
867         ERROR("Could not get current cpufreq policy\n");
871     /* We're in interrupt context, should probably not wait here */
872     govname = palacios_alloc(MAX_GOV_NAME_LEN);
874         ERROR("Could not allocate space for governor name\n");
// NOTE(review): strncpy does not guarantee termination, hence the explicit
// NUL write on the following line.
878     strncpy(govname, policy->governor->name, MAX_GOV_NAME_LEN);
879     govname[MAX_GOV_NAME_LEN-1] = 0;
881     get_cpu_var(core_state).linux_governor = govname;
882     put_cpu_var(core_state);
886     palacios_free(policy);
891     palacios_free(policy);
// Frees argv[2] (the generated shell command) and argv itself once the
// usermode helper has consumed them.
896 /* passed to the userspacehelper interface for cleanup */
897 static void gov_switch_cleanup(struct subprocess_info * s)
899     palacios_free(s->argv[2]);
900     palacios_free(s->argv);
// Switch the cpufreq governor for `cpu` by spawning a shell that echoes the
// governor name into sysfs. Fire-and-forget (UMH_NO_WAIT): success of the
// write itself cannot be observed from here.
906 * @s - the governor to switch to
907 * TODO: this should probably be submitted to a work queue
908 * so we don't have to run it in interrupt context
910 static int governor_switch(char * s, unsigned int cpu)
912     char * path_str = NULL;
915     static char * envp[] = {
918     "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
921     argv = palacios_alloc(4*sizeof(char*));
923         ERROR("Couldn't allocate argv struct\n");
927     path_str = palacios_alloc(MAX_PATH_LEN);
929         ERROR("Couldn't allocate path string\n");
932     memset(path_str, 0, MAX_PATH_LEN);
934     snprintf(path_str, MAX_PATH_LEN, "echo %s > /sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor", s, cpu);
941     /* KCH: we can't wait here to actually see if we succeeded, we're in interrupt context */
942     return call_usermodehelper_fns("/bin/sh", argv, envp, UMH_NO_WAIT, NULL, gov_switch_cleanup, NULL);
// Free the saved governor-name buffer for the current core.
950 static inline void free_linux_governor(void)
952     palacios_free(get_cpu_var(core_state).linux_governor);
953     put_cpu_var(core_state);
// Save the current core's Linux governor and replace it with the v3vee stub
// governor, taking frequency control away from Linux.
957 static int linux_setup_palacios_governor(void)
960     unsigned int cpu = get_cpu();
963     /* KCH: we assume the v3vee governor is already
964     * registered with kernel by this point
967     if (get_current_governor(&gov, cpu) < 0) {
968         ERROR("Could not get current governor\n");
972     DEBUG("saving current governor (%s)\n", gov);
974     get_cpu_var(core_state).linux_governor = gov;
975     put_cpu_var(core_state);
977     DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME);
979     /* set the new one to ours */
981     if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) {
982         ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME);
// EXTERNAL mode: derive a pstate index by scanning the cpufreq frequency
// table for the entry matching the policy's current frequency, skipping
// invalid entries.
991 static uint64_t linux_get_pstate(void)
993     struct cpufreq_policy * policy = NULL;
994     struct cpufreq_frequency_table *table;
996     unsigned int count = 0;
997     unsigned int cpu = get_cpu();
1001     policy = palacios_alloc(sizeof(struct cpufreq_policy));
1003         ERROR("Could not allocate policy struct\n");
1007     cpufreq_get_policy(policy, cpu);
1008     table = cpufreq_frequency_get_table(cpu);
1010     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
1012         if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
1016         if (table[i].frequency == policy->cur) {
1023     palacios_free(policy);
// EXTERNAL mode: report the current frequency from the cpufreq policy.
1030 static uint64_t linux_get_freq(void)
1033     struct cpufreq_policy * policy = NULL;
1034     unsigned int cpu = get_cpu();
1037     policy = palacios_alloc(sizeof(struct cpufreq_policy));
1039         ERROR("Could not allocate policy struct\n");
1043     if (cpufreq_get_policy(policy, cpu)) {
1044         ERROR("Could not get current policy\n");
1050     palacios_free(policy);
// Deferred work handler: clamp the requested frequency to the per-cpu
// [min,max] range and ask the cpufreq driver to apply it. Runs in process
// context under v3_governor_mutex; frees its own work item when done.
1056 pstate_switch_workfn (struct work_struct *work)
1058     pstate_work_t * pwork = (pstate_work_t*)work;
1059     struct cpufreq_policy * policy = NULL;
1061     unsigned int cpu = get_cpu();
1064     mutex_lock(&v3_governor_mutex);
1066     policy = palacios_alloc(sizeof(struct cpufreq_policy));
1068         ERROR("Could not allocate space for cpufreq policy\n");
1072     if (cpufreq_get_policy(policy, cpu) != 0) {
1073         ERROR("Could not get cpufreq policy\n");
1078     get_cpu_var(core_state).set_freq_khz = freq;
1080     if (freq < get_cpu_var(core_state).min_freq_khz) {
1081         freq = get_cpu_var(core_state).min_freq_khz;
1083     if (freq > get_cpu_var(core_state).max_freq_khz) {
1084         freq = get_cpu_var(core_state).max_freq_khz;
1086     put_cpu_var(core_state);
1088     INFO("P-state: requesting frequency change on core %u to %llu\n", cpu, freq);
1089     __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
1092     palacios_free(policy);
1094     palacios_free(work);
1095     mutex_unlock(&v3_governor_mutex);
// EXTERNAL mode: translate pstate index p into a frequency by walking the
// cpufreq frequency table (skipping invalid entries), then queue a deferred
// frequency change on pstate_wq. An out-of-range p falls through to the
// last valid table entry.
1099 static int linux_set_pstate(uint64_t p)
1101     struct cpufreq_policy * policy = NULL;
1102     struct cpufreq_frequency_table *table;
1103     pstate_work_t * work = NULL;
1105     unsigned int count = 0;
1108     unsigned int cpu = get_cpu();
1111     policy = palacios_alloc(sizeof(struct cpufreq_policy));
1113         ERROR("Could not allocate policy struct\n");
1117     work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
1119         ERROR("Could not allocate work struct\n");
1123     if (cpufreq_get_policy(policy, cpu)) {
1124         ERROR("Could not get current policy\n");
1127     table = cpufreq_frequency_get_table(cpu);
1129     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
1131         if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
1137             INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1138             work->freq = table[i].frequency;
1139             queue_work(pstate_wq, (struct work_struct*)work);
1149     /* we need to deal with the case in which we get a number > max pstate */
1151     INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1152     work->freq = table[last_valid].frequency;
1153     queue_work(pstate_wq, (struct work_struct*)work);
1156     palacios_free(policy);
1160     palacios_free(work);
1162     palacios_free(policy);
// EXTERNAL mode: clamp the requested frequency f to the policy's [min,max]
// and queue a deferred frequency change on pstate_wq.
1167 static int linux_set_freq(uint64_t f)
1169     struct cpufreq_policy * policy = NULL;
1170     pstate_work_t * work = NULL;
1172     unsigned int cpu = get_cpu();
1175     policy = palacios_alloc(sizeof(struct cpufreq_policy));
1177         ERROR("Could not allocate policy struct\n");
1181     work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
1183         ERROR("Could not allocate work struct\n");
1187     if (cpufreq_get_policy(policy, cpu) != 0) {
1188         ERROR("Could not get cpufreq policy\n");
1192     if (f < policy->min) {
1194     } else if (f > policy->max) {
1200     INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1202     queue_work(pstate_wq, (struct work_struct*)work);
1204     palacios_free(policy);
1208     palacios_free(work);
1210     palacios_free(policy);
// Restore the governor that was saved in the per-cpu linux_governor slot,
// then free the saved name buffer.
1215 static int linux_restore_defaults(void)
1218     unsigned int cpu = get_cpu();
1221     gov = get_cpu_var(core_state).linux_governor;
1222     put_cpu_var(core_state);
1224     DEBUG("restoring previous governor (%s)\n", gov);
1226     if (governor_switch(gov, cpu) < 0) {
1227         ERROR("Could not restore governor to (%s)\n", gov);
1231     free_linux_governor();
1235     free_linux_governor();
1241 /******************************************************************
1242 Generic Interface as provided to Palacios and to the rest of the
1244 ******************************************************************/
// Per-core initialization: start in HOST_CONTROL mode, record the direct
// pstate range (if an arch function table exists), and snapshot cpufreq
// min/cur/max frequencies (have_cpufreq=0 when no policy is available).
1246 static void init_core(void)
1249     struct cpufreq_policy *p;
1252     //DEBUG("P-State Core Init\n");
1254     get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1255     get_cpu_var(core_state).cur_pstate = 0;
1257     if (machine_state.funcs) {
1258         get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate();
1259         get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate();
1261         get_cpu_var(core_state).min_pstate = 0;
1262         get_cpu_var(core_state).max_pstate = 0;
1266     cpu = get_cpu(); put_cpu();
1268     p = cpufreq_cpu_get(cpu);
1271     get_cpu_var(core_state).have_cpufreq = 0;
1272     get_cpu_var(core_state).min_freq_khz=0;
1273     get_cpu_var(core_state).max_freq_khz=0;
1274     get_cpu_var(core_state).cur_freq_khz=0;
1276     get_cpu_var(core_state).have_cpufreq = 1;
1277     get_cpu_var(core_state).min_freq_khz=p->min;
1278     get_cpu_var(core_state).max_freq_khz=p->max;
1279     get_cpu_var(core_state).cur_freq_khz=p->cur; } cpufreq_cpu_put(p);
1280     put_cpu_var(core_state);
// Log the ACPI p-state table for this core (debug aid).
1283     for (i=0;i<get_cpu_var(processors)->performance->state_count; i++) {
1284         INFO("P-State: %u: freq=%llu ctrl=%llx",
1286         get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1287         get_cpu_var(processors)->performance->states[i].control);
1289     put_cpu_var(processors);
// Forward declaration; defined later in this file.
1294 void palacios_pstate_ctrl_release(void);
// Per-core teardown: hand control back to the host.
1297 static void deinit_core(void)
1299     DEBUG("P-State Core Deinit\n");
1300     palacios_pstate_ctrl_release();
// Fill *c with this core's p-state capabilities and current state for the
// Palacios-facing interface. INTERNAL control is always advertised;
// EXTERNAL requires cpufreq; DIRECT requires a recognized AMD/Intel CPU.
1306 void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c)
1308     memset(c,0,sizeof(struct v3_cpu_pstate_chars));
1311     c->features = V3_PSTATE_INTERNAL_CONTROL;
1313     if (get_cpu_var(core_state).have_cpufreq) {
1314         c->features |= V3_PSTATE_EXTERNAL_CONTROL;
1317     if (machine_state.arch==AMD || machine_state.arch==INTEL) {
1318         c->features |= V3_PSTATE_DIRECT_CONTROL;
1320     c->cur_mode = get_cpu_var(core_state).mode;
1321     c->min_pstate = get_cpu_var(core_state).min_pstate;
1322     c->max_pstate = get_cpu_var(core_state).max_pstate;
1323     c->cur_pstate = get_cpu_var(core_state).cur_pstate;
1324     c->min_freq_khz = get_cpu_var(core_state).min_freq_khz;
1325     c->max_freq_khz = get_cpu_var(core_state).max_freq_khz;
1326     c->cur_freq_khz = get_cpu_var(core_state).cur_freq_khz;
1328     put_cpu_var(core_state);
// Read the current pstate, dispatching on this core's control mode
// (DIRECT -> arch MSR path, EXTERNAL -> Linux cpufreq path).
1335 uint64_t palacios_pstate_ctrl_get_pstate(void)
1337     if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) {
1338         put_cpu_var(core_state);
1339         return machine_state.funcs->get_pstate();
1340     } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1341         put_cpu_var(core_state);
1342         return linux_get_pstate();
1344     put_cpu_var(core_state);
// Set the pstate, dispatching on control mode as above.
1350 void palacios_pstate_ctrl_set_pstate(uint64_t p)
1352     if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) {
1353         put_cpu_var(core_state);
1354         machine_state.funcs->set_pstate(p);
1355     } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1356         put_cpu_var(core_state);
1357         linux_set_pstate(p);
1359     put_cpu_var(core_state);
// void* adapter for xcall use; the pointer value encodes the pstate
// (truncated to 8 bits).
1364 void palacios_pstate_ctrl_set_pstate_wrapper(void *p)
1366     palacios_pstate_ctrl_set_pstate((uint8_t)(uint64_t)p);
// Frequency get/set are only meaningful under EXTERNAL (cpufreq) control.
1370 uint64_t palacios_pstate_ctrl_get_freq(void)
1372     if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1373         put_cpu_var(core_state);
1374         return linux_get_freq();
1376     put_cpu_var(core_state);
1382 void palacios_pstate_ctrl_set_freq(uint64_t p)
1384     if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1385         put_cpu_var(core_state);
1388     put_cpu_var(core_state);
// HOST -> EXTERNAL: requires cpufreq; installs the stub governor so Linux
// stops changing frequencies, then marks this core EXTERNAL.
1393 static int switch_to_external(void)
1395     DEBUG("switch from host control to external\n");
1397     if (!(get_cpu_var(core_state).have_cpufreq)) {
1398         put_cpu_var(core_state);
1399         ERROR("No cpufreq - cannot switch to external...\n");
1402     put_cpu_var(core_state);
1404     linux_setup_palacios_governor();
1406     get_cpu_var(core_state).mode=V3_PSTATE_EXTERNAL_CONTROL;
1407     put_cpu_var(core_state);
// HOST -> DIRECT: neutralize cpufreq (if present) via the stub governor,
// then run the arch-specific init so this module drives the MSRs itself.
1413 static int switch_to_direct(void)
1415     DEBUG("switch from host control to direct\n");
1417     if (get_cpu_var(core_state).have_cpufreq) {
1418         put_cpu_var(core_state);
1419         DEBUG("switch to direct from cpufreq\n");
1421         // The implementation would set the policy and governor to peg cpu
1422         // regardless of load
1423         linux_setup_palacios_governor();
1425     put_cpu_var(core_state);
1428     if (machine_state.funcs && machine_state.funcs->arch_init) {
1429         get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL;
1431         machine_state.funcs->arch_init();
1433     put_cpu_var(core_state);
// HOST -> INTERNAL: hand off DVFS hardware to code inside Palacios;
// this module only keeps Linux out of the way.
1440 static int switch_to_internal(void)
1442     DEBUG("switch from host control to internal\n");
1444     if (get_cpu_var(core_state).have_cpufreq) {
1445         put_cpu_var(core_state);
1446         DEBUG("switch to internal on machine with cpu freq\n");
1447         linux_setup_palacios_governor();
1449     put_cpu_var(core_state);
1452     get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL;
1454     put_cpu_var(core_state);
// EXTERNAL -> HOST: restore the saved Linux governor and mode.
1460 static int switch_from_external(void)
1462     if (!(get_cpu_var(core_state).have_cpufreq)) {
1463         put_cpu_var(core_state);
1464         ERROR("No cpufreq - how did we get here... external...\n");
1467     put_cpu_var(core_state);
1469     DEBUG("Switching back to host control from external\n");
1471     if (get_cpu_var(core_state).have_cpufreq) {
1472         put_cpu_var(core_state);
1473         linux_restore_defaults();
1475     put_cpu_var(core_state);
1478     get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1479     put_cpu_var(core_state);
// DIRECT -> HOST: push the core to its fastest p-state (min_pstate =
// highest performance) as a safe default, undo arch init, then restore
// Linux control.
1485 static int switch_from_direct(void)
1488     DEBUG("Switching back to host control from direct\n");
1490     // Set maximum performance, just in case there is no host control
1491     machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate);
1492     machine_state.funcs->arch_deinit();
1494     if (get_cpu_var(core_state).have_cpufreq) {
1495         put_cpu_var(core_state);
1496         linux_restore_defaults();
1498     put_cpu_var(core_state);
1501     get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1503     put_cpu_var(core_state);
// INTERNAL -> HOST: restore the saved Linux governor and mode.
1509 static int switch_from_internal(void)
1511     DEBUG("Switching back to host control from internal\n");
1513     if (get_cpu_var(core_state).have_cpufreq) {
1514         put_cpu_var(core_state);
1515         linux_restore_defaults();
1517     put_cpu_var(core_state);
1520     get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1522     put_cpu_var(core_state);
1529 void palacios_pstate_ctrl_acquire(uint32_t type)
1531 if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) {
1532 put_cpu_var(core_state);
1533 palacios_pstate_ctrl_release();
1535 put_cpu_var(core_state);
1539 case V3_PSTATE_EXTERNAL_CONTROL:
1540 switch_to_external();
1542 case V3_PSTATE_DIRECT_CONTROL:
1545 case V3_PSTATE_INTERNAL_CONTROL:
1546 switch_to_internal();
1549 ERROR("Unknown pstate control type %u\n",type);
1555 // Wrappers for xcalls
1556 static void palacios_pstate_ctrl_acquire_external(void)
1558 palacios_pstate_ctrl_acquire(V3_PSTATE_EXTERNAL_CONTROL);
1561 static void palacios_pstate_ctrl_acquire_direct(void)
1563 palacios_pstate_ctrl_acquire(V3_PSTATE_DIRECT_CONTROL);
1567 void palacios_pstate_ctrl_release(void)
1569 if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) {
1570 put_cpu_var(core_state);
1573 put_cpu_var(core_state);
1575 switch (get_cpu_var(core_state).mode) {
1576 case V3_PSTATE_EXTERNAL_CONTROL:
1577 put_cpu_var(core_state);
1578 switch_from_external();
1580 case V3_PSTATE_DIRECT_CONTROL:
1581 put_cpu_var(core_state);
1582 switch_from_direct();
1584 case V3_PSTATE_INTERNAL_CONTROL:
1585 put_cpu_var(core_state);
1586 switch_from_internal();
1589 put_cpu_var(core_state);
1590 ERROR("Unknown pstate control type %u\n",core_state.mode);
1596 static void update_hw_pstate(void *arg)
1598 if (machine_state.funcs && machine_state.funcs->get_pstate) {
1599 get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate();
1600 put_cpu_var(core_state);
1602 get_cpu_var(core_state).cur_hw_pstate = 0;
1603 put_cpu_var(core_state);
1608 /***************************************************************************
1609 PROC Interface to expose state
1610 ***************************************************************************/
1612 static int pstate_show(struct seq_file * file, void * v)
1615 unsigned int numcpus = num_online_cpus();
1617 seq_printf(file, "V3VEE DVFS Status\n\n");
1619 for (cpu=0;cpu<numcpus;cpu++) {
1620 palacios_xcall(cpu,update_hw_pstate,0);
1623 for (cpu=0;cpu<numcpus;cpu++) {
1624 struct pstate_core_info *s = &per_cpu(core_state,cpu);
1625 seq_printf(file,"pcore %u: hw pstate 0x%llx mode %s ",cpu,
1627 s->mode==V3_PSTATE_HOST_CONTROL ? "host" :
1628 s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" :
1629 s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" :
1630 s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN");
1631 if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) {
1632 seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz);
1634 if (s->mode==V3_PSTATE_DIRECT_CONTROL) {
1635 seq_printf(file,"(min=%llu max=%llu cur=%llu) ",s->min_pstate, s->max_pstate, s->cur_pstate);
1637 seq_printf(file,"\n");
1642 static int pstate_open(struct inode * inode, struct file * file)
1644 return single_open(file, pstate_show, NULL);
1648 static struct file_operations pstate_fops = {
1649 .owner = THIS_MODULE,
1650 .open = pstate_open,
1652 .llseek = seq_lseek,
1653 .release = seq_release
1656 static int pstate_hw_show(struct seq_file * file, void * v)
1660 seq_printf(file, "V3VEE DVFS Hardware Info\n(all logical cores assumed identical)\n\n");
1662 seq_printf(file, "Arch: \t%s\n"
1664 machine_state.arch==INTEL ? "Intel" :
1665 machine_state.arch==AMD ? "AMD" : "Other",
1666 machine_state.supports_pstates ? "Yes" : "No");
1669 #define YN(x) ((x) ? "Y" : "N")
1671 if (machine_state.arch==INTEL) {
1672 seq_printf(file,"SpeedStep: \t%s\n",YN(machine_state.have_speedstep));
1673 seq_printf(file,"APERF/MPERF: \t%s\n",YN(machine_state.have_pstate_hw_coord));
1674 seq_printf(file,"IDA or TurboCore: \t%s\n",YN(machine_state.have_opportunistic));
1675 seq_printf(file,"Policy Hint: \t%s\n",YN(machine_state.have_policy_hint));
1676 seq_printf(file,"Hardware Policy: \t%s\n",YN(machine_state.have_hwp));
1677 seq_printf(file,"Hardware Duty Cycle: \t%s\n",YN(machine_state.have_hdc));
1678 seq_printf(file,"MWAIT extensions: \t%s\n",YN(machine_state.have_mwait_ext));
1679 seq_printf(file,"MWAIT wake on intr: \t%s\n",YN(machine_state.have_mwait_int));
1682 if (machine_state.arch==AMD) {
1683 seq_printf(file,"PState: \t%s\n",YN(machine_state.have_pstate));
1684 seq_printf(file,"APERF/MPERF: \t%s\n",YN(machine_state.have_pstate_hw_coord));
1685 seq_printf(file,"CoreBoost: \t%s\n",YN(machine_state.have_coreboost));
1686 seq_printf(file,"Feedback: \t%s\n",YN(machine_state.have_feedback));
1690 seq_printf(file,"\nPstate\tCtrl\tKHz\tmW\tuS(X)\tuS(B)\n");
1691 numstates = get_cpu_var(processors)->performance->state_count;
1693 seq_printf(file,"UNKNOWN\n");
1696 for (i=0;i<numstates;i++) {
1698 "%u\t%llx\t%llu\t%llu\t%llu\t%llu\n",
1700 get_cpu_var(processors)->performance->states[i].control,
1701 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1702 get_cpu_var(processors)->performance->states[i].power,
1703 get_cpu_var(processors)->performance->states[i].transition_latency,
1704 get_cpu_var(processors)->performance->states[i].bus_master_latency);
1707 put_cpu_var(processors);
1709 seq_printf(file,"\nAvailable Modes:");
1710 seq_printf(file," host");
1711 if (get_cpu_var(core_state).have_cpufreq) {
1712 seq_printf(file," external");
1714 put_cpu_var(core_state);
1715 if (machine_state.supports_pstates) {
1716 seq_printf(file," direct");
1718 seq_printf(file," internal\n");
1723 static int pstate_hw_open(struct inode * inode, struct file * file)
1725 return single_open(file, pstate_hw_show, NULL);
1729 static struct file_operations pstate_hw_fops = {
1730 .owner = THIS_MODULE,
1731 .open = pstate_hw_open,
1733 .llseek = seq_lseek,
1734 .release = seq_release
1738 int pstate_proc_setup(void)
1740 struct proc_dir_entry *proc;
1741 struct proc_dir_entry *prochw;
1743 proc = create_proc_entry("v3-dvfs",0444, palacios_get_procdir());
1746 ERROR("Failed to create proc entry for p-state control\n");
1750 proc->proc_fops = &pstate_fops;
1752 INFO("/proc/v3vee/v3-dvfs successfully created\n");
1754 prochw = create_proc_entry("v3-dvfs-hw",0444,palacios_get_procdir());
1758 ERROR("Failed to create proc entry for p-state hw info\n");
1762 prochw->proc_fops = &pstate_hw_fops;
1764 INFO("/proc/v3vee/v3-dvfs-hw successfully created\n");
1769 void pstate_proc_teardown(void)
1771 remove_proc_entry("v3-dvfs-hw",palacios_get_procdir());
1772 remove_proc_entry("v3-dvfs",palacios_get_procdir());
1775 /********************************************************************
1776 User interface (ioctls)
1777 ********************************************************************/
1779 static int dvfs_ctrl(unsigned int cmd, unsigned long arg)
1781 struct v3_dvfs_ctrl_request r;
1783 if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) {
1784 ERROR("Failed to copy DVFS request from user\n");
1788 if (r.pcore >= num_online_cpus()) {
1789 ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore);
1794 case V3_DVFS_ACQUIRE: {
1795 switch (r.acq_type) {
1796 case V3_DVFS_EXTERNAL:
1797 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external, NULL);
1800 case V3_DVFS_DIRECT:
1801 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct, NULL);
1805 ERROR("Unknown DVFS acquire type %u\n",r.acq_type);
1810 case V3_DVFS_RELEASE: {
1811 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release, NULL);
1815 case V3_DVFS_SETFREQ: {
1816 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz);
1820 case V3_DVFS_SETPSTATE: {
1821 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate);
1825 ERROR("Unknown DVFS command %u\n",r.cmd);
1833 void pstate_user_setup(void)
1835 add_global_ctrl(V3_DVFS_CTRL, dvfs_ctrl);
1839 void pstate_user_teardown(void)
1841 remove_global_ctrl(V3_DVFS_CTRL);
1844 static struct v3_host_pstate_ctrl_iface hooks = {
1845 .get_chars = palacios_pstate_ctrl_get_chars,
1846 .acquire = palacios_pstate_ctrl_acquire,
1847 .release = palacios_pstate_ctrl_release,
1848 .set_pstate = palacios_pstate_ctrl_set_pstate,
1849 .get_pstate = palacios_pstate_ctrl_get_pstate,
1850 .set_freq = palacios_pstate_ctrl_set_freq,
1851 .get_freq = palacios_pstate_ctrl_get_freq,
1856 static int pstate_ctrl_init(void)
1859 unsigned int numcpus = num_online_cpus();
1861 pstate_arch_setup();
1863 for (cpu=0;cpu<numcpus;cpu++) {
1864 palacios_xcall(cpu,(void ((*)(void*)))init_core,0);
1867 V3_Init_Pstate_Ctrl(&hooks);
1869 if (pstate_proc_setup()) {
1870 ERROR("Unable to initialize P-State Control\n");
1874 pstate_user_setup();
1876 pstate_linux_init();
1878 INFO("P-State Control Initialized\n");
1883 static int pstate_ctrl_deinit(void)
1886 unsigned int numcpus=num_online_cpus();
1888 pstate_linux_deinit();
1890 pstate_user_teardown();
1892 pstate_proc_teardown();
1894 // release pstate control if we have it, and we need to do this on each processor
1895 for (cpu=0;cpu<numcpus;cpu++) {
1896 palacios_xcall(cpu,(void (*)(void *))deinit_core,0);
1900 // Free any mapping table we built for Intel
1901 if (intel_pstate_to_ctrl && intel_pstate_to_ctrl != intel_pstate_to_ctrl_internal) {
1902 palacios_free(intel_pstate_to_ctrl);
1910 static struct linux_ext pstate_ext = {
1911 .name = "PSTATE_CTRL",
1912 .init = pstate_ctrl_init,
1913 .deinit = pstate_ctrl_deinit,
1915 .guest_deinit = NULL,
1919 register_extension(&pstate_ext);