2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2014, the V3VEE Project <http://www.v3vee.org>
11 * all rights reserved.
13 * Author: Kyle C. Hale <kh@u.northwestern.edu>
14 * Shiva Rao <shiva.rao.717@gmail.com>
15 * Peter Dinda <pdinda@northwestern.edu>
17 * This is free software. you are permitted to use,
18 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
21 #include <linux/uaccess.h>
22 #include <linux/seq_file.h>
23 #include <linux/proc_fs.h>
24 #include <linux/cpufreq.h>
25 #include <linux/kernel.h>
26 #include <linux/kmod.h>
27 #include <linux/module.h>
28 #include <linux/string.h>
29 #include <linux/interrupt.h>
30 #include <asm/processor.h>
32 #include <asm/msr-index.h>
34 // Used to determine the appropriate pstates values on Intel
35 #include <linux/acpi.h>
36 #include <acpi/processor.h>
38 #include <interfaces/vmm_pstate_ctrl.h>
41 #include "iface-pstate-ctrl.h"
43 #include "linux-exts.h"
46 This P-STATE control implementation includes the following modes.
47 You can switch between modes at any time.
49 - Internal control of processor states in Palacios (handoff from Linux)
50 When Palacios acquires this control, this module disables Linux cpufreq control
51 and allows code within Palacios unfettered access to the DVFS hardware.
52 - Direct control of Intel and AMD processor pstates using code in this module
53 When you acquire this control, this module disables Linux cpufreq control
54 and directly programs the processor itself in response to your requests
55 - External control of processor states via Linux
56 When you acquire this control, this module uses the Linux cpufreq control
57 to program the processor on your behalf
58 - Host control of processor states
59 This is the normal mode of DVFS control (e.g., Linux cpufreq)
61 Additionally, it provides a user-space interface for manipulating
62 p-state regardless of the host's functionality. This includes
63 an ioctl for commanding the implementation and a /proc file for
64 showing current status and capabilities. From user space, you can
65 use the Direct, External, and Host modes.
67 What we mean by "p-state" here is the processor's internal
68 configuration. For AMD, this is defined as being the same as
69 the ACPI-defined p-state. For Intel, it is not. There, it is the
70 contents of the perf ctl MSR, which is opaque. We try hard to
71 provide "p-states" that go from 0...max, by analogy or equivalence
// Name given to the stub cpufreq governor defined in this file (stub_governor.name).
77 #define PALACIOS_GOVNAME "v3vee"
// Size of the buffer in which governor_switch builds its shell command line.
78 #define MAX_PATH_LEN 128
// Size of the buffer in which get_current_governor saves the Linux governor name.
79 #define MAX_GOV_NAME_LEN 16
// Per-core p-state bookkeeping. One instance exists per CPU (see core_state).
// Other members that the rest of this file reads and writes (mode, cur_pstate,
// min_pstate, max_pstate, have_cpufreq) are not visible in this excerpt.
82 struct pstate_core_info {
83 // Here we have the notion of host control
84 #define V3_PSTATE_HOST_CONTROL 0
85 // and all the modes from the Palacios interface:
86 // V3_PSTATE_EXTERNAL_CONTROL
87 // V3_PSTATE_DIRECT_CONTROL
88 // V3_PSTATE_INTERNAL_CONTROL
91 // Apply if we are under the DIRECT state
96 uint64_t cur_hw_pstate;
98 // Apply if we are under the EXTERNAL state
// set_freq_khz is the last requested frequency; cur/max/min are filled in from
// the cpufreq policy by governor_run and init_core, and cur is refreshed by
// v3_cpufreq_notifier after a transition.
99 uint64_t set_freq_khz; // this is the frequency we're hoping to get
100 uint64_t cur_freq_khz;
101 uint64_t max_freq_khz;
102 uint64_t min_freq_khz;
// Bit 16 of MSR_MISC_ENABLE_IA32 as found by init_arch_intel, restored by
// deinit_arch_intel.
105 uint8_t prior_speedstep;
// NOTE(review): no visible code in this excerpt reads or writes turbo_disabled.
106 uint8_t turbo_disabled;
111 // This is where we stash Linux's governor when we make a mode switch
// Heap buffer allocated in get_current_governor, released by free_linux_governor.
112 char * linux_governor;
113 // We have this so we can restore the original frequency when we started
114 uint64_t original_hz;
// One pstate_core_info per CPU. The file accesses it through
// get_cpu_var/put_cpu_var for the current CPU and per_cpu for a named CPU.
119 static DEFINE_PER_CPU(struct pstate_core_info, core_state);
123 // These are used to assert DIRECT control over the core pstates
// Table of per-architecture hooks used for DIRECT control. The instances in
// this file are amd_funcs and intel_funcs; machine_state.funcs points at one
// of them, or is NULL for other vendors (see pstate_arch_setup).
124 struct pstate_core_funcs {
// Invoked by switch_to_direct when a core enters DIRECT control.
125 void (*arch_init)(void);
// Invoked by switch_from_direct when a core leaves DIRECT control.
126 void (*arch_deinit)(void);
// Bounds of the p-state range; queried by init_core.
127 uint64_t (*get_min_pstate)(void);
128 uint64_t (*get_max_pstate)(void);
// Read / program the hardware p-state of the calling core.
129 uint64_t (*get_pstate)(void);
130 void (*set_pstate)(uint64_t pstate);
// Machine-wide capability record, filled in by pstate_arch_setup and the
// supports_pstates_* probes. Additional have_* members used by those probes
// (have_pstate, have_coreboost, have_feedback, have_speedstep) are not visible
// in this excerpt.
133 struct pstate_machine_info {
// Vendor as determined by is_amd / is_intel.
134 enum {INTEL, AMD, OTHER } arch;
// Result of the vendor-specific probe; 0 on OTHER.
135 int supports_pstates;
// The following flags are set from CPUID leaves 0x5 and 0x6 in
// supports_pstates_intel.
145 int have_opportunistic; // this means "Turbo Boost" or "IDA"
146 int have_policy_hint;
147 int have_hwp; // hardware-controlled performance states
148 int have_hdc; // hardware duty cycling
149 int have_mwait_ext; // mwait power extensions
150 int have_mwait_int; // mwait wakes on interrupt
// Set from CPUID leaf 0x6 ECX bit 0 by both the AMD and Intel probes.
153 int have_pstate_hw_coord; // mperf/aperf
155 // used for DIRECT control
156 struct pstate_core_funcs *funcs;
// Single machine-wide instance; written during setup and read by the control paths.
160 static struct pstate_machine_info machine_state;
163 /****************************************************
165 ***************************************************/
167 /* AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557 */
// Read by get_max_pstate_amd and get_min_pstate_amd.
168 #define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061
// Written by set_pstate_amd.
169 #define MSR_PSTATE_CTL_REG_AMD 0xc0010062
// Read by get_pstate_amd.
170 #define MSR_PSTATE_STAT_REG_AMD 0xc0010063
// Overlay for the value of MSR_PSTATE_LIMIT_REG_AMD. Callers in this file read
// the raw value through .val and the bitfields through .reg; the lines that
// declare those two members are not visible in this excerpt.
172 struct p_state_limit_reg_amd {
176 uint8_t pstate_limit : 4; /* lowest P-state value (highest perf.) supported currently (this can change at runtime) */
177 uint8_t pstate_max : 4; /* highest P-state value supported (lowest perf) */
180 } __attribute__((packed));
181 } __attribute__((packed));
184 struct p_state_stat_reg_amd {
191 } __attribute__((packed));
192 } __attribute__((packed));
195 struct p_state_ctl_reg_amd {
202 } __attribute__((packed));
203 } __attribute__((packed));
206 /* CPUID Fn8000_0007_EDX[HwPstate(7)] = 1 */
// Probe AMD p-state capabilities and record them in machine_state.
// Returns machine_state.have_pstate (CPUID 0x80000007 EDX bit 7).
// Also logs the ACPI performance-state table of the current CPU and flags any
// entry whose control value differs from its index.
207 static uint8_t supports_pstates_amd (void)
213 uint32_t eax, ebx, ecx, edx;
// Leaf 0x80000007 EDX: bit 7 -> have_pstate, bit 9 -> have_coreboost,
// bit 11 -> have_feedback.
215 cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
216 machine_state.have_pstate = !!(edx & (1 << 7));
217 machine_state.have_coreboost = !!(edx & (1<<9));
218 machine_state.have_feedback = !!(edx & (1<<11));
// Leaf 0x6 ECX bit 0 -> have_pstate_hw_coord.
220 cpuid(0x6, &eax, &ebx, &ecx, &edx);
221 machine_state.have_pstate_hw_coord = !!(ecx & 1);
223 INFO("P-State: AMD: Pstates=%d Coreboost=%d Feedback=%d PstateHWCoord=%d\n",
224 machine_state.have_pstate,
225 machine_state.have_coreboost,
226 machine_state.have_feedback,
227 machine_state.have_pstate_hw_coord);
// NOTE(review): the Intel probe checks processors and ->performance for NULL
// before dereferencing; no such check is visible here - confirm.
// NOTE(review): get_cpu_var(processors) is evaluated once here and three times
// per loop iteration; no matching put_cpu_var is visible in this excerpt.
229 amd_num_pstates = get_cpu_var(processors)->performance->state_count;
230 if (amd_num_pstates) {
231 for (i=0;i<amd_num_pstates;i++) {
232 INFO("P-State: %u: freq=%llu ctrl=%llx%s\n",
234 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
235 get_cpu_var(processors)->performance->states[i].control,
236 get_cpu_var(processors)->performance->states[i].control != i ? (mapwrong=1, " ALERT - CTRL MAPPING NOT 1:1") : "");
240 ERROR("P-State: AMD: mapping of pstate and control is not 1:1 on this processor - we will probably not work corrrectly\n");
243 return machine_state.have_pstate;
// arch_init hook for AMD (wired into amd_funcs); nothing to set up.
249 static void init_arch_amd(void)
251 /* KCH: nothing to do here */
// arch_deinit hook for AMD (wired into amd_funcs); nothing to tear down.
255 static void deinit_arch_amd(void)
257 /* KCH: nothing to do here */
// Read the current hardware p-state of the calling core from
// MSR_PSTATE_STAT_REG_AMD, cache it in core_state.cur_pstate, and return it.
261 static uint64_t get_pstate_amd(void)
263 struct p_state_stat_reg_amd pstat;
265 rdmsrl(MSR_PSTATE_STAT_REG_AMD, pstat.val);
267 get_cpu_var(core_state).cur_pstate=pstat.reg.pstate;
268 put_cpu_var(core_state);
270 return pstat.reg.pstate;
// Program p-state p on the calling core through MSR_PSTATE_CTL_REG_AMD and
// record it in core_state.cur_pstate. Requests above core_state.max_pstate are
// clamped to max_pstate. The lines that fill pctl before the write are not
// visible in this excerpt.
274 static void set_pstate_amd(uint64_t p)
276 struct p_state_ctl_reg_amd pctl;
// NOTE(review): on the clamp path get_cpu_var is evaluated twice but
// put_cpu_var only once - confirm the preemption count stays balanced.
278 if (p>get_cpu_var(core_state).max_pstate) {
279 p=get_cpu_var(core_state).max_pstate;
281 put_cpu_var(core_state);
286 wrmsrl(MSR_PSTATE_CTL_REG_AMD, pctl.val);
288 get_cpu_var(core_state).cur_pstate=p;
289 put_cpu_var(core_state);
294 * NOTE: HW may change this value at runtime
// Return the pstate_max field of MSR_PSTATE_LIMIT_REG_AMD, i.e. the highest
// p-state number (lowest performance) per the field comment on the register struct.
296 static uint64_t get_max_pstate_amd(void)
298 struct p_state_limit_reg_amd plimits;
300 rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
302 return plimits.reg.pstate_max;
// Return the pstate_limit field of MSR_PSTATE_LIMIT_REG_AMD, i.e. the lowest
// p-state number (highest performance) currently allowed per the field comment
// on the register struct.
306 static uint64_t get_min_pstate_amd(void)
308 struct p_state_limit_reg_amd plimits;
310 rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
312 return plimits.reg.pstate_limit;
// DIRECT-control hook table for AMD; installed into machine_state.funcs by
// pstate_arch_setup.
316 static struct pstate_core_funcs amd_funcs =
318 .arch_init = init_arch_amd,
319 .arch_deinit = deinit_arch_amd,
320 .get_pstate = get_pstate_amd,
321 .set_pstate = set_pstate_amd,
322 .get_max_pstate = get_max_pstate_amd,
323 .get_min_pstate = get_min_pstate_amd,
328 /***********************************************************
330 **********************************************************/
334 This implementation uses SpeedStep, but does check
335 to see if the other features (MPERF/APERF, Turbo/IDA, HWP)
339 /* Intel System Programmer's Manual Vol. 3B, 14-2 */
// Intel MSR numbers. Within this excerpt only MSR_MISC_ENABLE_IA32,
// MSR_PERF_CTL_IA32 and MSR_PERF_STAT_IA32 are actually accessed.
340 #define MSR_MPERF_IA32 0x000000e7
341 #define MSR_APERF_IA32 0x000000e8
// Bit 16 is saved by init_arch_intel and restored by deinit_arch_intel.
342 #define MSR_MISC_ENABLE_IA32 0x000001a0
343 #define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
344 #define MSR_PLATFORM_INFO_IA32 0x000000ce
// Read-modify-written by set_pstate_intel.
345 #define MSR_PERF_CTL_IA32 0x00000199
// Read by get_pstate_intel.
346 #define MSR_PERF_STAT_IA32 0x00000198
// NOTE(review): spelled ENERY (sic); renaming would touch any users outside this excerpt.
347 #define MSR_ENERY_PERF_BIAS_IA32 0x000001b0
350 /* Note that the actual meaning of the pstate
351 in the control and status registers is actually
352 implementation dependent, unlike AMD. The "official"
353 way to figure out the mapping from pstate to
354 these values is via ACPI. What is written in the register
355 is an "id" of an operation point
357 "Often", the 16 bit field consists of a high order byte
358 which is the frequency (the multiplier) and the low order
361 // MSR_PERF_CTL_IA32 r/w
// Bitfield layout for the MSR_PERF_CTL_IA32 value. The code visible in this
// excerpt (set_pstate_intel) manipulates the raw 64-bit value instead of
// going through this struct.
362 struct perf_ctl_reg_intel {
366 // This is the target
367 // Note, not the ACPI pstate, but
368 // Intel's notion of pstate is that it's opaque
369 // for lots of implementations it seems to be
370 // frequency_id : voltage_id
371 // where frequency_id is typically the multiplier
372 uint16_t pstate : 16;
373 uint16_t reserved : 16;
374 // set to 1 to *disengage* dynamic acceleration
375 // Note that "IDA" and "Turbo" use the same interface
376 uint16_t dynamic_accel_disable : 1;
377 uint32_t reserved2 : 31;
379 } __attribute__((packed));
380 } __attribute__((packed));
382 // MSR_PERF_STAT_IA32 r
// Bitfield layout for the MSR_PERF_STAT_IA32 value: the low 16 bits carry the
// current operating point, the remaining 48 bits are treated as reserved.
383 struct perf_stat_reg_intel {
387 // this is the current
388 uint16_t pstate : 16;
389 uint64_t reserved : 48;
391 } __attribute__((packed));
392 } __attribute__((packed));
394 // MSR_ENERGY_PERF_BIAS_IA32 r/w
// Bitfield layout for the energy/performance bias MSR value: a 4-bit policy
// hint followed by 60 reserved bits.
// NOTE(review): the tag is spelled enery (sic), matching MSR_ENERY_PERF_BIAS_IA32.
395 struct enery_perf_bias_reg_intel {
399 // this is the current
400 uint8_t policy_hint : 4;
401 uint64_t reserved : 60;
403 } __attribute__((packed));
404 } __attribute__((packed));
// Bitfield layout describing ratio limits. Several members are missing from
// this excerpt, so bit positions cannot be derived from the visible lines.
// NOTE(review): presumably overlays MSR_PLATFORM_INFO_IA32 - TODO confirm.
407 struct turbo_mode_info_reg_intel {
412 uint8_t max_noturbo_ratio : 8;
414 uint8_t ppin_cap : 1;
416 uint8_t ratio_limit : 1;
417 uint8_t tdc_tdp_limit : 1;
419 uint8_t min_ratio : 8;
422 } __attribute__((packed));
423 } __attribute__((packed));
425 // This replicates the critical information in Linux's struct acpi_processor_px
426 // To make it easier to port to other OSes.
// One entry of the Intel p-state table: index in the table is the p-state
// number, ctrl is the value set_pstate_intel merges into MSR_PERF_CTL_IA32.
427 struct intel_pstate_info {
428 uint64_t freq; // KHz
429 uint64_t ctrl; // What to write into the _CTL MSR to get this
432 // The internal array will be used if we cannot build the table locally
// NOTE: as initialized here the internal fallback is a null pointer with zero
// entries, so falling back yields an empty table.
433 static struct intel_pstate_info *intel_pstate_to_ctrl_internal=0;
434 static int intel_num_pstates_internal=0;
436 // These will either point to the internal array or to a constructed array
// Populated by supports_pstates_intel; consumed by set_pstate_intel and
// get_max_pstate_intel.
437 static struct intel_pstate_info *intel_pstate_to_ctrl=0;
438 static int intel_num_pstates=0;
441 /* CPUID.01:ECX.EIST(7) */
// Probe Intel power-management features, record them in machine_state, and
// build the p-state -> control-value table used for DIRECT control.
// Returns machine_state.have_speedstep (CPUID leaf 0x1 ECX bit 7).
442 static uint8_t supports_pstates_intel(void)
444 /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H).
446 uint32_t eax, ebx, ecx, edx;
448 cpuid(0x1, &eax, &ebx, &ecx, &edx);
449 machine_state.have_speedstep = !!(ecx & (1 << 7));
// Leaf 0x6. The unparenthesized shifts below still test single bits because
// the shift operator binds tighter than bitwise AND.
451 cpuid(0x6, &eax, &ebx, &ecx, &edx);
452 machine_state.have_pstate_hw_coord = !!(ecx & 1); // ?
453 machine_state.have_opportunistic = !!(eax & 1<<1);
454 machine_state.have_policy_hint = !!(ecx & 1<<3);
455 machine_state.have_hwp = !!(eax & 1<<7);
456 machine_state.have_hdc = !!(eax & 1<<13);
// Leaf 0x5 ECX bits 0 and 1: mwait extensions.
458 cpuid(0x5, &eax, &ebx, &ecx, &edx);
459 machine_state.have_mwait_ext = !!(ecx & 1);
460 machine_state.have_mwait_int = !!(ecx & 1<<1);
463 // Note we test all the available hardware features documented as of August 2014
464 // We are only currently using speed_step, however.
466 INFO("P-State: Intel: Speedstep=%d, PstateHWCoord=%d, Opportunistic=%d PolicyHint=%d HWP=%d HDC=%d, MwaitExt=%d MwaitInt=%d \n",
467 machine_state.have_speedstep,
468 machine_state.have_pstate_hw_coord,
469 machine_state.have_opportunistic,
470 machine_state.have_policy_hint,
471 machine_state.have_hwp,
472 machine_state.have_hdc,
473 machine_state.have_mwait_ext,
474 machine_state.have_mwait_int );
477 if (machine_state.have_speedstep) {
479 // Build mapping table (from "pstate" (0..) to ctrl value for MSR
// NOTE(review): get_cpu_var(processors) is evaluated several times in this
// function (twice in the condition alone) while put_cpu_var appears once per
// branch - confirm the preemption count stays balanced.
480 if (!(get_cpu_var(processors)) || !(get_cpu_var(processors)->performance) ) {
481 put_cpu_var(processors);
482 // no acpi... revert to internal table
483 intel_pstate_to_ctrl=intel_pstate_to_ctrl_internal;
484 intel_num_pstates=intel_num_pstates_internal;
486 intel_num_pstates = get_cpu_var(processors)->performance->state_count;
487 if (intel_num_pstates) {
488 intel_pstate_to_ctrl = palacios_alloc(sizeof(struct intel_pstate_info)*intel_num_pstates);
// NOTE(review): the handling after this error message is not visible here;
// confirm intel_num_pstates is reset so the loops below do not touch a null table.
489 if (!intel_pstate_to_ctrl) {
490 ERROR("P-State: Cannot allocate space for mapping...\n");
// Copy the ACPI table: core_frequency is multiplied by 1000 to store KHz.
493 for (i=0;i<intel_num_pstates;i++) {
494 intel_pstate_to_ctrl[i].freq = get_cpu_var(processors)->performance->states[i].core_frequency*1000;
495 intel_pstate_to_ctrl[i].ctrl = get_cpu_var(processors)->performance->states[i].control;
499 ERROR("P-State: Strange, machine has ACPI DVFS but no states...\n");
502 put_cpu_var(processors);
503 INFO("P-State: Intel - State Mapping (%u states) follows\n",intel_num_pstates);
504 for (i=0;i<intel_num_pstates;i++) {
505 INFO("P-State: Intel Mapping %u: freq=%llu ctrl=%llx\n",
506 i, intel_pstate_to_ctrl[i].freq,intel_pstate_to_ctrl[i].ctrl);
509 INFO("P-State: Intel: No speedstep here\n");
513 return machine_state.have_speedstep;
// arch_init hook for Intel: remember bit 16 of MSR_MISC_ENABLE_IA32 in
// core_state.prior_speedstep, then write the MSR back. The line that modifies
// val between the read and the write is not visible in this excerpt.
517 static void init_arch_intel(void)
521 rdmsrl(MSR_MISC_ENABLE_IA32, val);
523 //INFO("P-State: prior ENABLE=%llx\n",val);
525 // store prior speedstep setting
526 get_cpu_var(core_state).prior_speedstep=(val >> 16) & 0x1;
527 put_cpu_var(core_state);
529 // enable speedstep (probably already on)
531 wrmsrl(MSR_MISC_ENABLE_IA32, val);
533 //INFO("P-State: write ENABLE=%llx\n",val);
// arch_deinit hook for Intel: put bit 16 of MSR_MISC_ENABLE_IA32 back to the
// value init_arch_intel saved in core_state.prior_speedstep.
537 static void deinit_arch_intel(void)
541 rdmsrl(MSR_MISC_ENABLE_IA32, val);
543 //INFO("P-State: deinit: ENABLE=%llx\n",val);
// Clear bit 16, then OR in the saved bit.
545 val &= ~(1ULL << 16);
546 val |= get_cpu_var(core_state).prior_speedstep << 16;
547 put_cpu_var(core_state);
549 wrmsrl(MSR_MISC_ENABLE_IA32, val);
551 //INFO("P-state: deinit ENABLE=%llx\n",val);
555 /* TODO: Intel P-states require sampling at intervals... */
// get_pstate hook for Intel: reads MSR_PERF_STAT_IA32. The return statement is
// not visible in this excerpt, so how val is mapped to a p-state number
// cannot be stated from here.
556 static uint64_t get_pstate_intel(void)
560 rdmsrl(MSR_PERF_STAT_IA32,val);
562 //INFO("P-State: Get: 0x%llx\n", val);
564 // should check if turbo is active, in which case
565 // this value is not the whole story
// set_pstate hook for Intel: translate p-state index p into a control value
// through intel_pstate_to_ctrl, merge its low 16 bits into MSR_PERF_CTL_IA32,
// and record p in core_state.cur_pstate. Indices past the end of the table
// are clamped to the last entry.
570 static void set_pstate_intel(uint64_t p)
// Empty table: the handling inside this branch is not visible in this excerpt.
575 if (intel_num_pstates==0) {
578 if (p>=intel_num_pstates) {
579 p=intel_num_pstates-1;
583 ctrl=intel_pstate_to_ctrl[p].ctrl;
585 /* ...Intel IDA (dynamic acceleration)
586 if (c->no_turbo && !c->turbo_disabled) {
590 // leave all bits alone except for the likely
593 rdmsrl(MSR_PERF_CTL_IA32, val);
594 //INFO("P-State: Pre-Set: 0x%llx\n", val);
// Only the low 16 bits of ctrl are ORed in; the statement that clears the old
// field beforehand is not visible in this excerpt.
597 val |= ctrl & 0xffffULL;
599 //INFO("P-State: Set: 0x%llx\n", val);
601 wrmsrl(MSR_PERF_CTL_IA32, val);
603 get_cpu_var(core_state).cur_pstate = p;
604 put_cpu_var(core_state);
608 static uint64_t get_min_pstate_intel(void)
// get_max_pstate hook for Intel: index of the last table entry
// (intel_num_pstates-1). The value returned for an empty table is not visible
// in this excerpt.
615 static uint64_t get_max_pstate_intel (void)
617 if (intel_num_pstates==0) {
620 return intel_num_pstates-1;
// DIRECT-control hook table for Intel; installed into machine_state.funcs by
// pstate_arch_setup.
624 static struct pstate_core_funcs intel_funcs =
626 .arch_init = init_arch_intel,
627 .arch_deinit = deinit_arch_intel,
628 .get_pstate = get_pstate_intel,
629 .set_pstate = set_pstate_intel,
630 .get_max_pstate = get_max_pstate_intel,
631 .get_min_pstate = get_min_pstate_intel,
636 /***********************************************
637 Arch determination and setup
638 ***********************************************/
// Inline-asm helper. Per the output constraints below, EAX/EBX/ECX/EDX land in
// dest[0..3] in that order. The instruction and input constraint lines are not
// visible in this excerpt (presumably cpuid with id in EAX - TODO confirm).
640 static inline void cpuid_string (uint32_t id, uint32_t dest[4])
643 :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
// Fill name with the 12-character CPU vendor string from CPUID leaf 0.
// The registers are stored in EBX, EDX, ECX order (dest[1], dest[3], dest[2]).
// The terminator write and the return value are not visible in this excerpt.
648 static int get_cpu_vendor (char name[13])
653 cpuid_string(0,dest);
// NOTE(review): name is written through a uint32_t pointer cast, which assumes
// the caller buffer is suitably aligned - confirm against callers.
655 ((uint32_t*)name)[0]=dest[1];
656 ((uint32_t*)name)[1]=dest[3];
657 ((uint32_t*)name)[2]=dest[2];
// Returns nonzero when the CPU vendor string is exactly GenuineIntel.
664 static int is_intel (void)
667 get_cpu_vendor(name);
668 return !strcmp(name,"GenuineIntel");
// Returns nonzero when the CPU vendor string is exactly AuthenticAMD.
672 static int is_amd (void)
675 get_cpu_vendor(name);
676 return !strcmp(name,"AuthenticAMD");
// Detect the CPU vendor and fill in machine_state: arch, the DIRECT-control
// hook table, and the result of the vendor-specific capability probe.
// On any other vendor funcs is NULL and supports_pstates is 0.
// The first branch condition and the return value are not visible in this excerpt.
679 static int pstate_arch_setup(void)
683 machine_state.arch = AMD;
684 machine_state.funcs = &amd_funcs;
685 machine_state.supports_pstates = supports_pstates_amd();
686 INFO("PSTATE: P-State initialized for AMD\n");
687 } else if (is_intel()) {
688 machine_state.arch = INTEL;
689 machine_state.funcs = &intel_funcs;
690 machine_state.supports_pstates = supports_pstates_intel();
691 INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n");
695 machine_state.arch = OTHER;
696 machine_state.funcs = NULL;
697 machine_state.supports_pstates = 0;
698 INFO("PSTATE: P-state control: No support for direct control on this architecture\n");
707 /******************************************************************
709 *****************************************************************/
// Count of CPUs currently running the stub governor; governor_run uses it to
// register the transition notifier on the first start and unregister it on
// the last stop.
711 static unsigned cpus_using_v3_governor;
// Held by governor_run around the counter and by pstate_switch_workfn for its
// whole body.
712 static DEFINE_MUTEX(v3_governor_mutex);
714 /* KCH: this will tell us when there is an actual frequency transition */
// cpufreq transition notifier callback. For CPUs in EXTERNAL control it
// records the new frequency in that CPU core_state.cur_freq_khz once the
// change has completed (CPUFREQ_POSTCHANGE). The return statements are not
// visible in this excerpt.
715 static int v3_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
718 struct cpufreq_freqs *freq = data;
// CPUs in any other mode are skipped.
720 if (per_cpu(core_state, freq->cpu).mode != V3_PSTATE_EXTERNAL_CONTROL) {
724 if (val == CPUFREQ_POSTCHANGE) {
725 DEBUG("P-State: frequency change took effect on cpu %u (now %u kHz)\n",
726 freq->cpu, freq->new);
727 per_cpu(core_state, freq->cpu).cur_freq_khz = freq->new;
// Notifier block registered and unregistered by governor_run.
735 static struct notifier_block v3_cpufreq_notifier_block = {
736 .notifier_call = v3_cpufreq_notifier
741 * This stub governor is simply a placeholder for preventing
742 * frequency changes from the Linux side. For now, we simply leave
743 * the frequency as is when we acquire control.
// Event handler of the stub governor (stub_governor.governor).
//   CPUFREQ_GOV_START  - register the transition notifier if this is the first
//                        CPU, bump the user count, and seed the per-CPU
//                        frequency fields from the policy.
//   CPUFREQ_GOV_STOP   - drop the user count, unregister the notifier when it
//                        reaches zero, and zero the per-CPU frequency fields.
//   CPUFREQ_GOV_LIMITS - handling not visible in this excerpt.
// Any other event is logged as an error. Return values are not visible here.
745 static int governor_run(struct cpufreq_policy *policy, unsigned int event)
747 unsigned cpu = policy->cpu;
750 /* we can't use cpufreq_driver_target here as it can result
751 * in a circular dependency, so we'll keep the current frequency as is
753 case CPUFREQ_GOV_START:
754 BUG_ON(!policy->cur);
756 mutex_lock(&v3_governor_mutex);
758 if (cpus_using_v3_governor == 0) {
759 cpufreq_register_notifier(&v3_cpufreq_notifier_block,
760 CPUFREQ_TRANSITION_NOTIFIER);
763 cpus_using_v3_governor++;
765 per_cpu(core_state, cpu).set_freq_khz = policy->cur;
766 per_cpu(core_state, cpu).cur_freq_khz = policy->cur;
767 per_cpu(core_state, cpu).max_freq_khz = policy->max;
768 per_cpu(core_state, cpu).min_freq_khz = policy->min;
770 mutex_unlock(&v3_governor_mutex);
772 case CPUFREQ_GOV_STOP:
773 mutex_lock(&v3_governor_mutex);
775 cpus_using_v3_governor--;
777 if (cpus_using_v3_governor == 0) {
778 cpufreq_unregister_notifier(
779 &v3_cpufreq_notifier_block,
780 CPUFREQ_TRANSITION_NOTIFIER);
783 per_cpu(core_state, cpu).set_freq_khz = 0;
784 per_cpu(core_state, cpu).cur_freq_khz = 0;
785 per_cpu(core_state, cpu).max_freq_khz = 0;
786 per_cpu(core_state, cpu).min_freq_khz = 0;
788 mutex_unlock(&v3_governor_mutex);
790 case CPUFREQ_GOV_LIMITS:
794 ERROR("Undefined governor command (%u)\n", event);
// The placeholder cpufreq governor, registered under PALACIOS_GOVNAME with
// governor_run as its event handler.
802 static struct cpufreq_governor stub_governor =
804 .name = PALACIOS_GOVNAME,
805 .governor = governor_run,
806 .owner = THIS_MODULE,
// Work queue on which frequency-change requests are run (pstate_switch_workfn).
// Created in pstate_linux_init, destroyed in pstate_linux_deinit.
810 static struct workqueue_struct *pstate_wq;
813 struct work_struct work;
// Register stub_governor with cpufreq.
// NOTE(review): the return value of cpufreq_register_governor is discarded.
819 static inline void pstate_register_linux_governor(void)
821 cpufreq_register_governor(&stub_governor);
// Remove stub_governor from cpufreq.
825 static inline void pstate_unregister_linux_governor(void)
827 cpufreq_unregister_governor(&stub_governor);
// Linux-side setup: register the stub governor, then create the work queue.
// The visible lines show the governor being unregistered again on the error
// path after the work queue message; return values are not visible here.
831 static int pstate_linux_init(void)
833 pstate_register_linux_governor();
834 pstate_wq = create_workqueue("v3vee_pstate_wq");
836 ERROR("Could not create work queue\n");
843 pstate_unregister_linux_governor();
// Linux-side teardown: unregister the stub governor, then drain and destroy
// the work queue.
848 static void pstate_linux_deinit(void)
850 pstate_unregister_linux_governor();
851 flush_workqueue(pstate_wq);
852 destroy_workqueue(pstate_wq);
// Look up the cpufreq governor currently active on cpu, copy its name into a
// freshly allocated MAX_GOV_NAME_LEN buffer, and stash that buffer in the
// current CPU core_state.linux_governor. The temporary policy struct is freed
// on both visible exit paths. The store through buf and the return values are
// not visible in this excerpt.
856 static int get_current_governor(char **buf, unsigned int cpu)
858 struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy));
859 char * govname = NULL;
862 ERROR("could not allocate cpufreq_policy\n");
866 if (cpufreq_get_policy(policy, cpu) != 0) {
867 ERROR("Could not get current cpufreq policy\n");
871 /* We're in interrupt context, should probably not wait here */
872 govname = palacios_alloc(MAX_GOV_NAME_LEN);
874 ERROR("Could not allocate space for governor name\n");
// NOTE(review): strncpy with the full buffer size leaves govname unterminated
// if the source name is MAX_GOV_NAME_LEN characters or longer - confirm the
// kernel bounds governor names below that.
878 strncpy(govname, policy->governor->name, MAX_GOV_NAME_LEN);
// NOTE(review): this writes the core_state of the CPU executing this code,
// which is assumed to be the cpu argument - confirm against callers.
880 get_cpu_var(core_state).linux_governor = govname;
881 put_cpu_var(core_state);
885 palacios_free(policy);
890 palacios_free(policy);
895 /* passed to the userspacehelper interface for cleanup */
// Cleanup callback handed to call_usermodehelper_fns by governor_switch:
// frees the third argv entry and then the argv array itself.
// presumably argv[2] is the command string built in governor_switch - the
// lines that populate argv are not visible in this excerpt.
896 static void gov_switch_cleanup(struct subprocess_info * s)
898 palacios_free(s->argv[2]);
899 palacios_free(s->argv);
905 * @s - the governor to switch to
906 * TODO: this should probably be submitted to a work queue
907 * so we don't have to run it in interrupt context
// Ask user space to switch the cpufreq governor of cpu to s by launching
// /bin/sh with a command that echoes s into that CPU scaling_governor sysfs
// file. The helper is started with UMH_NO_WAIT, so the returned value only
// reflects whether the launch was queued, not whether the switch succeeded.
// argv and the command buffer are released later by gov_switch_cleanup.
909 static int governor_switch(char * s, unsigned int cpu)
911 char * path_str = NULL;
914 static char * envp[] = {
917 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
920 argv = palacios_alloc(4*sizeof(char*));
922 ERROR("Couldn't allocate argv struct\n");
926 path_str = palacios_alloc(MAX_PATH_LEN);
928 ERROR("Couldn't allocate path string\n");
931 memset(path_str, 0, MAX_PATH_LEN);
// s is interpolated into a shell command unescaped; the callers visible in
// this file pass PALACIOS_GOVNAME or a governor name saved from the kernel.
933 snprintf(path_str, MAX_PATH_LEN, "echo %s > /sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor", s, cpu);
940 /* KCH: we can't wait here to actually see if we succeeded, we're in interrupt context */
941 return call_usermodehelper_fns("/bin/sh", argv, envp, UMH_NO_WAIT, NULL, gov_switch_cleanup, NULL);
// Free the saved governor name of the current CPU.
// NOTE(review): the pointer is not cleared after the free in the visible lines.
949 static inline void free_linux_governor(void)
951 palacios_free(get_cpu_var(core_state).linux_governor);
952 put_cpu_var(core_state);
// Save the governor currently active on this CPU and switch the CPU to the
// stub governor (PALACIOS_GOVNAME). Return values are not visible in this excerpt.
956 static int linux_setup_palacios_governor(void)
// NOTE(review): get_cpu is called here but no put_cpu is visible in this
// excerpt - confirm preemption is re-enabled on every path.
959 unsigned int cpu = get_cpu();
962 /* KCH: we assume the v3vee governor is already
963 * registered with kernel by this point
966 if (get_current_governor(&gov, cpu) < 0) {
967 ERROR("Could not get current governor\n");
971 DEBUG("saving current governor (%s)\n", gov);
// get_current_governor also stores its buffer in core_state.linux_governor.
973 get_cpu_var(core_state).linux_governor = gov;
974 put_cpu_var(core_state);
976 DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME);
978 /* set the new one to ours */
980 if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) {
981 ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME);
// EXTERNAL-mode p-state query: walk the cpufreq frequency table of the current
// CPU looking for the entry equal to the policy current frequency, skipping
// invalid entries. The counting and return statements are not visible in this
// excerpt.
990 static uint64_t linux_get_pstate(void)
992 struct cpufreq_policy * policy = NULL;
993 struct cpufreq_frequency_table *table;
995 unsigned int count = 0;
// NOTE(review): no put_cpu matching this get_cpu is visible in this excerpt.
996 unsigned int cpu = get_cpu();
1000 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1002 ERROR("Could not allocate policy struct\n");
// NOTE(review): the result of cpufreq_get_policy is ignored here, and table is
// dereferenced below without a visible NULL check.
1006 cpufreq_get_policy(policy, cpu);
1007 table = cpufreq_frequency_get_table(cpu);
1009 for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
1011 if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
1015 if (table[i].frequency == policy->cur) {
1022 palacios_free(policy);
// EXTERNAL-mode frequency query: fetch the cpufreq policy of the current CPU
// into a temporary buffer. The statement that extracts the frequency and the
// return statements are not visible in this excerpt.
1029 static uint64_t linux_get_freq(void)
1032 struct cpufreq_policy * policy = NULL;
// NOTE(review): no put_cpu matching this get_cpu is visible in this excerpt.
1033 unsigned int cpu = get_cpu();
1036 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1038 ERROR("Could not allocate policy struct\n");
1042 if (cpufreq_get_policy(policy, cpu)) {
1043 ERROR("Could not get current policy\n");
1049 palacios_free(policy);
// Work-queue handler that carries out a frequency change requested by
// linux_set_pstate or linux_set_freq. It records the requested frequency in
// core_state.set_freq_khz, clamps it to the per-CPU min/max, and hands it to
// __cpufreq_driver_target. Runs with v3_governor_mutex held and frees the
// work item when done.
1055 pstate_switch_workfn (struct work_struct *work)
// The cast assumes the work_struct sits at the start of pstate_work_t.
1057 pstate_work_t * pwork = (pstate_work_t*)work;
1058 struct cpufreq_policy * policy = NULL;
// NOTE(review): this is the CPU executing the work item, which is assumed to
// be the CPU that queued it - confirm; no put_cpu is visible in this excerpt.
1060 unsigned int cpu = get_cpu();
1063 mutex_lock(&v3_governor_mutex);
1065 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1067 ERROR("Could not allocate space for cpufreq policy\n");
1071 if (cpufreq_get_policy(policy, cpu) != 0) {
1072 ERROR("Could not get cpufreq policy\n");
// NOTE(review): get_cpu_var is evaluated up to five times below against a
// single put_cpu_var - confirm the preemption count stays balanced.
1077 get_cpu_var(core_state).set_freq_khz = freq;
1079 if (freq < get_cpu_var(core_state).min_freq_khz) {
1080 freq = get_cpu_var(core_state).min_freq_khz;
1082 if (freq > get_cpu_var(core_state).max_freq_khz) {
1083 freq = get_cpu_var(core_state).max_freq_khz;
1085 put_cpu_var(core_state);
1087 INFO("P-state: requesting frequency change on core %u to %llu\n", cpu, freq);
1088 __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
1091 palacios_free(policy);
1093 palacios_free(work);
1094 mutex_unlock(&v3_governor_mutex);
// EXTERNAL-mode p-state request: map p-state index p onto an entry of the
// cpufreq frequency table of the current CPU and queue a work item that asks
// cpufreq for that frequency. Invalid table entries are skipped; if p lies
// beyond the table, the last valid entry is used instead. The queued work
// item is freed by pstate_switch_workfn; on the visible error path it is
// freed here. Return values are not visible in this excerpt.
1098 static int linux_set_pstate(uint64_t p)
1100 struct cpufreq_policy * policy = NULL;
1101 struct cpufreq_frequency_table *table;
1102 pstate_work_t * work = NULL;
1104 unsigned int count = 0;
// NOTE(review): no put_cpu matching this get_cpu is visible in this excerpt.
1107 unsigned int cpu = get_cpu();
1110 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1112 ERROR("Could not allocate policy struct\n");
1116 work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
1118 ERROR("Could not allocate work struct\n");
1122 if (cpufreq_get_policy(policy, cpu)) {
1123 ERROR("Could not get current policy\n");
// NOTE(review): table is dereferenced without a visible NULL check.
1126 table = cpufreq_frequency_get_table(cpu);
1128 for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
1130 if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
// The casts below assume the work_struct sits at the start of pstate_work_t.
1136 INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1137 work->freq = table[i].frequency;
1138 queue_work(pstate_wq, (struct work_struct*)work);
1148 /* we need to deal with the case in which we get a number > max pstate */
1150 INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1151 work->freq = table[last_valid].frequency;
1152 queue_work(pstate_wq, (struct work_struct*)work);
1155 palacios_free(policy);
1159 palacios_free(work);
1161 palacios_free(policy);
// EXTERNAL-mode frequency request: compare f against the policy min and max of
// the current CPU and queue a work item for pstate_switch_workfn. The
// statements inside the min/max branches and the assignment of work->freq are
// not visible in this excerpt. The queued work item is freed by the work
// function; on the visible error path it is freed here.
1166 static int linux_set_freq(uint64_t f)
1168 struct cpufreq_policy * policy = NULL;
1169 pstate_work_t * work = NULL;
// NOTE(review): no put_cpu matching this get_cpu is visible in this excerpt.
1171 unsigned int cpu = get_cpu();
1174 policy = palacios_alloc(sizeof(struct cpufreq_policy));
1176 ERROR("Could not allocate policy struct\n");
1180 work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
1182 ERROR("Could not allocate work struct\n");
1186 if (cpufreq_get_policy(policy, cpu) != 0) {
1187 ERROR("Could not get cpufreq policy\n");
1191 if (f < policy->min) {
1193 } else if (f > policy->max) {
1199 INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
1201 queue_work(pstate_wq, (struct work_struct*)work);
1203 palacios_free(policy);
1207 palacios_free(work);
1209 palacios_free(policy);
// Switch the current CPU back to the governor saved in
// core_state.linux_governor and release the saved name on both visible exit
// paths. Return values are not visible in this excerpt.
1214 static int linux_restore_defaults(void)
// NOTE(review): no put_cpu matching this get_cpu is visible in this excerpt.
1217 unsigned int cpu = get_cpu();
1220 gov = get_cpu_var(core_state).linux_governor;
1221 put_cpu_var(core_state);
1223 DEBUG("restoring previous governor (%s)\n", gov);
// governor_switch copies gov into its own command buffer via snprintf before
// returning, so freeing the saved name afterwards is safe with respect to
// that call.
1225 if (governor_switch(gov, cpu) < 0) {
1226 ERROR("Could not restore governor to (%s)\n", gov);
1230 free_linux_governor();
1234 free_linux_governor();
1240 /******************************************************************
1241 Generic Interface as provided to Palacios and to the rest of the
1243 ******************************************************************/
// Per-core initialization. Puts the core in HOST control, seeds the p-state
// bounds from the architecture hooks (or zero when there are none), records
// whether cpufreq is managing this CPU together with its frequency limits,
// and logs the ACPI performance-state table.
1245 static void init_core(void)
1248 struct cpufreq_policy *p;
1251 //DEBUG("P-State Core Init\n");
// NOTE(review): get_cpu_var(core_state) is evaluated many times in this
// function against a single put_cpu_var - confirm the preemption count stays
// balanced.
1253 get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1254 get_cpu_var(core_state).cur_pstate = 0;
1256 if (machine_state.funcs) {
1257 get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate();
1258 get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate();
1260 get_cpu_var(core_state).min_pstate = 0;
1261 get_cpu_var(core_state).max_pstate = 0;
1265 cpu = get_cpu(); put_cpu();
1267 p = cpufreq_cpu_get(cpu);
1270 get_cpu_var(core_state).have_cpufreq = 0;
1271 get_cpu_var(core_state).min_freq_khz=0;
1272 get_cpu_var(core_state).max_freq_khz=0;
1273 get_cpu_var(core_state).cur_freq_khz=0;
1275 get_cpu_var(core_state).have_cpufreq = 1;
1276 get_cpu_var(core_state).min_freq_khz=p->min;
1277 get_cpu_var(core_state).max_freq_khz=p->max;
// NOTE(review): the closing brace before cpufreq_cpu_put suggests the put runs
// on both branches, including the one where p is NULL - confirm.
1278 get_cpu_var(core_state).cur_freq_khz=p->cur; } cpufreq_cpu_put(p);
1279 put_cpu_var(core_state);
// NOTE(review): processors and ->performance are dereferenced without a
// visible NULL check, unlike supports_pstates_intel.
1282 for (i=0;i<get_cpu_var(processors)->performance->state_count; i++) {
1283 INFO("P-State: %u: freq=%llu ctrl=%llx",
1285 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1286 get_cpu_var(processors)->performance->states[i].control);
1288 put_cpu_var(processors);
// Forward declaration: deinit_core needs the release entry point defined later.
1293 void palacios_pstate_ctrl_release(void);
// Per-core teardown: hand p-state control of this core back to the host.
1296 static void deinit_core(void)
1298 DEBUG("P-State Core Deinit\n");
1299 palacios_pstate_ctrl_release();
// Report the p-state characteristics of the calling core into c.
// c is zeroed first. features always includes INTERNAL control, adds EXTERNAL
// when cpufreq manages this core, and adds DIRECT on AMD or Intel. The current
// mode, p-state range and frequency range are copied from core_state.
1305 void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c)
1307 memset(c,0,sizeof(struct v3_cpu_pstate_chars));
1310 c->features = V3_PSTATE_INTERNAL_CONTROL;
// NOTE(review): get_cpu_var is evaluated eight times in this function against
// a single put_cpu_var - confirm the preemption count stays balanced.
1312 if (get_cpu_var(core_state).have_cpufreq) {
1313 c->features |= V3_PSTATE_EXTERNAL_CONTROL;
// DIRECT is advertised from the vendor alone, not from supports_pstates.
1316 if (machine_state.arch==AMD || machine_state.arch==INTEL) {
1317 c->features |= V3_PSTATE_DIRECT_CONTROL;
1319 c->cur_mode = get_cpu_var(core_state).mode;
1320 c->min_pstate = get_cpu_var(core_state).min_pstate;
1321 c->max_pstate = get_cpu_var(core_state).max_pstate;
1322 c->cur_pstate = get_cpu_var(core_state).cur_pstate;
1323 c->min_freq_khz = get_cpu_var(core_state).min_freq_khz;
1324 c->max_freq_khz = get_cpu_var(core_state).max_freq_khz;
1325 c->cur_freq_khz = get_cpu_var(core_state).cur_freq_khz;
1327 put_cpu_var(core_state);
// Return the current p-state of the calling core: from the architecture hook
// in DIRECT mode, from cpufreq in EXTERNAL mode. The value returned in the
// remaining modes is not visible in this excerpt.
1334 uint64_t palacios_pstate_ctrl_get_pstate(void)
// NOTE(review): when the first test fails, get_cpu_var is evaluated a second
// time but only one put_cpu_var follows on each remaining path - confirm.
1336 if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) {
1337 put_cpu_var(core_state);
1338 return machine_state.funcs->get_pstate();
1339 } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1340 put_cpu_var(core_state);
1341 return linux_get_pstate();
1343 put_cpu_var(core_state);
// Request p-state p on the calling core: through the architecture hook in
// DIRECT mode, through cpufreq in EXTERNAL mode. The result of
// linux_set_pstate is discarded. No action is visible for the other modes.
1349 void palacios_pstate_ctrl_set_pstate(uint64_t p)
// NOTE(review): when the first test fails, get_cpu_var is evaluated a second
// time but only one put_cpu_var follows on each remaining path - confirm.
1351 if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) {
1352 put_cpu_var(core_state);
1353 machine_state.funcs->set_pstate(p);
1354 } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1355 put_cpu_var(core_state);
1356 linux_set_pstate(p);
1358 put_cpu_var(core_state);
// Adapter with a void-pointer argument (the p-state is carried in the pointer
// value itself). The cast through uint8_t keeps only the low 8 bits of the
// requested p-state.
1363 void palacios_pstate_ctrl_set_pstate_wrapper(void *p)
1365 palacios_pstate_ctrl_set_pstate((uint8_t)(uint64_t)p);
// Return the current frequency of the calling core via cpufreq when the core
// is in EXTERNAL mode. The value returned in other modes is not visible in
// this excerpt.
1369 uint64_t palacios_pstate_ctrl_get_freq(void)
1371 if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1372 put_cpu_var(core_state);
1373 return linux_get_freq();
1375 put_cpu_var(core_state);
// Frequency request entry point; only acts when the calling core is in
// EXTERNAL mode. The call made inside that branch is not visible in this
// excerpt (presumably linux_set_freq - TODO confirm).
1381 void palacios_pstate_ctrl_set_freq(uint64_t p)
1383 if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1384 put_cpu_var(core_state);
1387 put_cpu_var(core_state);
// HOST -> EXTERNAL transition for the calling core. Requires cpufreq; without
// it an error is logged. Otherwise the stub governor is installed and the
// mode is updated. Return values are not visible in this excerpt.
1392 static int switch_to_external(void)
1394 DEBUG("switch from host control to external\n");
1396 if (!(get_cpu_var(core_state).have_cpufreq)) {
1397 put_cpu_var(core_state);
1398 ERROR("No cpufreq - cannot switch to external...\n");
1401 put_cpu_var(core_state);
// NOTE(review): the result of linux_setup_palacios_governor is discarded, so
// the mode changes even if the governor switch failed.
1403 linux_setup_palacios_governor();
1405 get_cpu_var(core_state).mode=V3_PSTATE_EXTERNAL_CONTROL;
1406 put_cpu_var(core_state);
// HOST -> DIRECT transition for the calling core. When cpufreq is present the
// stub governor is installed first so Linux stops changing the frequency;
// then, if the architecture provides an arch_init hook, the mode is set to
// DIRECT and the hook is run. Return values are not visible in this excerpt.
1412 static int switch_to_direct(void)
1414 DEBUG("switch from host control to direct\n");
1416 if (get_cpu_var(core_state).have_cpufreq) {
1417 put_cpu_var(core_state);
1418 DEBUG("switch to direct from cpufreq\n");
1420 // The implementation would set the policy and governor to peg cpu
1421 // regardless of load
1422 linux_setup_palacios_governor();
1424 put_cpu_var(core_state);
1427 if (machine_state.funcs && machine_state.funcs->arch_init) {
1428 get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL;
1430 machine_state.funcs->arch_init();
1432 put_cpu_var(core_state);
// HOST -> INTERNAL transition for the calling core. When cpufreq is present
// the stub governor is installed so Linux stops changing the frequency; the
// mode is then set to INTERNAL. Return values are not visible in this excerpt.
1439 static int switch_to_internal(void)
1441 DEBUG("switch from host control to internal\n");
1443 if (get_cpu_var(core_state).have_cpufreq) {
1444 put_cpu_var(core_state);
1445 DEBUG("switch to internal on machine with cpu freq\n");
1446 linux_setup_palacios_governor();
1448 put_cpu_var(core_state);
1451 get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL;
1453 put_cpu_var(core_state);
// EXTERNAL -> HOST transition for the calling core: restore the saved Linux
// governor and set the mode back to HOST. Logs an error if the core has no
// cpufreq, since EXTERNAL mode should not have been reachable then.
// Return values are not visible in this excerpt.
1459 static int switch_from_external(void)
1461 if (!(get_cpu_var(core_state).have_cpufreq)) {
1462 put_cpu_var(core_state);
1463 ERROR("No cpufreq - how did we get here... external...\n");
1466 put_cpu_var(core_state);
1468 DEBUG("Switching back to host control from external\n");
// have_cpufreq was already tested above; this second test mirrors the other
// switch_from_* functions.
1470 if (get_cpu_var(core_state).have_cpufreq) {
1471 put_cpu_var(core_state);
1472 linux_restore_defaults();
1474 put_cpu_var(core_state);
1477 get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1478 put_cpu_var(core_state);
1484 static int switch_from_direct(void)
1487 DEBUG("Switching back to host control from direct\n");
1489 // Set maximum performance, just in case there is no host control
1490 machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate);
1491 machine_state.funcs->arch_deinit();
1493 if (get_cpu_var(core_state).have_cpufreq) {
1494 put_cpu_var(core_state);
1495 linux_restore_defaults();
1497 put_cpu_var(core_state);
1500 get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1502 put_cpu_var(core_state);
1508 static int switch_from_internal(void)
1510 DEBUG("Switching back to host control from internal\n");
1512 if (get_cpu_var(core_state).have_cpufreq) {
1513 put_cpu_var(core_state);
1514 linux_restore_defaults();
1516 put_cpu_var(core_state);
1519 get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1521 put_cpu_var(core_state);
1528 void palacios_pstate_ctrl_acquire(uint32_t type)
1530 if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) {
1531 put_cpu_var(core_state);
1532 palacios_pstate_ctrl_release();
1534 put_cpu_var(core_state);
1538 case V3_PSTATE_EXTERNAL_CONTROL:
1539 switch_to_external();
1541 case V3_PSTATE_DIRECT_CONTROL:
1544 case V3_PSTATE_INTERNAL_CONTROL:
1545 switch_to_internal();
1548 ERROR("Unknown pstate control type %u\n",type);
1554 // Wrappers for xcalls
1555 static void palacios_pstate_ctrl_acquire_external(void)
1557 palacios_pstate_ctrl_acquire(V3_PSTATE_EXTERNAL_CONTROL);
1560 static void palacios_pstate_ctrl_acquire_direct(void)
1562 palacios_pstate_ctrl_acquire(V3_PSTATE_DIRECT_CONTROL);
1566 void palacios_pstate_ctrl_release(void)
1568 if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) {
1569 put_cpu_var(core_state);
1572 put_cpu_var(core_state);
1574 switch (get_cpu_var(core_state).mode) {
1575 case V3_PSTATE_EXTERNAL_CONTROL:
1576 put_cpu_var(core_state);
1577 switch_from_external();
1579 case V3_PSTATE_DIRECT_CONTROL:
1580 put_cpu_var(core_state);
1581 switch_from_direct();
1583 case V3_PSTATE_INTERNAL_CONTROL:
1584 put_cpu_var(core_state);
1585 switch_from_internal();
1588 put_cpu_var(core_state);
1589 ERROR("Unknown pstate control type %u\n",core_state.mode);
1595 static void update_hw_pstate(void *arg)
1597 if (machine_state.funcs && machine_state.funcs->get_pstate) {
1598 get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate();
1599 put_cpu_var(core_state);
1601 get_cpu_var(core_state).cur_hw_pstate = 0;
1602 put_cpu_var(core_state);
1607 /***************************************************************************
1608 PROC Interface to expose state
1609 ***************************************************************************/
1611 static int pstate_show(struct seq_file * file, void * v)
1614 unsigned int numcpus = num_online_cpus();
1616 seq_printf(file, "V3VEE DVFS Status\n\n");
1618 for (cpu=0;cpu<numcpus;cpu++) {
1619 palacios_xcall(cpu,update_hw_pstate,0);
1622 for (cpu=0;cpu<numcpus;cpu++) {
1623 struct pstate_core_info *s = &per_cpu(core_state,cpu);
1624 seq_printf(file,"pcore %u: hw pstate 0x%llx mode %s ",cpu,
1626 s->mode==V3_PSTATE_HOST_CONTROL ? "host" :
1627 s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" :
1628 s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" :
1629 s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN");
1630 if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) {
1631 seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz);
1633 if (s->mode==V3_PSTATE_DIRECT_CONTROL) {
1634 seq_printf(file,"(min=%llu max=%llu cur=%llu) ",s->min_pstate, s->max_pstate, s->cur_pstate);
1636 seq_printf(file,"\n");
1641 static int pstate_open(struct inode * inode, struct file * file)
1643 return single_open(file, pstate_show, NULL);
1647 static struct file_operations pstate_fops = {
1648 .owner = THIS_MODULE,
1649 .open = pstate_open,
1651 .llseek = seq_lseek,
1652 .release = seq_release
1655 static int pstate_hw_show(struct seq_file * file, void * v)
1659 seq_printf(file, "V3VEE DVFS Hardware Info\n(all logical cores assumed identical)\n\n");
1661 seq_printf(file, "Arch: \t%s\n"
1663 machine_state.arch==INTEL ? "Intel" :
1664 machine_state.arch==AMD ? "AMD" : "Other",
1665 machine_state.supports_pstates ? "Yes" : "No");
1668 #define YN(x) ((x) ? "Y" : "N")
1670 if (machine_state.arch==INTEL) {
1671 seq_printf(file,"SpeedStep: \t%s\n",YN(machine_state.have_speedstep));
1672 seq_printf(file,"APERF/MPERF: \t%s\n",YN(machine_state.have_pstate_hw_coord));
1673 seq_printf(file,"IDA or TurboCore: \t%s\n",YN(machine_state.have_opportunistic));
1674 seq_printf(file,"Policy Hint: \t%s\n",YN(machine_state.have_policy_hint));
1675 seq_printf(file,"Hardware Policy: \t%s\n",YN(machine_state.have_hwp));
1676 seq_printf(file,"Hardware Duty Cycle: \t%s\n",YN(machine_state.have_hdc));
1677 seq_printf(file,"MWAIT extensions: \t%s\n",YN(machine_state.have_mwait_ext));
1678 seq_printf(file,"MWAIT wake on intr: \t%s\n",YN(machine_state.have_mwait_int));
1681 if (machine_state.arch==AMD) {
1682 seq_printf(file,"PState: \t%s\n",YN(machine_state.have_pstate));
1683 seq_printf(file,"APERF/MPERF: \t%s\n",YN(machine_state.have_pstate_hw_coord));
1684 seq_printf(file,"CoreBoost: \t%s\n",YN(machine_state.have_coreboost));
1685 seq_printf(file,"Feedback: \t%s\n",YN(machine_state.have_feedback));
1689 seq_printf(file,"\nPstate\tCtrl\tKHz\tmW\tuS(X)\tuS(B)\n");
1690 numstates = get_cpu_var(processors)->performance->state_count;
1692 seq_printf(file,"UNKNOWN\n");
1695 for (i=0;i<numstates;i++) {
1697 "%u\t%llx\t%llu\t%llu\t%llu\t%llu\n",
1699 get_cpu_var(processors)->performance->states[i].control,
1700 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1701 get_cpu_var(processors)->performance->states[i].power,
1702 get_cpu_var(processors)->performance->states[i].transition_latency,
1703 get_cpu_var(processors)->performance->states[i].bus_master_latency);
1706 put_cpu_var(processors);
1708 seq_printf(file,"\nAvailable Modes:");
1709 seq_printf(file," host");
1710 if (get_cpu_var(core_state).have_cpufreq) {
1711 seq_printf(file," external");
1713 put_cpu_var(core_state);
1714 if (machine_state.supports_pstates) {
1715 seq_printf(file," direct");
1717 seq_printf(file," internal\n");
1722 static int pstate_hw_open(struct inode * inode, struct file * file)
1724 return single_open(file, pstate_hw_show, NULL);
1728 static struct file_operations pstate_hw_fops = {
1729 .owner = THIS_MODULE,
1730 .open = pstate_hw_open,
1732 .llseek = seq_lseek,
1733 .release = seq_release
1737 int pstate_proc_setup(void)
1739 struct proc_dir_entry *proc;
1740 struct proc_dir_entry *prochw;
1742 proc = create_proc_entry("v3-dvfs",0444, palacios_get_procdir());
1745 ERROR("Failed to create proc entry for p-state control\n");
1749 proc->proc_fops = &pstate_fops;
1751 INFO("/proc/v3vee/v3-dvfs successfully created\n");
1753 prochw = create_proc_entry("v3-dvfs-hw",0444,palacios_get_procdir());
1757 ERROR("Failed to create proc entry for p-state hw info\n");
1761 prochw->proc_fops = &pstate_hw_fops;
1763 INFO("/proc/v3vee/v3-dvfs-hw successfully created\n");
/*
 * Remove the "v3-dvfs-hw" and "v3-dvfs" proc files from the palacios
 * proc directory.
 */
void pstate_proc_teardown(void)
{
    // Hardware info file first, then the status file
    remove_proc_entry("v3-dvfs-hw", palacios_get_procdir());

    remove_proc_entry("v3-dvfs", palacios_get_procdir());
}
1774 /********************************************************************
1775 User interface (ioctls)
1776 ********************************************************************/
/*
 * Handler for DVFS control requests coming from user space.
 *
 * arg is a user-space pointer to a struct v3_dvfs_ctrl_request.  The
 * request is copied in, its target pcore is checked against
 * num_online_cpus(), and the requested operation is then executed on
 * that pcore through palacios_xcall().
 *
 * NOTE(review): presumably the dispatch is on r.cmd (the cmd parameter
 * is not referenced in the visible lines) - confirm.
 * NOTE(review): the return statements are not visible here; confirm
 * the error codes against the full file before relying on them.
 */
1778 static int dvfs_ctrl(unsigned int cmd, unsigned long arg)
1780 struct v3_dvfs_ctrl_request r;
// Pull the request out of user memory; a non-zero result means failure
1782 if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) {
1783 ERROR("Failed to copy DVFS request from user\n");
// NOTE(review): this bound assumes online cpu ids are 0..num_online_cpus()-1
1787 if (r.pcore >= num_online_cpus()) {
1788 ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore);
// Acquire: the kind of control taken depends on r.acq_type
1793 case V3_DVFS_ACQUIRE: {
1794 switch (r.acq_type) {
1795 case V3_DVFS_EXTERNAL:
// NOTE(review): the callee takes no argument and is called through a
// void (*)(void*) cast; the NULL argument is not consumed
1796 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external, NULL);
1799 case V3_DVFS_DIRECT:
1800 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct, NULL);
1804 ERROR("Unknown DVFS acquire type %u\n",r.acq_type);
// Release: hand control of the target pcore back
1809 case V3_DVFS_RELEASE: {
1810 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release, NULL);
// Set frequency: r.freq_khz travels to the callee inside the pointer argument
1814 case V3_DVFS_SETFREQ: {
1815 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz);
// Set pstate: r.pstate is widened to 64 bits and carried in the pointer argument
1819 case V3_DVFS_SETPSTATE: {
1820 palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate);
1824 ERROR("Unknown DVFS command %u\n",r.cmd);
1832 void pstate_user_setup(void)
1834 add_global_ctrl(V3_DVFS_CTRL, dvfs_ctrl);
1838 void pstate_user_teardown(void)
1840 remove_global_ctrl(V3_DVFS_CTRL);
1843 static struct v3_host_pstate_ctrl_iface hooks = {
1844 .get_chars = palacios_pstate_ctrl_get_chars,
1845 .acquire = palacios_pstate_ctrl_acquire,
1846 .release = palacios_pstate_ctrl_release,
1847 .set_pstate = palacios_pstate_ctrl_set_pstate,
1848 .get_pstate = palacios_pstate_ctrl_get_pstate,
1849 .set_freq = palacios_pstate_ctrl_set_freq,
1850 .get_freq = palacios_pstate_ctrl_get_freq,
1855 static int pstate_ctrl_init(void)
1858 unsigned int numcpus = num_online_cpus();
1860 pstate_arch_setup();
1862 for (cpu=0;cpu<numcpus;cpu++) {
1863 palacios_xcall(cpu,(void ((*)(void*)))init_core,0);
1866 V3_Init_Pstate_Ctrl(&hooks);
1868 if (pstate_proc_setup()) {
1869 ERROR("Unable to initialize P-State Control\n");
1873 pstate_user_setup();
1875 pstate_linux_init();
1877 INFO("P-State Control Initialized\n");
1882 static int pstate_ctrl_deinit(void)
1885 unsigned int numcpus=num_online_cpus();
1887 pstate_linux_deinit();
1889 pstate_user_teardown();
1891 pstate_proc_teardown();
1893 // release pstate control if we have it, and we need to do this on each processor
1894 for (cpu=0;cpu<numcpus;cpu++) {
1895 palacios_xcall(cpu,(void (*)(void *))deinit_core,0);
1899 // Free any mapping table we built for Intel
1900 if (intel_pstate_to_ctrl && intel_pstate_to_ctrl != intel_pstate_to_ctrl_internal) {
1901 palacios_free(intel_pstate_to_ctrl);
1909 static struct linux_ext pstate_ext = {
1910 .name = "PSTATE_CTRL",
1911 .init = pstate_ctrl_init,
1912 .deinit = pstate_ctrl_deinit,
1914 .guest_deinit = NULL,
1918 register_extension(&pstate_ext);