From: Kyle Hale Date: Sat, 23 Aug 2014 03:40:10 +0000 (-0500) Subject: added linux cpufreq interface to dvfs code. X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=d3fa9d0b82291fafc35ce9bb7e777256d78c0ecd;p=palacios.git added linux cpufreq interface to dvfs code. Writes (setting of frequency/governors) currently occur in sysfs via linux userspace helper API. Reads occur through the cpufreq policy interface. --- diff --git a/linux_module/iface-pstate-ctrl.c b/linux_module/iface-pstate-ctrl.c index 2c0aa9b..ac06b20 100644 --- a/linux_module/iface-pstate-ctrl.c +++ b/linux_module/iface-pstate-ctrl.c @@ -21,7 +21,11 @@ #include #include #include +#include #include +#include +#include +#include #include #include #include @@ -34,20 +38,23 @@ #include "linux-exts.h" /* - This P-STATE control implementation includes: + This P-STATE control implementation includes: - - Direct control of Intel and AMD processor pstates - - External control of processor states via Linux (unimplemented) - - Internal control of processor states in Palacios (handoff from Linux) + - Direct control of Intel and AMD processor pstates + - External control of processor states via Linux (unimplemented) + - Internal control of processor states in Palacios (handoff from Linux) - Additionally, it provides a user-space interface for manipulating - p-state regardless of the host's functionality. This includes - an ioctl for commanding the implementation and a /proc file for - showing current status and capabilities. + Additionally, it provides a user-space interface for manipulating + p-state regardless of the host's functionality. This includes + an ioctl for commanding the implementation and a /proc file for + showing current status and capabilities. */ +#define PALACIOS_GOVNAME "v3vee" +#define MAX_PATH_LEN 128 +#define MAX_GOV_NAME_LEN 16 struct pstate_core_info { @@ -58,7 +65,7 @@ struct pstate_core_info { // V3_PSTATE_DIRECT_CONTROL // V3_PSTATE_INTERNAL_CONTROL uint32_t mode; - + // Apply if we are under the DIRECT state uint8_t cur_pstate; uint8_t max_pstate; @@ -70,20 +77,26 @@ struct pstate_core_info { uint64_t cur_freq_khz; uint64_t max_freq_khz; uint64_t min_freq_khz; - + // Intel-specific uint8_t prior_speedstep; uint8_t turbo_disabled; uint8_t no_turbo; - + int have_cpufreq; - + + // This is where we stash Linux's governor when we make a mode switch + char * linux_governor; + // We have this so we can restore the original frequency when we started + uint64_t original_hz; + }; static DEFINE_PER_CPU(struct pstate_core_info, core_state); + // These are used to assert DIRECT control over the core pstates struct pstate_core_funcs { void (*arch_init)(void); @@ -125,8 +138,8 @@ static struct pstate_machine_info machine_state; /**************************************************** - AMD DIRECT CONTROL -***************************************************/ + AMD DIRECT CONTROL + ***************************************************/ /* AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557 */ #define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061 @@ -181,26 +194,29 @@ static uint8_t supports_pstates_amd (void) machine_state.have_pstate_hw_coord = !!(ecx & 1); INFO("P-State: AMD: Pstates=%d Coreboost=%d Feedback=%d PstateHWCoord=%d\n", - machine_state.have_pstate, - machine_state.have_coreboost, - machine_state.have_feedback, - machine_state.have_pstate_hw_coord); - + machine_state.have_pstate, + machine_state.have_coreboost, + machine_state.have_feedback, + machine_state.have_pstate_hw_coord); + return machine_state.have_pstate; - - + + } + static void init_arch_amd(void) { /* KCH: nothing to do here */ } + static void deinit_arch_amd(void) { /* KCH: nothing to do here */ } + static uint8_t get_pstate_amd(void) { struct p_state_stat_reg_amd pstat; @@ -213,6 +229,7 @@ static uint8_t get_pstate_amd(void) return pstat.reg.pstate; } + static void set_pstate_amd(uint8_t p) { struct p_state_ctl_reg_amd pctl; @@ -225,6 +242,7 @@ static void set_pstate_amd(uint8_t p) put_cpu_var(core_state); } + /* * NOTE: HW may change this value at runtime */ @@ -262,14 +280,14 @@ static struct pstate_core_funcs amd_funcs = /*********************************************************** INTEL DIRECT CONTROL -**********************************************************/ + **********************************************************/ /* - This implementation uses SpeedStep, but does check - to see if the other features (MPERF/APERF, Turbo/IDA, HWP) - are available. -*/ + This implementation uses SpeedStep, but does check + to see if the other features (MPERF/APERF, Turbo/IDA, HWP) + are available. + */ /* Intel System Programmer's Manual Vol. 3B, 14-2 */ #define MSR_MPERF_IA32 0x000000e7 @@ -292,24 +310,24 @@ static struct pstate_core_funcs amd_funcs = "Often", the 16 bit field consists of a high order byte which is the frequency (the multiplier) and the low order byte is the voltage. -*/ + */ // MSR_PERF_CTL_IA32 r/w struct perf_ctl_reg_intel { union { uint64_t val; struct { - // This is the target - // Note, not the ACPI pstate, but - // Intel's notion of pstate is that it's opaque - // for lots of implementations it seems to be - // frequency_id : voltage_id - // where frequency_id is typically the multiplier - uint16_t pstate : 16; - uint16_t reserved : 16; - // set to 1 to *disengage* dynamic acceleration - // Note that "IDA" and "Turbo" use the same interface - uint16_t dynamic_accel_disable : 1; - uint32_t reserved2 : 31; + // This is the target + // Note, not the ACPI pstate, but + // Intel's notion of pstate is that it's opaque + // for lots of implementations it seems to be + // frequency_id : voltage_id + // where frequency_id is typically the multiplier + uint16_t pstate : 16; + uint16_t reserved : 16; + // set to 1 to *disengage* dynamic acceleration + // Note that "IDA" and "Turbo" use the same interface + uint16_t dynamic_accel_disable : 1; + uint32_t reserved2 : 31; } reg; } __attribute__((packed)); } __attribute__((packed)); @@ -319,9 +337,9 @@ struct perf_stat_reg_intel { union { uint64_t val; struct { - // this is the current - uint16_t pstate : 16; - uint64_t reserved : 48; + // this is the current + uint16_t pstate : 16; + uint64_t reserved : 48; } reg; } __attribute__((packed)); } __attribute__((packed)); @@ -331,9 +349,9 @@ struct enery_perf_bias_reg_intel { union { uint64_t val; struct { - // this is the current - uint8_t policy_hint : 4; - uint64_t reserved : 60; + // this is the current + uint8_t policy_hint : 4; + uint64_t reserved : 60; } reg; } __attribute__((packed)); } __attribute__((packed)); @@ -346,8 +364,8 @@ struct turbo_mode_info_reg_intel { uint8_t rsvd0 : 8; uint8_t max_noturbo_ratio : 8; uint8_t rsvd1 : 7; - uint8_t ppin_cap : 1; - uint8_t rsvd2 : 4; + uint8_t ppin_cap : 1; + uint8_t rsvd2 : 4; uint8_t ratio_limit : 1; uint8_t tdc_tdp_limit : 1; uint16_t rsvd3 : 10; @@ -356,13 +374,13 @@ struct turbo_mode_info_reg_intel { } reg; } __attribute__((packed)); } __attribute__((packed)); - + /* CPUID.01:ECX.AES(7) */ static uint8_t supports_pstates_intel(void) { /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H). - */ + */ uint32_t eax, ebx, ecx, edx; cpuid(0x1, &eax, &ebx, &ecx, &edx); @@ -381,14 +399,14 @@ static uint8_t supports_pstates_intel(void) INFO("P-State: Intel: Speedstep=%d, PstateHWCoord=%d, Opportunistic=%d PolicyHint=%d HWP=%d HDC=%d, MwaitExt=%d MwaitInt=%d \n", - machine_state.have_speedstep, - machine_state.have_pstate_hw_coord, - machine_state.have_opportunistic, - machine_state.have_policy_hint, - machine_state.have_hwp, - machine_state.have_hdc, - machine_state.have_mwait_ext, - machine_state.have_mwait_int ); + machine_state.have_speedstep, + machine_state.have_pstate_hw_coord, + machine_state.have_opportunistic, + machine_state.have_policy_hint, + machine_state.have_hwp, + machine_state.have_hdc, + machine_state.have_mwait_ext, + machine_state.have_mwait_int ); return machine_state.have_speedstep; } @@ -421,7 +439,7 @@ static void deinit_arch_intel(void) put_cpu_var(core_state); wrmsrl(MSR_MISC_ENABLE_IA32, val); - + } /* TODO: Intel P-states require sampling at intervals... */ @@ -446,16 +464,16 @@ static uint8_t get_pstate_intel(void) return (uint8_t) (pstate>>8); } - + static void set_pstate_intel(uint8_t p) { uint64_t val; /* ...Intel IDA (dynamic acceleration) - if (c->no_turbo && !c->turbo_disabled) { - val |= 1 << 32; - } - */ + if (c->no_turbo && !c->turbo_disabled) { + val |= 1 << 32; + } + */ // leave all bits along expect for the likely // fid bits @@ -486,7 +504,7 @@ static uint8_t get_min_pstate_intel(void) static uint8_t get_max_pstate_intel (void) { struct turbo_mode_info_reg_intel t; - + rdmsrl(MSR_PLATFORM_INFO_IA32, t.val); return t.reg.max_noturbo_ratio; @@ -506,70 +524,70 @@ static struct pstate_core_funcs intel_funcs = /*********************************************** Arch determination and setup -***********************************************/ - + ***********************************************/ + static inline void cpuid_string (uint32_t id, uint32_t dest[4]) { asm volatile("cpuid" - :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3)) - :"a"(id)); + :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3)) + :"a"(id)); } - + static int get_cpu_vendor (char name[13]) { uint32_t dest[4]; uint32_t maxid; - + cpuid_string(0,dest); maxid=dest[0]; ((uint32_t*)name)[0]=dest[1]; ((uint32_t*)name)[1]=dest[3]; ((uint32_t*)name)[2]=dest[2]; name[12]=0; - + return maxid; } static int is_intel (void) { - char name[13]; - get_cpu_vendor(name); - return !strcmp(name,"GenuineIntel"); + char name[13]; + get_cpu_vendor(name); + return !strcmp(name,"GenuineIntel"); } static int is_amd (void) { - char name[13]; - get_cpu_vendor(name); - return !strcmp(name,"AuthenticAMD"); + char name[13]; + get_cpu_vendor(name); + return !strcmp(name,"AuthenticAMD"); } static int pstate_arch_setup(void) { - + if (is_amd()) { machine_state.arch = AMD; machine_state.funcs = &amd_funcs; - machine_state.supports_pstates = supports_pstates_amd(); - INFO("PSTATE: P-State initialized for AMD\n"); + machine_state.supports_pstates = supports_pstates_amd(); + INFO("PSTATE: P-State initialized for AMD\n"); } else if (is_intel()) { machine_state.arch = INTEL; machine_state.funcs = &intel_funcs; - machine_state.supports_pstates = supports_pstates_intel(); + machine_state.supports_pstates = supports_pstates_intel(); INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n"); return 0; - + } else { - machine_state.arch = OTHER; - machine_state.funcs = NULL; - machine_state.supports_pstates = 0; + machine_state.arch = OTHER; + machine_state.funcs = NULL; + machine_state.supports_pstates = 0; INFO("PSTATE: P-state control: No support for direct control on this architecture\n"); return 0; } - + return 0; } @@ -577,64 +595,356 @@ static int pstate_arch_setup(void) /****************************************************************** Linux Interface -*****************************************************************/ + *****************************************************************/ -#if 0 -// The purpose of the stub governor is the pretend to keep -// the processor at the maximum frequency, while we manipulate he -// processor ccre directly + +/* + * This stub governor is simply a placeholder for preventing + * frequency changes from the Linux side. For now, we simply leave + * the frequency as is when we acquire control. + */ static int governor_run(struct cpufreq_policy *policy, unsigned int event) { - switch (event) { - case CPUFREQ_GOV_START: - case CPUFREQ_GOV_STOP: - cpu_freq_driver_target(policy, policy->max_freq); - case CPUFREQ_GOV_LIMITS: + switch (event) { + /* we can't use cpufreq_driver_target here as it can result + * in a circular dependency, so we'll just do nothing. + */ + case CPUFREQ_GOV_START: + case CPUFREQ_GOV_STOP: + case CPUFREQ_GOV_LIMITS: + /* do nothing */ + break; + default: + ERROR("Undefined governor command\n"); + return -1; } + + return 0; } + static struct cpufreq_governor stub_governor = { - .name="PALACIOS_STUB", - .governor=governor_run, - .owner=.THIS_MODULE, + .name = PALACIOS_GOVNAME, + .governor = governor_run, + .owner = THIS_MODULE, +}; + + +static inline void pstate_register_linux_governor(void) +{ + cpufreq_register_governor(&stub_governor); +} + + +static inline void pstate_unregister_linux_governor(void) +{ + cpufreq_unregister_governor(&stub_governor); } -static void linux_init(void) + +static int get_current_governor(char **buf, unsigned int cpu) { - // get_policy - // - // change to userspace governor - or change to our do nothing governor? (call set_speed) - // stash the old governor - // tell governor to do max freq + struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy)); + char * govname = NULL; + + if (!policy) { + ERROR("could not allocate cpufreq_policy\n"); + return -1; + } + + if (cpufreq_get_policy(policy, cpu) != 0) { + ERROR("Could not get current cpufreq policy\n"); + goto out_err; + } + + /* We're in interrupt context, should probably not wait here */ + govname = palacios_alloc(MAX_GOV_NAME_LEN); + if (!govname) { + ERROR("Could not allocate space for governor name\n"); + goto out_err; + } + + strncpy(govname, policy->governor->name, MAX_GOV_NAME_LEN); + + get_cpu_var(core_state).linux_governor = govname; + put_cpu_var(core_state); + + *buf = govname; + palacios_free(policy); + + return 0; + +out_err: + palacios_free(policy); + return -1; } -static void linux_deinit(void) + +/* passed to the userspacehelper interface for cleanup */ +static void gov_switch_cleanup(struct subprocess_info * s) { + palacios_free(s->argv[2]); + palacios_free(s->argv); } -static uint8_t linux_get_pstate(void) + +/* + * Switch governors + * @s - the governor to switch to + */ +static int governor_switch(char * s, unsigned int cpu) { - return 0; + char * path_str = NULL; + char ** argv = NULL; + + static char * envp[] = { + "HOME=/", + "TERM=linux", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL }; + + + argv = palacios_alloc(4*sizeof(char*)); + if (!argv) { + ERROR("Couldn't allocate argv struct\n"); + return -1; + } + + path_str = palacios_alloc(MAX_PATH_LEN); + if (!path_str) { + ERROR("Couldn't allocate path string\n"); + goto out_freeargv; + } + memset(path_str, 0, MAX_PATH_LEN); + + snprintf(path_str, MAX_PATH_LEN, "echo %s > /sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor", s, cpu); + + argv[0] = "/bin/sh"; + argv[1] = "-c"; + argv[2] = path_str; + argv[3] = NULL; + + /* KCH: we can't wait here to actually see if we succeeded, we're in interrupt context */ + return call_usermodehelper_fns("/bin/sh", argv, envp, UMH_NO_WAIT, NULL, gov_switch_cleanup, NULL); + +out_freeargv: + palacios_free(argv); + return -1; } -static void linux_set_pstate(uint8_t p) + +static inline void free_linux_governor(void) { + palacios_free(get_cpu_var(core_state).linux_governor); + put_cpu_var(core_state); } -static void linux_restore_defaults(void) + +static int linux_setup_palacios_governor(void) { + char * gov; + unsigned int cpu = get_cpu(); + + /* KCH: we assume the v3vee governor is already + * registered with kernel by this point + */ + + if (get_current_governor(&gov, cpu) < 0) { + ERROR("Could not get current governor\n"); + return -1; + } + + DEBUG("saving current governor (%s)\n", gov); + + get_cpu_var(core_state).linux_governor = gov; + put_cpu_var(core_state); + + DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME); + + /* set the new one to ours */ + if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) { + ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME); + return -1; + } + + return 0; } + +#if 0 +static int linux_deinit(void) +{ + return 0; +} #endif +static int linux_get_pstate(void) +{ + struct cpufreq_policy * policy = NULL; + struct cpufreq_frequency_table *table; + int cpu = get_cpu(); + unsigned int i = 0; + unsigned int count = 0; + + policy = palacios_alloc(sizeof(struct cpufreq_policy)); + if (!policy) { + ERROR("Could not allocate policy struct\n"); + return -1; + } + + cpufreq_get_policy(policy, cpu); + table = cpufreq_frequency_get_table(cpu); + + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) { + continue; + } + + if (table[i].frequency == policy->cur) { + break; + } + + count++; + } + + palacios_free(policy); + return count; +} + + +static int linux_get_freq(void) +{ + struct cpufreq_policy * policy = NULL; + int cpu = get_cpu(); + + policy = palacios_alloc(sizeof(struct cpufreq_policy)); + if (!policy) { + ERROR("Could not allocate policy struct\n"); + return -1; + } + + if (cpufreq_get_policy(policy, cpu)) { + ERROR("Could not get current policy\n"); + return -1; + } + + return policy->cur; +} + + +static int linux_set_pstate(uint8_t p) +{ + struct cpufreq_policy * policy = NULL; + struct cpufreq_frequency_table *table; + int cpu = get_cpu(); + unsigned int i = 0; + unsigned int count = 0; + int state_set = 0; + int last_valid = 0; + + policy = palacios_alloc(sizeof(struct cpufreq_policy)); + if (!policy) { + ERROR("Could not allocate policy struct\n"); + return -1; + } + + if (cpufreq_get_policy(policy, cpu)) { + ERROR("Could not get current policy\n"); + goto out_err; + } + table = cpufreq_frequency_get_table(cpu); + + for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) { + continue; + } + + if (count == p) { + cpufreq_driver_target(policy, table[i].frequency, CPUFREQ_RELATION_H); + state_set = 1; + } + + count++; + last_valid = i; + } + + /* we need to deal with the case in which we get a number > max pstate */ + if (!state_set) { + cpufreq_driver_target(policy, table[last_valid].frequency, CPUFREQ_RELATION_H); + } + + palacios_free(policy); + return 0; + +out_err: + palacios_free(policy); + return -1; +} + + +static int linux_set_freq(uint64_t f) +{ + struct cpufreq_policy * policy = NULL; + int cpu = get_cpu(); + uint64_t freq; + + policy = palacios_alloc(sizeof(struct cpufreq_policy)); + if (!policy) { + ERROR("Could not allocate policy struct\n"); + return -1; + } + + cpufreq_get_policy(policy, cpu); + + if (f < policy->min) { + freq = policy->min; + } else if (f > policy->max) { + freq = policy->max; + } else { + freq = f; + } + + cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_H); + + palacios_free(policy); + return 0; +} + + +static int linux_restore_defaults(void) +{ + unsigned int cpu = get_cpu(); + char * gov = NULL; + + gov = get_cpu_var(core_state).linux_governor; + put_cpu_var(core_state); + + DEBUG("restoring previous governor (%s)\n", gov); + + if (governor_switch(gov, cpu) < 0) { + ERROR("Could not restore governor to (%s)\n", gov); + goto out_err; + } + + free_linux_governor(); + return 0; + +out_err: + free_linux_governor(); + return -1; +} + + + /****************************************************************** Generic Interface as provided to Palacios and to the rest of the module -******************************************************************/ + ******************************************************************/ static void init_core(void) { @@ -646,13 +956,13 @@ static void init_core(void) get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL; get_cpu_var(core_state).cur_pstate = 0; - + if (machine_state.funcs) { - get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate(); - get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate(); + get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate(); + get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate(); } else { - get_cpu_var(core_state).min_pstate = 0; - get_cpu_var(core_state).max_pstate = 0; + get_cpu_var(core_state).min_pstate = 0; + get_cpu_var(core_state).max_pstate = 0; } @@ -661,20 +971,20 @@ static void init_core(void) p = cpufreq_cpu_get(cpu); if (!p) { - get_cpu_var(core_state).have_cpufreq = 0; - get_cpu_var(core_state).min_freq_khz=0; - get_cpu_var(core_state).max_freq_khz=0; - get_cpu_var(core_state).cur_freq_khz=0; + get_cpu_var(core_state).have_cpufreq = 0; + get_cpu_var(core_state).min_freq_khz=0; + get_cpu_var(core_state).max_freq_khz=0; + get_cpu_var(core_state).cur_freq_khz=0; } else { - get_cpu_var(core_state).have_cpufreq = 1; - get_cpu_var(core_state).min_freq_khz=p->min; - get_cpu_var(core_state).max_freq_khz=p->max; - get_cpu_var(core_state).cur_freq_khz=p->cur; - cpufreq_cpu_put(p); + get_cpu_var(core_state).have_cpufreq = 1; + get_cpu_var(core_state).min_freq_khz=p->min; + get_cpu_var(core_state).max_freq_khz=p->max; + get_cpu_var(core_state).cur_freq_khz=p->cur; + cpufreq_cpu_put(p); } put_cpu_var(core_state); - + } @@ -683,7 +993,9 @@ void palacios_pstate_ctrl_release(void); static void deinit_core(void) { + int cpu; DEBUG("P-State Core Deinit\n"); + cpu = get_cpu(); palacios_pstate_ctrl_release(); } @@ -692,16 +1004,16 @@ static void deinit_core(void) void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c) { memset(c,0,sizeof(struct v3_cpu_pstate_chars)); - + c->features = V3_PSTATE_INTERNAL_CONTROL; if (get_cpu_var(core_state).have_cpufreq) { - c->features |= V3_PSTATE_EXTERNAL_CONTROL; + c->features |= V3_PSTATE_EXTERNAL_CONTROL; } if (machine_state.arch==AMD || machine_state.arch==INTEL) { - c->features |= V3_PSTATE_DIRECT_CONTROL; + c->features |= V3_PSTATE_DIRECT_CONTROL; } c->cur_mode = get_cpu_var(core_state).mode; c->min_pstate = get_cpu_var(core_state).min_pstate; @@ -713,27 +1025,34 @@ void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c) put_cpu_var(core_state); - - + + } uint8_t palacios_pstate_ctrl_get_pstate(void) { if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { - put_cpu_var(core_state); - return machine_state.funcs->get_pstate(); + put_cpu_var(core_state); + return machine_state.funcs->get_pstate(); + } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { + put_cpu_var(core_state); + return linux_get_pstate(); } else { - put_cpu_var(core_state); - return 0; + put_cpu_var(core_state); + return 0; } } + void palacios_pstate_ctrl_set_pstate(uint8_t p) { if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { - put_cpu_var(core_state); - machine_state.funcs->set_pstate(p); + put_cpu_var(core_state); + machine_state.funcs->set_pstate(p); + } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { + put_cpu_var(core_state); + linux_set_pstate(p); } } @@ -743,128 +1062,135 @@ void palacios_pstate_ctrl_set_pstate_wrapper(void *p) palacios_pstate_ctrl_set_pstate((uint8_t)(uint64_t)p); } + uint64_t palacios_pstate_ctrl_get_freq(void) { if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { - put_cpu_var(core_state); - ERROR("Unimplemented get freq\n"); - return 0; + put_cpu_var(core_state); + return linux_get_freq(); } else { - put_cpu_var(core_state); - return 0; + put_cpu_var(core_state); + return 0; } } + void palacios_pstate_ctrl_set_freq(uint64_t p) { if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { - put_cpu_var(core_state); - ERROR("Unimplemented set freq\n"); + put_cpu_var(core_state); + linux_set_freq(p); } put_cpu_var(core_state); - } -static void switch_to_external(void) +static int switch_to_external(void) { if (!(get_cpu_var(core_state).have_cpufreq)) { - put_cpu_var(core_state); - ERROR("No cpufreq - cannot switch to external...\n"); - return; + put_cpu_var(core_state); + ERROR("No cpufreq - cannot switch to external...\n"); + return -1; } put_cpu_var(core_state); - ERROR("Unimplemented switch to external...\n"); + DEBUG("Switching to external control\n"); + return linux_restore_defaults(); } - -static void switch_to_direct(void) + + +static int switch_to_direct(void) { if (get_cpu_var(core_state).have_cpufreq) { - put_cpu_var(core_state); - ERROR("Unimplemented: switch to direct on machine with cpu freq\n"); - // The implementation would set the policy and governor to peg cpu - // regardless of load + put_cpu_var(core_state); + DEBUG("switch to direct from cpufreq\n"); + + // The implementation would set the policy and governor to peg cpu + // regardless of load + linux_setup_palacios_governor(); } if (machine_state.funcs && machine_state.funcs->arch_init) { - get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL; - - machine_state.funcs->arch_init(); + get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL; + + machine_state.funcs->arch_init(); - put_cpu_var(core_state); + put_cpu_var(core_state); } + return 0; } - -static void switch_to_internal(void) + +static int switch_to_internal(void) { if (get_cpu_var(core_state).have_cpufreq) { - put_cpu_var(core_state); - ERROR("Unimplemented: switch to internal on machine with cpu freq\n"); - return; - // The implementation would set the policy and governor to peg cpu - // regardless of load - exactly like direct + put_cpu_var(core_state); + DEBUG("switch to internal on machine with cpu freq\n"); + linux_setup_palacios_governor(); } get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL; - + put_cpu_var(core_state); - return; + return 0; } -static void switch_from_external(void) +static int switch_from_external(void) { if (!(get_cpu_var(core_state).have_cpufreq)) { - put_cpu_var(core_state); - ERROR("No cpufreq - how did we get here... external...\n"); - return; + put_cpu_var(core_state); + ERROR("No cpufreq - how did we get here... external...\n"); + return -1; } - ERROR("Unimplemented switch from external...\n"); - + DEBUG("Switching from external...\n"); + linux_restore_defaults(); + get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL; put_cpu_var(core_state); + return 0; } - -static void switch_from_direct(void) + + +static int switch_from_direct(void) { - if (get_cpu_var(core_state).have_cpufreq) { - put_cpu_var(core_state); - ERROR("Unimplemented: switch from direct on machine with cpu freq - will just pretend to do so\n"); - // The implementation would switch back to default policy and governor + put_cpu_var(core_state); + DEBUG("Switching back to cpufreq control from direct\n"); + linux_restore_defaults(); } get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL; - machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate); machine_state.funcs->arch_deinit(); put_cpu_var(core_state); + + return 0; } - -static void switch_from_internal(void) + +static int switch_from_internal(void) { if (get_cpu_var(core_state).have_cpufreq) { - put_cpu_var(core_state); - ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n"); - // The implementation would switch back to default policy and governor + put_cpu_var(core_state); + ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n"); + // The implementation would switch back to default policy and governor + linux_restore_defaults(); } get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL; put_cpu_var(core_state); - - return; + + return 0; } @@ -872,24 +1198,24 @@ static void switch_from_internal(void) void palacios_pstate_ctrl_acquire(uint32_t type) { if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) { - palacios_pstate_ctrl_release(); + palacios_pstate_ctrl_release(); } put_cpu_var(core_state); switch (type) { - case V3_PSTATE_EXTERNAL_CONTROL: - switch_to_external(); - break; - case V3_PSTATE_DIRECT_CONTROL: - switch_to_direct(); - break; - case V3_PSTATE_INTERNAL_CONTROL: - switch_to_internal(); - break; - default: - ERROR("Unknown pstate control type %u\n",type); - break; + case V3_PSTATE_EXTERNAL_CONTROL: + switch_to_external(); + break; + case V3_PSTATE_DIRECT_CONTROL: + switch_to_direct(); + break; + case V3_PSTATE_INTERNAL_CONTROL: + switch_to_internal(); + break; + default: + ERROR("Unknown pstate control type %u\n",type); + break; } } @@ -908,47 +1234,46 @@ static void palacios_pstate_ctrl_acquire_direct(void) void palacios_pstate_ctrl_release(void) { - if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) { - put_cpu_var(core_state); - return; + put_cpu_var(core_state); + return; } switch (get_cpu_var(core_state).mode) { - case V3_PSTATE_EXTERNAL_CONTROL: - switch_from_external(); - break; - case V3_PSTATE_DIRECT_CONTROL: - switch_from_direct(); - break; - case V3_PSTATE_INTERNAL_CONTROL: - switch_from_internal(); - break; - default: - ERROR("Unknown pstate control type %u\n",core_state.mode); - break; + case V3_PSTATE_EXTERNAL_CONTROL: + switch_from_external(); + break; + case V3_PSTATE_DIRECT_CONTROL: + switch_from_direct(); + break; + case V3_PSTATE_INTERNAL_CONTROL: + switch_from_internal(); + break; + default: + ERROR("Unknown pstate control type %u\n",core_state.mode); + break; } put_cpu_var(core_state); - + } static void update_hw_pstate(void *arg) { if (machine_state.funcs && machine_state.funcs->get_pstate) { - get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate(); - put_cpu_var(core_state); + get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate(); + put_cpu_var(core_state); } else { - get_cpu_var(core_state).cur_hw_pstate = 0; - put_cpu_var(core_state); + get_cpu_var(core_state).cur_hw_pstate = 0; + put_cpu_var(core_state); } } /*************************************************************************** PROC Interface to expose state -***************************************************************************/ + ***************************************************************************/ static int pstate_show(struct seq_file * file, void * v) { @@ -958,36 +1283,36 @@ static int pstate_show(struct seq_file * file, void * v) seq_printf(file, "V3VEE DVFS Status\n\n"); for (cpu=0;cpucur_hw_pstate, - s->mode==V3_PSTATE_HOST_CONTROL ? "host" : - s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" : - s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" : - s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN"); - if (s->have_cpufreq) { - seq_printf(file,"external "); - } - if (machine_state.supports_pstates) { - seq_printf(file,"direct "); - } - seq_printf(file,"internal ] "); - if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) { - seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz); - } - if (s->mode==V3_PSTATE_DIRECT_CONTROL) { - seq_printf(file,"(min=%u max=%u cur=%u) ", (uint32_t)s->min_pstate, (uint32_t)s->max_pstate, (uint32_t)s->cur_pstate); - } - seq_printf(file,"\n"); + struct pstate_core_info *s = &per_cpu(core_state,cpu); + seq_printf(file,"pcore %u: hw pstate %u mode %s of [ host ",cpu, + s->cur_hw_pstate, + s->mode==V3_PSTATE_HOST_CONTROL ? "host" : + s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" : + s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" : + s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN"); + if (s->have_cpufreq) { + seq_printf(file,"external "); + } + if (machine_state.supports_pstates) { + seq_printf(file,"direct "); + } + seq_printf(file,"internal ] "); + if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) { + seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz); + } + if (s->mode==V3_PSTATE_DIRECT_CONTROL) { + seq_printf(file,"(min=%u max=%u cur=%u) ", (uint32_t)s->min_pstate, (uint32_t)s->max_pstate, (uint32_t)s->cur_pstate); + } + seq_printf(file,"\n"); } return 0; } @@ -1009,19 +1334,19 @@ static struct file_operations pstate_fops = { int pstate_proc_setup(void) { struct proc_dir_entry *proc; - + proc = create_proc_entry("v3-dvfs",0444, palacios_get_procdir()); if (!proc) { - ERROR("Failed to create proc entry for p-state control\n"); - return -1; + ERROR("Failed to create proc entry for p-state control\n"); + return -1; } - + proc->proc_fops = &pstate_fops; - + return 0; } - + void pstate_proc_teardown(void) { remove_proc_entry("v3-dvfs",palacios_get_procdir()); @@ -1029,58 +1354,58 @@ void pstate_proc_teardown(void) /******************************************************************** User interface (ioctls) -********************************************************************/ + ********************************************************************/ static int dvfs_ctrl(unsigned int cmd, unsigned long arg) { struct v3_dvfs_ctrl_request r; if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) { - ERROR("Failed to copy DVFS request from user\n"); - return -EFAULT; + ERROR("Failed to copy DVFS request from user\n"); + return -EFAULT; } if (r.pcore >= num_online_cpus()) { - ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore); - return -EFAULT; + ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore); + return -EFAULT; } switch (r.cmd) { - case V3_DVFS_ACQUIRE: { - switch (r.acq_type) { - case V3_DVFS_EXTERNAL: - palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external,0); - return 0; - break; - case V3_DVFS_DIRECT: - palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct,0); - return 0; - break; - default: - ERROR("Unknown DVFS acquire type %u\n",r.acq_type); - return -EFAULT; - } - } - break; - case V3_DVFS_RELEASE: { - palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release,0); - return 0; - } - break; - case V3_DVFS_SETFREQ: { - palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz); - return 0; - } - break; - case V3_DVFS_SETPSTATE: { - palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate); - return 0; - } - default: { - ERROR("Unknown DVFS command %u\n",r.cmd); - return -EFAULT; - } - break; + case V3_DVFS_ACQUIRE: { + switch (r.acq_type) { + case V3_DVFS_EXTERNAL: + palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external, NULL); + return 0; + break; + case V3_DVFS_DIRECT: + palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct, NULL); + return 0; + break; + default: + ERROR("Unknown DVFS acquire type %u\n",r.acq_type); + return -EFAULT; + } + } + break; + case V3_DVFS_RELEASE: { + palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release, NULL); + return 0; + } + break; + case V3_DVFS_SETFREQ: { + palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz); + return 0; + } + break; + case V3_DVFS_SETPSTATE: { + palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate); + return 0; + } + default: { + ERROR("Unknown DVFS command %u\n",r.cmd); + return -EFAULT; + } + break; } } @@ -1107,7 +1432,7 @@ static struct v3_host_pstate_ctrl_iface hooks = { }; - + static int pstate_ctrl_init(void) { unsigned int cpu; @@ -1116,18 +1441,20 @@ static int pstate_ctrl_init(void) pstate_arch_setup(); for (cpu=0;cpu