X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=blobdiff_plain;f=linux_module%2Fiface-pstate-ctrl.c;h=97a38dde3ccfdcd22634c796037b0bac610aaf2d;hb=809f5af4685741c062fcc24a2b734a67c136aded;hp=ac06b20111bf2f61a44a3dce4a0ba641075ed094;hpb=d3fa9d0b82291fafc35ce9bb7e777256d78c0ecd;p=palacios.git diff --git a/linux_module/iface-pstate-ctrl.c b/linux_module/iface-pstate-ctrl.c index ac06b20..97a38dd 100644 --- a/linux_module/iface-pstate-ctrl.c +++ b/linux_module/iface-pstate-ctrl.c @@ -21,15 +21,20 @@ #include #include #include -#include #include #include #include +#include #include +#include #include #include #include +// Used to determine the appropriate pstates values on Intel +#include +#include + #include #include "palacios.h" @@ -49,6 +54,12 @@ an ioctl for commanding the implementation and a /proc file for showing current status and capabilities. + What we mean by "pstate" here is the processor's internal + configuration. For AMD, this is defined as being the same as + the ACPI-defined p-state. For Intel, it is not. There, it is the + contents of the perf ctl MSR, which, often, is the frequency id + and voltage id (the multipliers). + */ @@ -74,6 +85,7 @@ struct pstate_core_info { uint8_t cur_hw_pstate; // Apply if we are under the EXTERNAL state + uint64_t set_freq_khz; // this is the frequency we're hoping to get uint64_t cur_freq_khz; uint64_t max_freq_khz; uint64_t min_freq_khz; @@ -101,10 +113,10 @@ static DEFINE_PER_CPU(struct pstate_core_info, core_state); struct pstate_core_funcs { void (*arch_init)(void); void (*arch_deinit)(void); - uint8_t (*get_min_pstate)(void); - uint8_t (*get_max_pstate)(void); - uint8_t (*get_pstate)(void); - void (*set_pstate)(uint8_t pstate); + uint64_t (*get_min_pstate)(void); + uint64_t (*get_max_pstate)(void); + uint64_t (*get_pstate)(void); + void (*set_pstate)(uint64_t pstate); }; struct pstate_machine_info { @@ -217,7 +229,7 @@ static void deinit_arch_amd(void) } -static uint8_t get_pstate_amd(void) +static uint64_t get_pstate_amd(void) { struct p_state_stat_reg_amd pstat; @@ -230,7 +242,7 @@ static uint8_t get_pstate_amd(void) } -static void set_pstate_amd(uint8_t p) +static void set_pstate_amd(uint64_t p) { struct p_state_ctl_reg_amd pctl; pctl.val = 0; @@ -246,7 +258,7 @@ static void set_pstate_amd(uint8_t p) /* * NOTE: HW may change this value at runtime */ -static uint8_t get_max_pstate_amd(void) +static uint64_t get_max_pstate_amd(void) { struct p_state_limit_reg_amd plimits; @@ -256,7 +268,7 @@ static uint8_t get_max_pstate_amd(void) } -static uint8_t get_min_pstate_amd(void) +static uint64_t get_min_pstate_amd(void) { struct p_state_limit_reg_amd plimits; @@ -375,6 +387,21 @@ struct turbo_mode_info_reg_intel { } __attribute__((packed)); } __attribute__((packed)); +// This replicates the critical information in Linux's struct acpi_processor_px +// To make it easier to port to other OSes. +struct intel_pstate_info { + uint64_t freq; // KHz + uint64_t ctrl; // What to write into the _CTL MSR to get this +}; + +// The internal array will be used if we cannot build the table locally +static struct intel_pstate_info *intel_pstate_to_ctrl_internal=0; +static int intel_num_pstates_internal=0; + +// These will either point to the internal array or to a constructed array +static struct intel_pstate_info *intel_pstate_to_ctrl=0; +static int intel_num_pstates=0; + /* CPUID.01:ECX.AES(7) */ static uint8_t supports_pstates_intel(void) @@ -408,6 +435,43 @@ static uint8_t supports_pstates_intel(void) machine_state.have_mwait_ext, machine_state.have_mwait_int ); + + if (machine_state.have_speedstep) { + uint32_t i; + // Build mapping table (from "pstate" (0..) to ctrl value for MSR + if (!(get_cpu_var(processors)) || !(get_cpu_var(processors)->performance) ) { + put_cpu_var(processors); + // no acpi... revert to internal table + intel_pstate_to_ctrl=intel_pstate_to_ctrl_internal; + intel_num_pstates=intel_num_pstates_internal; + } else { + intel_num_pstates = get_cpu_var(processors)->performance->state_count; + if (intel_num_pstates) { + intel_pstate_to_ctrl = palacios_alloc(sizeof(struct intel_pstate_info)*intel_num_pstates); + if (!intel_pstate_to_ctrl) { + ERROR("P-State: Cannot allocate space for mapping...\n"); + intel_num_pstates=0; + } + for (i=0;iperformance->states[i].core_frequency*1000; + intel_pstate_to_ctrl[i].ctrl = get_cpu_var(processors)->performance->states[i].control; + } + + } else { + ERROR("P-State: Strange, machine has ACPI DVFS but no states...\n"); + } + } + put_cpu_var(processors); + INFO("P-State: Intel - State Mapping (%u states) follows\n",intel_num_pstates); + for (i=0;i> 16) & 0x1; put_cpu_var(core_state); @@ -426,6 +492,8 @@ static void init_arch_intel(void) val |= 1 << 16; wrmsrl(MSR_MISC_ENABLE_IA32, val); + //INFO("P-State: write ENABLE=%llx\n",val); + } static void deinit_arch_intel(void) @@ -434,40 +502,47 @@ static void deinit_arch_intel(void) rdmsrl(MSR_MISC_ENABLE_IA32, val); + //INFO("P-State: deinit: ENABLE=%llx\n",val); + val &= ~(1ULL << 16); val |= get_cpu_var(core_state).prior_speedstep << 16; put_cpu_var(core_state); wrmsrl(MSR_MISC_ENABLE_IA32, val); + //INFO("P-state: deinit ENABLE=%llx\n",val); + } /* TODO: Intel P-states require sampling at intervals... */ -static uint8_t get_pstate_intel(void) +static uint64_t get_pstate_intel(void) { uint64_t val; - uint16_t pstate; rdmsrl(MSR_PERF_STAT_IA32,val); - pstate = val & 0xffff; - - INFO("P-State: Get: 0x%llx\n", val); - - // Assume top byte is the FID - //if (pstate & 0xff ) { - // ERROR("P-State: Intel returns confusing pstate %u\n",pstate); - //} + //INFO("P-State: Get: 0x%llx\n", val); // should check if turbo is active, in which case // this value is not the whole story - return (uint8_t) (pstate>>8); + return val; } -static void set_pstate_intel(uint8_t p) +static void set_pstate_intel(uint64_t p) { uint64_t val; + uint64_t ctrl; + + if (intel_num_pstates==0) { + return ; + } else { + if (p>=intel_num_pstates) { + p=intel_num_pstates-1; + } + } + + ctrl=intel_pstate_to_ctrl[p].ctrl; /* ...Intel IDA (dynamic acceleration) if (c->no_turbo && !c->turbo_disabled) { @@ -478,8 +553,10 @@ static void set_pstate_intel(uint8_t p) // fid bits rdmsrl(MSR_PERF_CTL_IA32, val); - val &= ~0xff00ULL; - val |= ((uint64_t)p)<<8; + INFO("P-State: Pre-Set: 0x%llx\n", val); + + val &= ~0xffffULL; + val |= ctrl & 0xffffULL; INFO("P-State: Set: 0x%llx\n", val); @@ -490,24 +567,20 @@ static void set_pstate_intel(uint8_t p) } -static uint8_t get_min_pstate_intel(void) +static uint64_t get_min_pstate_intel(void) { - struct turbo_mode_info_reg_intel t; - - rdmsrl(MSR_PLATFORM_INFO_IA32, t.val); - - return t.reg.min_ratio; + return 0; } -static uint8_t get_max_pstate_intel (void) +static uint64_t get_max_pstate_intel (void) { - struct turbo_mode_info_reg_intel t; - - rdmsrl(MSR_PLATFORM_INFO_IA32, t.val); - - return t.reg.max_noturbo_ratio; + if (intel_num_pstates==0) { + return 0; + } else { + return intel_num_pstates-1; + } } static struct pstate_core_funcs intel_funcs = @@ -597,6 +670,34 @@ static int pstate_arch_setup(void) Linux Interface *****************************************************************/ +static unsigned cpus_using_v3_governor; +static DEFINE_MUTEX(v3_governor_mutex); + +/* KCH: this will tell us when there is an actual frequency transition */ +static int v3_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + + if (per_cpu(core_state, freq->cpu).mode != V3_PSTATE_EXTERNAL_CONTROL) { + return 0; + } + + if (val == CPUFREQ_POSTCHANGE) { + DEBUG("P-State: frequency change took effect on cpu %u (now %u kHz)\n", + freq->cpu, freq->new); + per_cpu(core_state, freq->cpu).cur_freq_khz = freq->new; + } + + return 0; + +} + + +static struct notifier_block v3_cpufreq_notifier_block = { + .notifier_call = v3_cpufreq_notifier +}; + /* * This stub governor is simply a placeholder for preventing @@ -605,18 +706,54 @@ static int pstate_arch_setup(void) */ static int governor_run(struct cpufreq_policy *policy, unsigned int event) { + unsigned cpu = policy->cpu; switch (event) { /* we can't use cpufreq_driver_target here as it can result - * in a circular dependency, so we'll just do nothing. + * in a circular dependency, so we'll keep the current frequency as is */ case CPUFREQ_GOV_START: + BUG_ON(!policy->cur); + + mutex_lock(&v3_governor_mutex); + + if (cpus_using_v3_governor == 0) { + cpufreq_register_notifier(&v3_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + cpus_using_v3_governor++; + + per_cpu(core_state, cpu).set_freq_khz = policy->cur; + per_cpu(core_state, cpu).cur_freq_khz = policy->cur; + per_cpu(core_state, cpu).max_freq_khz = policy->max; + per_cpu(core_state, cpu).min_freq_khz = policy->min; + + mutex_unlock(&v3_governor_mutex); + break; case CPUFREQ_GOV_STOP: + mutex_lock(&v3_governor_mutex); + + cpus_using_v3_governor--; + + if (cpus_using_v3_governor == 0) { + cpufreq_unregister_notifier( + &v3_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + per_cpu(core_state, cpu).set_freq_khz = 0; + per_cpu(core_state, cpu).cur_freq_khz = 0; + per_cpu(core_state, cpu).max_freq_khz = 0; + per_cpu(core_state, cpu).min_freq_khz = 0; + + mutex_unlock(&v3_governor_mutex); + break; case CPUFREQ_GOV_LIMITS: /* do nothing */ break; default: - ERROR("Undefined governor command\n"); + ERROR("Undefined governor command (%u)\n", event); return -1; } @@ -632,6 +769,15 @@ static struct cpufreq_governor stub_governor = }; +static struct workqueue_struct *pstate_wq; + +typedef struct { + struct work_struct work; + uint64_t freq; +} pstate_work_t; + + + static inline void pstate_register_linux_governor(void) { cpufreq_register_governor(&stub_governor); @@ -644,6 +790,31 @@ static inline void pstate_unregister_linux_governor(void) } +static int pstate_linux_init(void) +{ + pstate_register_linux_governor(); + pstate_wq = create_workqueue("v3vee_pstate_wq"); + if (!pstate_wq) { + ERROR("Could not create work queue\n"); + goto out_err; + } + + return 0; + +out_err: + pstate_unregister_linux_governor(); + return -1; +} + + +static void pstate_linux_deinit(void) +{ + pstate_unregister_linux_governor(); + flush_workqueue(pstate_wq); + destroy_workqueue(pstate_wq); +} + + static int get_current_governor(char **buf, unsigned int cpu) { struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy)); @@ -694,6 +865,8 @@ static void gov_switch_cleanup(struct subprocess_info * s) /* * Switch governors * @s - the governor to switch to + * TODO: this should probably be submitted to a work queue + * so we don't have to run it in interrupt context */ static int governor_switch(char * s, unsigned int cpu) { @@ -746,6 +919,7 @@ static int linux_setup_palacios_governor(void) { char * gov; unsigned int cpu = get_cpu(); + put_cpu(); /* KCH: we assume the v3vee governor is already * registered with kernel by this point @@ -764,6 +938,7 @@ static int linux_setup_palacios_governor(void) DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME); /* set the new one to ours */ + if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) { ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME); return -1; @@ -773,21 +948,16 @@ static int linux_setup_palacios_governor(void) } -#if 0 -static int linux_deinit(void) -{ - return 0; -} -#endif - static int linux_get_pstate(void) { struct cpufreq_policy * policy = NULL; struct cpufreq_frequency_table *table; - int cpu = get_cpu(); unsigned int i = 0; unsigned int count = 0; + unsigned int cpu = get_cpu(); + put_cpu(); + policy = palacios_alloc(sizeof(struct cpufreq_policy)); if (!policy) { @@ -812,6 +982,8 @@ static int linux_get_pstate(void) } palacios_free(policy); + + put_cpu(); return count; } @@ -819,7 +991,8 @@ static int linux_get_pstate(void) static int linux_get_freq(void) { struct cpufreq_policy * policy = NULL; - int cpu = get_cpu(); + unsigned int cpu = get_cpu(); + put_cpu(); policy = palacios_alloc(sizeof(struct cpufreq_policy)); if (!policy) { @@ -835,16 +1008,61 @@ static int linux_get_freq(void) return policy->cur; } +static void +pstate_switch_workfn (struct work_struct *work) +{ + pstate_work_t * pwork = (pstate_work_t*)work; + struct cpufreq_policy * policy = NULL; + uint64_t freq; + unsigned int cpu = get_cpu(); + put_cpu(); + + mutex_lock(&v3_governor_mutex); + + policy = palacios_alloc(sizeof(struct cpufreq_policy)); + if (!policy) { + ERROR("Could not allocate space for cpufreq policy\n"); + goto out; + } + + if (cpufreq_get_policy(policy, cpu) != 0) { + ERROR("Could not get cpufreq policy\n"); + goto out1; + } + + freq = pwork->freq; + get_cpu_var(core_state).set_freq_khz = freq; + + if (freq < get_cpu_var(core_state).min_freq_khz) { + freq = get_cpu_var(core_state).min_freq_khz; + } + if (freq > get_cpu_var(core_state).max_freq_khz) { + freq = get_cpu_var(core_state).max_freq_khz; + } + put_cpu_var(core_state); + + INFO("P-state: requesting frequency change on core %u to %llu\n", cpu, freq); + __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); + +out1: + palacios_free(policy); +out: + palacios_free(work); + mutex_unlock(&v3_governor_mutex); +} + static int linux_set_pstate(uint8_t p) { struct cpufreq_policy * policy = NULL; struct cpufreq_frequency_table *table; - int cpu = get_cpu(); + pstate_work_t * work = NULL; unsigned int i = 0; unsigned int count = 0; int state_set = 0; int last_valid = 0; + unsigned int cpu = get_cpu(); + put_cpu(); policy = palacios_alloc(sizeof(struct cpufreq_policy)); if (!policy) { @@ -852,9 +1070,15 @@ static int linux_set_pstate(uint8_t p) return -1; } + work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t)); + if (!work) { + ERROR("Could not allocate work struct\n"); + goto out_err; + } + if (cpufreq_get_policy(policy, cpu)) { ERROR("Could not get current policy\n"); - goto out_err; + goto out_err1; } table = cpufreq_frequency_get_table(cpu); @@ -865,8 +1089,13 @@ static int linux_set_pstate(uint8_t p) } if (count == p) { - cpufreq_driver_target(policy, table[i].frequency, CPUFREQ_RELATION_H); + + INIT_WORK((struct work_struct*)work, pstate_switch_workfn); + work->freq = table[i].frequency; + queue_work(pstate_wq, (struct work_struct*)work); + state_set = 1; + break; } count++; @@ -875,12 +1104,16 @@ static int linux_set_pstate(uint8_t p) /* we need to deal with the case in which we get a number > max pstate */ if (!state_set) { - cpufreq_driver_target(policy, table[last_valid].frequency, CPUFREQ_RELATION_H); + INIT_WORK((struct work_struct*)work, pstate_switch_workfn); + work->freq = table[last_valid].frequency; + queue_work(pstate_wq, (struct work_struct*)work); } palacios_free(policy); return 0; +out_err1: + palacios_free(work); out_err: palacios_free(policy); return -1; @@ -890,8 +1123,10 @@ out_err: static int linux_set_freq(uint64_t f) { struct cpufreq_policy * policy = NULL; - int cpu = get_cpu(); + pstate_work_t * work = NULL; uint64_t freq; + unsigned int cpu = get_cpu(); + put_cpu(); policy = palacios_alloc(sizeof(struct cpufreq_policy)); if (!policy) { @@ -899,7 +1134,16 @@ static int linux_set_freq(uint64_t f) return -1; } - cpufreq_get_policy(policy, cpu); + work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t)); + if (!work) { + ERROR("Could not allocate work struct\n"); + goto out_err; + } + + if (cpufreq_get_policy(policy, cpu) != 0) { + ERROR("Could not get cpufreq policy\n"); + goto out_err1; + } if (f < policy->min) { freq = policy->min; @@ -909,17 +1153,26 @@ static int linux_set_freq(uint64_t f) freq = f; } - cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_H); + INIT_WORK((struct work_struct*)work, pstate_switch_workfn); + work->freq = freq; + queue_work(pstate_wq, (struct work_struct*)work); palacios_free(policy); return 0; + +out_err1: + palacios_free(work); +out_err: + palacios_free(policy); + return -1; } static int linux_restore_defaults(void) { - unsigned int cpu = get_cpu(); char * gov = NULL; + unsigned int cpu = get_cpu(); + put_cpu(); gov = get_cpu_var(core_state).linux_governor; put_cpu_var(core_state); @@ -950,6 +1203,7 @@ static void init_core(void) { unsigned cpu; struct cpufreq_policy *p; + unsigned int i; DEBUG("P-State Core Init\n"); @@ -979,12 +1233,16 @@ static void init_core(void) get_cpu_var(core_state).have_cpufreq = 1; get_cpu_var(core_state).min_freq_khz=p->min; get_cpu_var(core_state).max_freq_khz=p->max; - get_cpu_var(core_state).cur_freq_khz=p->cur; - cpufreq_cpu_put(p); - } - + get_cpu_var(core_state).cur_freq_khz=p->cur; } cpufreq_cpu_put(p); put_cpu_var(core_state); + for (i=0;iperformance->state_count; i++) { + INFO("P-State: %u: freq=%llu ctrl=%llx", + i, + get_cpu_var(processors)->performance->states[i].core_frequency*1000, + get_cpu_var(processors)->performance->states[i].control); + } + put_cpu_var(processors); } @@ -993,10 +1251,9 @@ void palacios_pstate_ctrl_release(void); static void deinit_core(void) { - int cpu; DEBUG("P-State Core Deinit\n"); - cpu = get_cpu(); palacios_pstate_ctrl_release(); + } @@ -1030,7 +1287,7 @@ void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c) } -uint8_t palacios_pstate_ctrl_get_pstate(void) +uint64_t palacios_pstate_ctrl_get_pstate(void) { if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { put_cpu_var(core_state); @@ -1045,7 +1302,7 @@ uint8_t palacios_pstate_ctrl_get_pstate(void) } -void palacios_pstate_ctrl_set_pstate(uint8_t p) +void palacios_pstate_ctrl_set_pstate(uint64_t p) { if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { put_cpu_var(core_state); @@ -1053,7 +1310,9 @@ void palacios_pstate_ctrl_set_pstate(uint8_t p) } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { put_cpu_var(core_state); linux_set_pstate(p); - } + } else { + put_cpu_var(core_state); + } } @@ -1080,27 +1339,36 @@ void palacios_pstate_ctrl_set_freq(uint64_t p) if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { put_cpu_var(core_state); linux_set_freq(p); - } - put_cpu_var(core_state); + } else { + put_cpu_var(core_state); + } } static int switch_to_external(void) { + DEBUG("switch from host control to external\n"); + if (!(get_cpu_var(core_state).have_cpufreq)) { put_cpu_var(core_state); ERROR("No cpufreq - cannot switch to external...\n"); return -1; - } + } + put_cpu_var(core_state); + + linux_setup_palacios_governor(); + + get_cpu_var(core_state).mode=V3_PSTATE_EXTERNAL_CONTROL; put_cpu_var(core_state); - DEBUG("Switching to external control\n"); - return linux_restore_defaults(); + return 0; } static int switch_to_direct(void) { + DEBUG("switch from host control to direct\n"); + if (get_cpu_var(core_state).have_cpufreq) { put_cpu_var(core_state); DEBUG("switch to direct from cpufreq\n"); @@ -1108,6 +1376,8 @@ static int switch_to_direct(void) // The implementation would set the policy and governor to peg cpu // regardless of load linux_setup_palacios_governor(); + } else { + put_cpu_var(core_state); } if (machine_state.funcs && machine_state.funcs->arch_init) { @@ -1124,10 +1394,14 @@ static int switch_to_direct(void) static int switch_to_internal(void) { + DEBUG("switch from host control to internal\n"); + if (get_cpu_var(core_state).have_cpufreq) { put_cpu_var(core_state); DEBUG("switch to internal on machine with cpu freq\n"); linux_setup_palacios_governor(); + } else { + put_cpu_var(core_state); } get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL; @@ -1145,12 +1419,18 @@ static int switch_from_external(void) ERROR("No cpufreq - how did we get here... external...\n"); return -1; } + put_cpu_var(core_state); - DEBUG("Switching from external...\n"); - linux_restore_defaults(); + DEBUG("Switching back to host control from external\n"); - get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL; + if (get_cpu_var(core_state).have_cpufreq) { + put_cpu_var(core_state); + linux_restore_defaults(); + } else { + put_cpu_var(core_state); + } + get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL; put_cpu_var(core_state); return 0; @@ -1159,18 +1439,22 @@ static int switch_from_external(void) static int switch_from_direct(void) { + + DEBUG("Switching back to host control from direct\n"); + + // Set maximum performance, just in case there is no host control + machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate); + machine_state.funcs->arch_deinit(); + if (get_cpu_var(core_state).have_cpufreq) { put_cpu_var(core_state); - DEBUG("Switching back to cpufreq control from direct\n"); linux_restore_defaults(); + } else { + put_cpu_var(core_state); } get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL; - machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate); - - machine_state.funcs->arch_deinit(); - put_cpu_var(core_state); return 0; @@ -1179,11 +1463,15 @@ static int switch_from_direct(void) static int switch_from_internal(void) { + DEBUG("Switching back to host control from internal\n"); + if (get_cpu_var(core_state).have_cpufreq) { put_cpu_var(core_state); - ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n"); + // ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n"); // The implementation would switch back to default policy and governor linux_restore_defaults(); + } else { + put_cpu_var(core_state); } get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL; @@ -1198,11 +1486,12 @@ static int switch_from_internal(void) void palacios_pstate_ctrl_acquire(uint32_t type) { if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) { + put_cpu_var(core_state); palacios_pstate_ctrl_release(); + } else { + put_cpu_var(core_state); } - put_cpu_var(core_state); - switch (type) { case V3_PSTATE_EXTERNAL_CONTROL: switch_to_external(); @@ -1237,25 +1526,27 @@ void palacios_pstate_ctrl_release(void) if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) { put_cpu_var(core_state); return; - } + } + put_cpu_var(core_state); switch (get_cpu_var(core_state).mode) { case V3_PSTATE_EXTERNAL_CONTROL: + put_cpu_var(core_state); switch_from_external(); break; case V3_PSTATE_DIRECT_CONTROL: + put_cpu_var(core_state); switch_from_direct(); break; case V3_PSTATE_INTERNAL_CONTROL: + put_cpu_var(core_state); switch_from_internal(); break; default: + put_cpu_var(core_state); ERROR("Unknown pstate control type %u\n",core_state.mode); break; } - - put_cpu_var(core_state); - } @@ -1293,7 +1584,7 @@ static int pstate_show(struct seq_file * file, void * v) for (cpu=0;cpucur_hw_pstate, s->mode==V3_PSTATE_HOST_CONTROL ? "host" : s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" : @@ -1453,7 +1744,7 @@ static int pstate_ctrl_init(void) pstate_user_setup(); - pstate_register_linux_governor(); + pstate_linux_init(); INFO("P-State Control Initialized\n"); @@ -1465,7 +1756,7 @@ static int pstate_ctrl_deinit(void) unsigned int cpu; unsigned int numcpus=num_online_cpus(); - pstate_unregister_linux_governor(); + pstate_linux_deinit(); pstate_user_teardown(); @@ -1476,6 +1767,13 @@ static int pstate_ctrl_deinit(void) palacios_xcall(cpu,(void (*)(void *))deinit_core,0); } + + // Free any mapping table we built for Intel + if (intel_pstate_to_ctrl && intel_pstate_to_ctrl != intel_pstate_to_ctrl_internal) { + palacios_free(intel_pstate_to_ctrl); + } + + return 0; }