#include <linux/cpufreq.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
+#include <linux/module.h>
#include <linux/string.h>
+#include <linux/interrupt.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
uint8_t cur_hw_pstate;
// Apply if we are under the EXTERNAL state
+ uint64_t set_freq_khz; // this is the frequency we're hoping to get
uint64_t cur_freq_khz;
uint64_t max_freq_khz;
uint64_t min_freq_khz;
Linux Interface
*****************************************************************/
+static unsigned cpus_using_v3_governor;
+static DEFINE_MUTEX(v3_governor_mutex);
+
+/*
+ * KCH: cpufreq transition notifier; tells us when an actual frequency
+ * transition has happened.
+ *
+ * @nb   - notifier block this callback was registered through (unused)
+ * @val  - transition phase reported by cpufreq; only CPUFREQ_POSTCHANGE
+ *         is acted upon
+ * @data - struct cpufreq_freqs describing the transition
+ *
+ * For cores in V3_PSTATE_EXTERNAL_CONTROL mode, the new frequency is
+ * recorded in that core's cur_freq_khz. Always returns 0.
+ */
+static int v3_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+                               void *data)
+{
+    struct cpufreq_freqs *transition = data;
+    unsigned int cpu = transition->cpu;
+
+    if (val == CPUFREQ_POSTCHANGE &&
+        per_cpu(core_state, cpu).mode == V3_PSTATE_EXTERNAL_CONTROL) {
+        DEBUG("P-State: frequency change took effect on cpu %u (now %u kHz)\n",
+              cpu, transition->new);
+        per_cpu(core_state, cpu).cur_freq_khz = transition->new;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Notifier block wrapping v3_cpufreq_notifier. governor_run() registers it
+ * as a CPUFREQ_TRANSITION_NOTIFIER when the first CPU starts using the
+ * governor and unregisters it when the last one stops.
+ */
+static struct notifier_block v3_cpufreq_notifier_block = {
+ .notifier_call = v3_cpufreq_notifier
+};
+
/*
* This stub governor is simply a placeholder for preventing
*/
static int governor_run(struct cpufreq_policy *policy, unsigned int event)
{
+ unsigned cpu = policy->cpu;
switch (event) {
/* we can't use cpufreq_driver_target here as it can result
- * in a circular dependency, so we'll just do nothing.
+ * in a circular dependency, so we'll keep the current frequency as is
*/
case CPUFREQ_GOV_START:
+ BUG_ON(!policy->cur);
+
+ mutex_lock(&v3_governor_mutex);
+
+ if (cpus_using_v3_governor == 0) {
+ cpufreq_register_notifier(&v3_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
+ cpus_using_v3_governor++;
+
+ per_cpu(core_state, cpu).set_freq_khz = policy->cur;
+ per_cpu(core_state, cpu).cur_freq_khz = policy->cur;
+ per_cpu(core_state, cpu).max_freq_khz = policy->max;
+ per_cpu(core_state, cpu).min_freq_khz = policy->min;
+
+ mutex_unlock(&v3_governor_mutex);
+ break;
case CPUFREQ_GOV_STOP:
+ mutex_lock(&v3_governor_mutex);
+
+ cpus_using_v3_governor--;
+
+ if (cpus_using_v3_governor == 0) {
+ cpufreq_unregister_notifier(
+ &v3_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ }
+
+ per_cpu(core_state, cpu).set_freq_khz = 0;
+ per_cpu(core_state, cpu).cur_freq_khz = 0;
+ per_cpu(core_state, cpu).max_freq_khz = 0;
+ per_cpu(core_state, cpu).min_freq_khz = 0;
+
+ mutex_unlock(&v3_governor_mutex);
+ break;
case CPUFREQ_GOV_LIMITS:
/* do nothing */
break;
default:
- ERROR("Undefined governor command\n");
+ ERROR("Undefined governor command (%u)\n", event);
return -1;
}
};
+/* Work queue onto which frequency-change requests are queued */
+static struct workqueue_struct *pstate_wq;
+
+/*
+ * One queued frequency-change request.
+ *
+ * NOTE: `work` must stay the first member - code in this file casts
+ * directly between pstate_work_t * and struct work_struct *.
+ */
+typedef struct {
+ struct work_struct work;
+ uint64_t freq; /* requested frequency; pstate_switch_workfn stores it in set_freq_khz */
+} pstate_work_t;
+
+
+
static inline void pstate_register_linux_governor(void)
{
cpufreq_register_governor(&stub_governor);
}
+/*
+ * Set up the Linux-facing side of p-state control: register the governor
+ * and create the work queue used for frequency-change requests.
+ *
+ * Returns 0 on success. If the work queue cannot be created, the governor
+ * is unregistered again and -1 is returned.
+ */
+static int pstate_linux_init(void)
+{
+    pstate_register_linux_governor();
+
+    pstate_wq = create_workqueue("v3vee_pstate_wq");
+    if (pstate_wq) {
+        return 0;
+    }
+
+    /* roll back the governor registration on failure */
+    ERROR("Could not create work queue\n");
+    pstate_unregister_linux_governor();
+    return -1;
+}
+
+
+/*
+ * Tear down the Linux-facing side of p-state control: unregister the
+ * governor, then flush and destroy the frequency-change work queue.
+ */
+static void pstate_linux_deinit(void)
+{
+    pstate_unregister_linux_governor();
+
+    /*
+     * pstate_wq is still NULL if create_workqueue() failed during
+     * pstate_linux_init(); flushing or destroying a NULL queue would
+     * dereference a NULL pointer.
+     */
+    if (!pstate_wq) {
+        return;
+    }
+
+    flush_workqueue(pstate_wq);
+    destroy_workqueue(pstate_wq);
+    pstate_wq = NULL; /* guard against use after destroy / double deinit */
+}
+
+
static int get_current_governor(char **buf, unsigned int cpu)
{
struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy));
/*
* Switch governors
* @s - the governor to switch to
+ * TODO: this should probably be submitted to a work queue
+ * so we don't have to run it in interrupt context
*/
static int governor_switch(char * s, unsigned int cpu)
{
{
char * gov;
unsigned int cpu = get_cpu();
+ put_cpu();
/* KCH: we assume the v3vee governor is already
* registered with kernel by this point
DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME);
/* set the new one to ours */
+
if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) {
ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME);
return -1;
{
struct cpufreq_policy * policy = NULL;
struct cpufreq_frequency_table *table;
- int cpu = get_cpu();
unsigned int i = 0;
unsigned int count = 0;
+ unsigned int cpu = get_cpu();
+ put_cpu();
+
policy = palacios_alloc(sizeof(struct cpufreq_policy));
if (!policy) {
static int linux_get_freq(void)
{
struct cpufreq_policy * policy = NULL;
- int cpu = get_cpu();
+ unsigned int cpu = get_cpu();
+ put_cpu();
policy = palacios_alloc(sizeof(struct cpufreq_policy));
if (!policy) {
return policy->cur;
}
+/*
+ * Work-queue handler that carries out a queued frequency-change request.
+ *
+ * @work - the work_struct embedded in a pstate_work_t allocated by whoever
+ *         queued the request; the containing pstate_work_t is freed here
+ *         on every path.
+ *
+ * Records the requested frequency in the core's set_freq_khz, clamps it to
+ * the core's [min_freq_khz, max_freq_khz] and passes the result to
+ * __cpufreq_driver_target(). The whole operation runs under
+ * v3_governor_mutex.
+ */
+static void
+pstate_switch_workfn (struct work_struct *work)
+{
+    pstate_work_t * pwork = container_of(work, pstate_work_t, work);
+    struct cpufreq_policy * policy = NULL;
+    uint64_t freq;
+    unsigned int cpu = get_cpu();
+    put_cpu();
+
+    mutex_lock(&v3_governor_mutex);
+
+    policy = palacios_alloc(sizeof(struct cpufreq_policy));
+    if (!policy) {
+        ERROR("Could not allocate space for cpufreq policy\n");
+        goto out;
+    }
+
+    if (cpufreq_get_policy(policy, cpu) != 0) {
+        ERROR("Could not get cpufreq policy\n");
+        goto out1;
+    }
+
+    freq = pwork->freq;
+
+    /*
+     * Use per_cpu() rather than get_cpu_var(): every get_cpu_var() disables
+     * preemption and needs its own put_cpu_var(). Unbalanced gets would
+     * leave preemption disabled across the driver call below and after
+     * this function returns.
+     */
+    per_cpu(core_state, cpu).set_freq_khz = freq;
+
+    if (freq < per_cpu(core_state, cpu).min_freq_khz) {
+        freq = per_cpu(core_state, cpu).min_freq_khz;
+    }
+    if (freq > per_cpu(core_state, cpu).max_freq_khz) {
+        freq = per_cpu(core_state, cpu).max_freq_khz;
+    }
+
+    INFO("P-state: requesting frequency change on core %u to %llu\n", cpu, freq);
+    if (__cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L) != 0) {
+        ERROR("P-state: frequency change request failed on core %u\n", cpu);
+    }
+
+out1:
+    palacios_free(policy);
+out:
+    palacios_free(pwork);
+    mutex_unlock(&v3_governor_mutex);
+}
+
static int linux_set_pstate(uint8_t p)
{
struct cpufreq_policy * policy = NULL;
struct cpufreq_frequency_table *table;
- int cpu = get_cpu();
+ pstate_work_t * work = NULL;
unsigned int i = 0;
unsigned int count = 0;
int state_set = 0;
int last_valid = 0;
+ unsigned int cpu = get_cpu();
+ put_cpu();
policy = palacios_alloc(sizeof(struct cpufreq_policy));
if (!policy) {
return -1;
}
+ work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
+ if (!work) {
+ ERROR("Could not allocate work struct\n");
+ goto out_err;
+ }
+
if (cpufreq_get_policy(policy, cpu)) {
ERROR("Could not get current policy\n");
- goto out_err;
+ goto out_err1;
}
table = cpufreq_frequency_get_table(cpu);
}
if (count == p) {
- cpufreq_driver_target(policy, table[i].frequency, CPUFREQ_RELATION_H);
+
+ INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
+ work->freq = table[i].frequency;
+ queue_work(pstate_wq, (struct work_struct*)work);
+
state_set = 1;
+ break;
}
count++;
/* we need to deal with the case in which we get a number > max pstate */
if (!state_set) {
- cpufreq_driver_target(policy, table[last_valid].frequency, CPUFREQ_RELATION_H);
+ INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
+ work->freq = table[last_valid].frequency;
+ queue_work(pstate_wq, (struct work_struct*)work);
}
palacios_free(policy);
return 0;
+out_err1:
+ palacios_free(work);
out_err:
palacios_free(policy);
return -1;
static int linux_set_freq(uint64_t f)
{
struct cpufreq_policy * policy = NULL;
- int cpu = get_cpu();
+ pstate_work_t * work = NULL;
uint64_t freq;
+ unsigned int cpu = get_cpu();
+ put_cpu();
policy = palacios_alloc(sizeof(struct cpufreq_policy));
if (!policy) {
return -1;
}
- cpufreq_get_policy(policy, cpu);
+ work = (pstate_work_t*)palacios_alloc(sizeof(pstate_work_t));
+ if (!work) {
+ ERROR("Could not allocate work struct\n");
+ goto out_err;
+ }
+
+ if (cpufreq_get_policy(policy, cpu) != 0) {
+ ERROR("Could not get cpufreq policy\n");
+ goto out_err1;
+ }
if (f < policy->min) {
freq = policy->min;
freq = f;
}
- cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_H);
+ INIT_WORK((struct work_struct*)work, pstate_switch_workfn);
+ work->freq = freq;
+ queue_work(pstate_wq, (struct work_struct*)work);
palacios_free(policy);
return 0;
+
+out_err1:
+ palacios_free(work);
+out_err:
+ palacios_free(policy);
+ return -1;
}
static int linux_restore_defaults(void)
{
- unsigned int cpu = get_cpu();
char * gov = NULL;
+ unsigned int cpu = get_cpu();
+ put_cpu();
gov = get_cpu_var(core_state).linux_governor;
put_cpu_var(core_state);
get_cpu_var(core_state).have_cpufreq = 1;
get_cpu_var(core_state).min_freq_khz=p->min;
get_cpu_var(core_state).max_freq_khz=p->max;
- get_cpu_var(core_state).cur_freq_khz=p->cur;
- }
-
- cpufreq_cpu_put(p);
-
+ get_cpu_var(core_state).cur_freq_khz=p->cur; } cpufreq_cpu_put(p);
put_cpu_var(core_state);
for (i=0;i<get_cpu_var(processors)->performance->state_count; i++) {
{
DEBUG("P-State Core Deinit\n");
palacios_pstate_ctrl_release();
+
}
} else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
put_cpu_var(core_state);
linux_set_pstate(p);
- }
+ } else {
+ put_cpu_var(core_state);
+ }
}
if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
put_cpu_var(core_state);
linux_set_freq(p);
- }
- put_cpu_var(core_state);
+ } else {
+ put_cpu_var(core_state);
+ }
}
put_cpu_var(core_state);
ERROR("No cpufreq - cannot switch to external...\n");
return -1;
- }
+ }
put_cpu_var(core_state);
linux_setup_palacios_governor();
// The implementation would set the policy and governor to peg cpu
// regardless of load
linux_setup_palacios_governor();
+ } else {
+ put_cpu_var(core_state);
}
if (machine_state.funcs && machine_state.funcs->arch_init) {
put_cpu_var(core_state);
DEBUG("switch to internal on machine with cpu freq\n");
linux_setup_palacios_governor();
+ } else {
+ put_cpu_var(core_state);
}
get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL;
ERROR("No cpufreq - how did we get here... external...\n");
return -1;
}
+ put_cpu_var(core_state);
DEBUG("Switching back to host control from external\n");
if (get_cpu_var(core_state).have_cpufreq) {
- linux_restore_defaults();
+ put_cpu_var(core_state);
+ linux_restore_defaults();
+ } else {
+ put_cpu_var(core_state);
}
get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
-
put_cpu_var(core_state);
return 0;
machine_state.funcs->arch_deinit();
if (get_cpu_var(core_state).have_cpufreq) {
+ put_cpu_var(core_state);
linux_restore_defaults();
+ } else {
+ put_cpu_var(core_state);
}
get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
DEBUG("Switching back to host control from internal\n");
if (get_cpu_var(core_state).have_cpufreq) {
+ put_cpu_var(core_state);
// ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n");
// The implementation would switch back to default policy and governor
linux_restore_defaults();
+ } else {
+ put_cpu_var(core_state);
}
get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
void palacios_pstate_ctrl_acquire(uint32_t type)
{
if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) {
+ put_cpu_var(core_state);
palacios_pstate_ctrl_release();
+ } else {
+ put_cpu_var(core_state);
}
- put_cpu_var(core_state);
-
switch (type) {
case V3_PSTATE_EXTERNAL_CONTROL:
switch_to_external();
if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) {
put_cpu_var(core_state);
return;
- }
+ }
+ put_cpu_var(core_state);
switch (get_cpu_var(core_state).mode) {
case V3_PSTATE_EXTERNAL_CONTROL:
+ put_cpu_var(core_state);
switch_from_external();
break;
case V3_PSTATE_DIRECT_CONTROL:
+ put_cpu_var(core_state);
switch_from_direct();
break;
case V3_PSTATE_INTERNAL_CONTROL:
+ put_cpu_var(core_state);
switch_from_internal();
break;
default:
+ put_cpu_var(core_state);
ERROR("Unknown pstate control type %u\n",core_state.mode);
break;
}
-
- put_cpu_var(core_state);
-
}
pstate_user_setup();
- pstate_register_linux_governor();
+ pstate_linux_init();
INFO("P-State Control Initialized\n");
unsigned int cpu;
unsigned int numcpus=num_online_cpus();
- pstate_unregister_linux_governor();
+ pstate_linux_deinit();
pstate_user_teardown();