From: Kyle Hale, Shiva Rao, and Peter Dinda Date: Wed, 13 Aug 2014 00:03:12 +0000 (-0500) Subject: P-State control interface - Linux implementation X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=fd288e4dc51177f037f4752861eb95971fb1d1a0 P-State control interface - Linux implementation --- diff --git a/linux_module/Makefile b/linux_module/Makefile index 8c6c426..d89f187 100644 --- a/linux_module/Makefile +++ b/linux_module/Makefile @@ -40,6 +40,7 @@ v3vee-$(V3_CONFIG_MEM_TRACK) += memtrack.o v3vee-$(V3_CONFIG_HOST_PMU) += iface-pmu.o v3vee-$(V3_CONFIG_HOST_PWRSTAT) += iface-pwrstat.o +v3vee-$(V3_CONFIG_HOST_PSTATE_CTRL) += iface-pstate-ctrl.o v3vee-$(V3_CONFIG_VNET) += palacios-vnet.o \ palacios-vnet-ctrl.o \ diff --git a/linux_module/iface-pstate-ctrl.c b/linux_module/iface-pstate-ctrl.c new file mode 100644 index 0000000..3831ed0 --- /dev/null +++ b/linux_module/iface-pstate-ctrl.c @@ -0,0 +1,1006 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2014, the V3VEE Project + * all rights reserved. + * + * Author: Kyle C. Hale + * Shiva Rao + * Peter Dinda + * + * This is free software. you are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "palacios.h" +#include "iface-pstate-ctrl.h" + +#include "linux-exts.h" + +/* + This P-STATE control implementation includes: + + - Direct control of Intel and AMD processor pstates + - External control of processor states via Linux (unimplemented) + - Internal control of processor states in Palacios (handoff from Linux) + + Additionally, it provides a user-space interface for manipulating + p-state regardless of the host's functionality. This includes + an ioctl for commanding the implementation and a /proc file for + showing current status and capabilities. + +*/ + + + + +struct pstate_core_info { + // Here we have the notion of host control +#define V3_PSTATE_HOST_CONTROL 0 + // and all the modes from the Palacios interface: + // V3_PSTATE_EXTERNAL_CONTROL + // V3_PSTATE_DIRECT_CONTROL + // V3_PSTATE_INTERNAL_CONTROL + uint32_t mode; + + // Apply if we are under the DIRECT state + uint8_t cur_pstate; + uint8_t max_pstate; + uint8_t min_pstate; + + uint8_t cur_hw_pstate; + + // Apply if we are under the EXTERNAL state + uint64_t cur_freq_khz; + uint64_t max_freq_khz; + uint64_t min_freq_khz; + + // Intel-specific for DIRECT state + uint8_t turbo_disabled; + uint8_t no_turbo; + + int have_cpufreq; + +}; + + +static DEFINE_PER_CPU(struct pstate_core_info, core_state); + + +// These are used to assert DIRECT control over the core pstates +struct pstate_core_funcs { + void (*arch_init)(void); + void (*arch_deinit)(void); + uint8_t (*get_min_pstate)(void); + uint8_t (*get_max_pstate)(void); + uint8_t (*get_pstate)(void); + void (*set_pstate)(uint8_t pstate); +}; + +struct pstate_machine_info { + enum {INTEL, AMD, OTHER } arch; + int supports_pstates; + // used for DIRECT control + struct pstate_core_funcs *funcs; +}; + +static struct pstate_machine_info machine_state; + + +/**************************************************** + AMD DIRECT CONTROL 
+***************************************************/
+
+/* AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557 */
+#define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061
+#define MSR_PSTATE_CTL_REG_AMD   0xc0010062
+#define MSR_PSTATE_STAT_REG_AMD  0xc0010063
+
+/* Bit view of the value read from MSR_PSTATE_LIMIT_REG_AMD */
+struct p_state_limit_reg_amd {
+    union {
+        uint64_t val;
+        struct {
+            uint8_t  pstate_limit : 4; /* lowest P-state value (highest perf.) supported currently (this can change at runtime) */
+            uint8_t  pstate_max   : 4; /* highest P-state value supported (lowest perf) */
+            uint64_t rsvd         : 56;
+        } reg;
+    } __attribute__((packed));
+} __attribute__((packed));
+
+/* Bit view of the value read from MSR_PSTATE_STAT_REG_AMD */
+struct p_state_stat_reg_amd {
+    union {
+        uint64_t val;
+        struct {
+            uint8_t  pstate : 4;
+            uint64_t rsvd   : 60;
+        } reg;
+    } __attribute__((packed));
+} __attribute__((packed));
+
+/* Bit view of the value written to MSR_PSTATE_CTL_REG_AMD */
+struct p_state_ctl_reg_amd {
+    union {
+        uint64_t val;
+        struct {
+            uint8_t  cmd  : 4;
+            uint64_t rsvd : 60;
+        } reg;
+    } __attribute__((packed));
+} __attribute__((packed));
+
+/* CPUID Fn8000_0007_EDX[HwPstate(7)] = 1 */
+static uint8_t supports_pstates_amd (void)
+{
+    uint32_t eax, ebx, ecx, edx;
+
+    cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+    return !!(edx & (1 << 7));
+}
+
+static void init_arch_amd(void)
+{
+    /* KCH: nothing to do here */
+}
+
+static void deinit_arch_amd(void)
+{
+    /* KCH: nothing to do here */
+}
+
+/* Read the P-state status MSR and cache the result in this core's state */
+static uint8_t get_pstate_amd(void)
+{
+    struct p_state_stat_reg_amd pstat;
+
+    rdmsrl(MSR_PSTATE_STAT_REG_AMD, pstat.val);
+
+    get_cpu_var(core_state).cur_pstate=pstat.reg.pstate;
+    put_cpu_var(core_state);
+
+    return pstat.reg.pstate;
+}
+
+/*
+ * Write the requested P-state to the control MSR and cache it.
+ * Only the low 4 bits of p fit in the cmd field; the cached value
+ * is the untruncated p.
+ */
+static void set_pstate_amd(uint8_t p)
+{
+    struct p_state_ctl_reg_amd pctl;
+
+    pctl.val = 0;
+    pctl.reg.cmd = p;
+
+    wrmsrl(MSR_PSTATE_CTL_REG_AMD, pctl.val);
+
+    get_cpu_var(core_state).cur_pstate=p;
+    put_cpu_var(core_state);
+}
+
+/*
+ * NOTE: HW may change this value at runtime
+ */
+static uint8_t get_max_pstate_amd(void)
+{
+    struct p_state_limit_reg_amd plimits;
+
+    rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
+
+    return plimits.reg.pstate_max;
+}
+
+static uint8_t get_min_pstate_amd(void)
+{
+    struct p_state_limit_reg_amd plimits;
+
+    rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
+
+    return plimits.reg.pstate_limit;
+}
+
+static struct pstate_core_funcs amd_funcs =
+{
+    .arch_init      = init_arch_amd,
+    .arch_deinit    = deinit_arch_amd,
+    .get_pstate     = get_pstate_amd,
+    .set_pstate     = set_pstate_amd,
+    .get_max_pstate = get_max_pstate_amd,
+    .get_min_pstate = get_min_pstate_amd,
+};
+
+/***********************************************************
+  INTEL DIRECT CONTROL
+**********************************************************/
+
+/* Intel System Programmer's Manual Vol. 3B, 14-2 */
+#define MSR_MPERF_IA32            0x000000e7
+#define MSR_APERF_IA32            0x000000e8
+#define MSR_MISC_ENABLE_IA32      0x000001a0
+#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_PLATFORM_INFO_IA32    0x000000ce
+#define MSR_PERF_CTL_IA32         0x00000199
+
+/* Bit view of the value read from MSR_PLATFORM_INFO_IA32 */
+struct turbo_mode_info_reg_intel {
+    union {
+        uint64_t val;
+        struct {
+            uint8_t  rsvd0;
+            uint8_t  max_noturbo_ratio;
+            uint16_t rsvd1         : 12;
+            uint8_t  ratio_limit   : 1;
+            uint8_t  tdc_tdp_limit : 1;
+            uint16_t rsvd2         : 10;
+            uint8_t  min_ratio;
+            uint16_t rsvd3;
+        } reg;
+    } __attribute__((packed));
+} __attribute__((packed));
+
+/* CPUID.01H:ECX.EIST[bit 7] (Enhanced Intel SpeedStep); AES is bit 25, not bit 7 */
+static uint8_t supports_pstates_intel(void)
+{
+    /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H).
+     */
+    uint32_t eax, ebx, ecx, edx;
+
+    cpuid(0x1, &eax, &ebx, &ecx, &edx);
+    return !!(ecx & (1 << 7));
+}
+
+/* Set bit 16 of IA32_MISC_ENABLE (Enhanced Intel SpeedStep enable) */
+static void init_arch_intel(void)
+{
+    uint64_t val;
+
+    rdmsrl(MSR_MISC_ENABLE_IA32, val);
+
+    /* 64-bit constant so the mask has the same width as the MSR value */
+    val |= 1ULL << 16;
+
+    wrmsrl(MSR_MISC_ENABLE_IA32, val);
+}
+
+static void deinit_arch_intel(void)
+{
+    // ??
+}
+
+/* TODO: Intel P-states require sampling at intervals... */
+static uint8_t get_pstate_intel(void)
+{
+    uint8_t pstate;
+
+    // This should read the HW...
+    pstate=get_cpu_var(core_state).cur_pstate;
+    put_cpu_var(core_state);
+    return pstate;
+}
+
+/* Write ratio p into bits 15:8 of IA32_PERF_CTL and cache it for this core */
+static void set_pstate_intel(uint8_t p)
+{
+    uint64_t val = ((uint64_t)p) << 8;
+
+    /* ...Intel IDA (dynamic acceleration)
+    if (c->no_turbo && !c->turbo_disabled) {
+        val |= 1 << 32;
+    }
+    */
+
+    wrmsrl(MSR_PERF_CTL_IA32, val);
+
+    get_cpu_var(core_state).cur_pstate = p;
+    put_cpu_var(core_state);
+}
+
+static uint8_t get_min_pstate_intel(void)
+{
+    struct turbo_mode_info_reg_intel t;
+
+    rdmsrl(MSR_PLATFORM_INFO_IA32, t.val);
+
+    return t.reg.min_ratio;
+}
+
+static uint8_t get_max_pstate_intel (void)
+{
+    struct turbo_mode_info_reg_intel t;
+
+    rdmsrl(MSR_PLATFORM_INFO_IA32, t.val);
+
+    return t.reg.max_noturbo_ratio;
+}
+
+static struct pstate_core_funcs intel_funcs =
+{
+    .arch_init      = init_arch_intel,
+    .arch_deinit    = deinit_arch_intel,
+    .get_pstate     = get_pstate_intel,
+    .set_pstate     = set_pstate_intel,
+    .get_max_pstate = get_max_pstate_intel,
+    .get_min_pstate = get_min_pstate_intel,
+};
+
+/***********************************************
+  Arch determination and setup
+***********************************************/
+
+/* Execute CPUID leaf id; dest receives EAX, EBX, ECX, EDX in that order */
+static inline void cpuid_string (uint32_t id, uint32_t dest[4])
+{
+    asm volatile("cpuid"
+                 :"=a"(dest[0]),"=b"(dest[1]),"=c"(dest[2]),"=d"(dest[3])
+                 :"a"(id));
+}
+
+/*
+ * Fill name with the 12-character CPUID vendor string (NUL-terminated)
+ * and return the maximum basic CPUID leaf.
+ */
+static int get_cpu_vendor (char name[13])
+{
+    uint32_t dest[4];
+
+    cpuid_string(0,dest);
+
+    /* The vendor string is laid out as EBX, EDX, ECX.  Copy bytewise
+       rather than storing through a uint32_t* cast of the char buffer. */
+    memcpy(name,     &dest[1], sizeof(uint32_t));
+    memcpy(name + 4, &dest[3], sizeof(uint32_t));
+    memcpy(name + 8, &dest[2], sizeof(uint32_t));
+    name[12]=0;
+
+    return dest[0];
+}
+
+static int is_intel (void)
+{
+    char name[13];
+
+    get_cpu_vendor(name);
+    return !strcmp(name,"GenuineIntel");
+}
+
+static int is_amd (void)
+{
+    char name[13];
+
+    get_cpu_vendor(name);
+    return !strcmp(name,"AuthenticAMD");
+}
+
+/*
+ * Detect the CPU vendor and select the matching direct-control function
+ * table (NULL when the vendor is neither AMD nor Intel).  Always returns 0.
+ */
+static int pstate_arch_setup(void)
+{
+    if (is_amd()) {
+        machine_state.arch = AMD;
+        machine_state.funcs = &amd_funcs;
+        machine_state.supports_pstates = supports_pstates_amd();
+        INFO("PSTATE: P-State initialized for AMD\n");
+    } else if (is_intel()) {
+        machine_state.arch = INTEL;
+        machine_state.funcs = &intel_funcs;
+        machine_state.supports_pstates = supports_pstates_intel();
+        INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n");
+    } else {
+        machine_state.arch = OTHER;
+        machine_state.funcs = NULL;
+        machine_state.supports_pstates = 0;
+        INFO("PSTATE: P-state control: No support for direct control on this architecture\n");
+    }
+
+    return 0;
+}
+
+/******************************************************************
+  Linux Interface
+*****************************************************************/
+
+#if 0
+// The purpose of the stub governor is to pretend to keep
+// the processor at the maximum frequency, while we manipulate the
+// processor core directly
+static int governor_run(struct cpufreq_policy *policy, unsigned int event)
+{
+    switch (event) {
+        case CPUFREQ_GOV_START:
+        case CPUFREQ_GOV_STOP:
+            cpu_freq_driver_target(policy, policy->max_freq);
+
+        case CPUFREQ_GOV_LIMITS:
+    }
+}
+
+static struct cpufreq_governor stub_governor =
+{
+    .name="PALACIOS_STUB",
+    .governor=governor_run,
+    .owner=.THIS_MODULE,
+}
+
+static void linux_init(void)
+{
+    // get_policy
+    //
+    // change to userspace governor - or change to our do nothing governor? (call set_speed)
+    // stash the old governor
+    // tell governor to do max freq
+
+}
+
+static void linux_deinit(void)
+{
+}
+
+static uint8_t linux_get_pstate(void)
+{
+    return 0;
+}
+
+static void linux_set_pstate(uint8_t p)
+{
+}
+
+static void linux_restore_defaults(void)
+{
+}
+
+#endif
+
+/******************************************************************
+  Generic Interface as provided to Palacios and to the rest of the
+  module
+******************************************************************/
+
+/*
+ * NOTE on per-CPU access in this section: every get_cpu_var() disables
+ * preemption and must be paired with exactly one put_cpu_var().  Each
+ * function below therefore takes the per-CPU pointer once and releases
+ * it once on every path.
+ */
+
+/* Reset the calling core's state to host control and record its limits */
+static void init_core(void)
+{
+    unsigned cpu;
+    struct cpufreq_policy *p;
+    struct pstate_core_info *s;
+
+    DEBUG("P-State Core Init\n");
+
+    s = &get_cpu_var(core_state);
+
+    s->mode = V3_PSTATE_HOST_CONTROL;
+    s->cur_pstate = 0;
+
+    if (machine_state.funcs) {
+        s->min_pstate = machine_state.funcs->get_min_pstate();
+        s->max_pstate = machine_state.funcs->get_max_pstate();
+    } else {
+        s->min_pstate = 0;
+        s->max_pstate = 0;
+    }
+
+    cpu = get_cpu(); put_cpu();
+
+    p = cpufreq_cpu_get(cpu);
+
+    if (!p) {
+        s->have_cpufreq = 0;
+        s->min_freq_khz=0;
+        s->max_freq_khz=0;
+        s->cur_freq_khz=0;
+    } else {
+        s->have_cpufreq = 1;
+        s->min_freq_khz=p->min;
+        s->max_freq_khz=p->max;
+        s->cur_freq_khz=p->cur;
+        cpufreq_cpu_put(p);
+    }
+
+    put_cpu_var(core_state);
+}
+
+void palacios_pstate_ctrl_release(void);
+
+static void deinit_core(void)
+{
+    DEBUG("P-State Core Deinit\n");
+    palacios_pstate_ctrl_release();
+}
+
+/* Report the calling core's control features, mode and limits in *c */
+void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c)
+{
+    struct pstate_core_info *s;
+
+    memset(c,0,sizeof(struct v3_cpu_pstate_chars));
+
+    s = &get_cpu_var(core_state);
+
+    c->features = V3_PSTATE_INTERNAL_CONTROL;
+
+    if (s->have_cpufreq) {
+        c->features |= V3_PSTATE_EXTERNAL_CONTROL;
+    }
+
+    if (machine_state.arch==AMD || machine_state.arch==INTEL) {
+        c->features |= V3_PSTATE_DIRECT_CONTROL;
+    }
+
+    c->cur_mode = s->mode;
+    c->min_pstate = s->min_pstate;
+    c->max_pstate = s->max_pstate;
+    c->cur_pstate = s->cur_pstate;
+    c->min_freq_khz = s->min_freq_khz;
+    c->max_freq_khz = s->max_freq_khz;
+    c->cur_freq_khz = s->cur_freq_khz;
+
+    put_cpu_var(core_state);
+}
+
+/* Current P-state of the calling core, or 0 when not under direct control */
+uint8_t palacios_pstate_ctrl_get_pstate(void)
+{
+    uint32_t mode = get_cpu_var(core_state).mode;
+
+    put_cpu_var(core_state);
+
+    if (mode != V3_PSTATE_DIRECT_CONTROL) {
+        return 0;
+    }
+
+    return machine_state.funcs->get_pstate();
+}
+
+/* Set the calling core's P-state; ignored unless under direct control */
+void palacios_pstate_ctrl_set_pstate(uint8_t p)
+{
+    uint32_t mode = get_cpu_var(core_state).mode;
+
+    /* released on both paths (the non-direct path used to skip this) */
+    put_cpu_var(core_state);
+
+    if (mode == V3_PSTATE_DIRECT_CONTROL) {
+        machine_state.funcs->set_pstate(p);
+    }
+}
+
+void palacios_pstate_ctrl_set_pstate_wrapper(void *p)
+{
+    palacios_pstate_ctrl_set_pstate((uint8_t)(uint64_t)p);
+}
+
+uint64_t palacios_pstate_ctrl_get_freq(void)
+{
+    uint32_t mode = get_cpu_var(core_state).mode;
+
+    put_cpu_var(core_state);
+
+    if (mode == V3_PSTATE_EXTERNAL_CONTROL) {
+        ERROR("Unimplemented get freq\n");
+    }
+
+    return 0;
+}
+
+void palacios_pstate_ctrl_set_freq(uint64_t p)
+{
+    uint32_t mode = get_cpu_var(core_state).mode;
+
+    /* released exactly once (the external path used to release twice) */
+    put_cpu_var(core_state);
+
+    if (mode == V3_PSTATE_EXTERNAL_CONTROL) {
+        ERROR("Unimplemented set freq\n");
+    }
+}
+
+static void switch_to_external(void)
+{
+    int have_cpufreq = get_cpu_var(core_state).have_cpufreq;
+
+    put_cpu_var(core_state);
+
+    if (!have_cpufreq) {
+        ERROR("No cpufreq - cannot switch to external...\n");
+        return;
+    }
+
+    ERROR("Unimplemented switch to external...\n");
+}
+
+static void switch_to_direct(void)
+{
+    struct pstate_core_info *s = &get_cpu_var(core_state);
+
+    if (s->have_cpufreq) {
+        ERROR("Unimplemented: switch to direct on machine with cpu freq\n");
+        // The implementation would set the policy and governor to peg cpu
+        // regardless of load
+    }
+
+    if (machine_state.funcs && machine_state.funcs->arch_init) {
+        s->mode=V3_PSTATE_DIRECT_CONTROL;
+
+        machine_state.funcs->arch_init();
+    }
+
+    put_cpu_var(core_state);
+}
+
+static void switch_to_internal(void)
+{
+    struct pstate_core_info *s = &get_cpu_var(core_state);
+
+    if (s->have_cpufreq) {
+        ERROR("Unimplemented: switch to internal on machine with cpu freq\n");
+        // The implementation would set the policy and governor to peg cpu
+        // regardless of load - exactly like direct
+    } else {
+        s->mode=V3_PSTATE_INTERNAL_CONTROL;
+    }
+
+    put_cpu_var(core_state);
+}
+
+static void switch_from_external(void)
+{
+    struct pstate_core_info *s = &get_cpu_var(core_state);
+
+    if (!s->have_cpufreq) {
+        ERROR("No cpufreq - how did we get here... external...\n");
+    } else {
+        ERROR("Unimplemented switch from external...\n");
+
+        s->mode = V3_PSTATE_HOST_CONTROL;
+    }
+
+    put_cpu_var(core_state);
+}
+
+static void switch_from_direct(void)
+{
+    struct pstate_core_info *s = &get_cpu_var(core_state);
+
+    if (s->have_cpufreq) {
+        ERROR("Unimplemented: switch from direct on machine with cpu freq - will just pretend to do so\n");
+        // The implementation would switch back to default policy and governor
+    }
+
+    s->mode=V3_PSTATE_HOST_CONTROL;
+
+    /* direct mode is only entered with a function table present;
+       the check guards against being called in any other state */
+    if (machine_state.funcs) {
+        machine_state.funcs->set_pstate(s->min_pstate);
+
+        machine_state.funcs->arch_deinit();
+    }
+
+    put_cpu_var(core_state);
+}
+
+static void switch_from_internal(void)
+{
+    struct pstate_core_info *s = &get_cpu_var(core_state);
+
+    if (s->have_cpufreq) {
+        ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n");
+        // The implementation would switch back to default policy and governor
+    }
+
+    s->mode=V3_PSTATE_HOST_CONTROL;
+
+    put_cpu_var(core_state);
+}
+
+/*
+ * Take control of the calling core's P-state in the requested mode,
+ * first releasing any mode that is currently held.
+ */
+void palacios_pstate_ctrl_acquire(uint32_t type)
+{
+    uint32_t mode = get_cpu_var(core_state).mode;
+
+    put_cpu_var(core_state);
+
+    if (mode != V3_PSTATE_HOST_CONTROL) {
+        palacios_pstate_ctrl_release();
+    }
+
+    switch (type) {
+        case V3_PSTATE_EXTERNAL_CONTROL:
+            switch_to_external();
+            break;
+        case V3_PSTATE_DIRECT_CONTROL:
+            switch_to_direct();
+            break;
+        case V3_PSTATE_INTERNAL_CONTROL:
+            switch_to_internal();
+            break;
+        default:
+            ERROR("Unknown pstate control type %u\n",type);
+            break;
+    }
+}
+
+// Wrappers for xcalls
+static void palacios_pstate_ctrl_acquire_external(void)
+{
+    palacios_pstate_ctrl_acquire(V3_PSTATE_EXTERNAL_CONTROL);
+}
+
+static void palacios_pstate_ctrl_acquire_direct(void)
+{
+    palacios_pstate_ctrl_acquire(V3_PSTATE_DIRECT_CONTROL);
+}
+
+/* Hand the calling core's P-state control back to the host */
+void palacios_pstate_ctrl_release(void)
+{
+    uint32_t mode = get_cpu_var(core_state).mode;
+
+    put_cpu_var(core_state);
+
+    if (mode == V3_PSTATE_HOST_CONTROL) {
+        return;
+    }
+
+    switch (mode) {
+        case V3_PSTATE_EXTERNAL_CONTROL:
+            switch_from_external();
+            break;
+        case V3_PSTATE_DIRECT_CONTROL:
+            switch_from_direct();
+            break;
+        case V3_PSTATE_INTERNAL_CONTROL:
+            switch_from_internal();
+            break;
+        default:
+            /* report the value read through the per-CPU accessor */
+            ERROR("Unknown pstate control type %u\n",mode);
+            break;
+    }
+}
+
+/* xcall target: refresh cur_hw_pstate on the core it runs on */
+static void update_hw_pstate(void *arg)
+{
+    uint8_t hw = 0;
+
+    if (machine_state.funcs && machine_state.funcs->get_pstate) {
+        hw = machine_state.funcs->get_pstate();
+    }
+
+    get_cpu_var(core_state).cur_hw_pstate = hw;
+    put_cpu_var(core_state);
+}
+
+/***************************************************************************
+  PROC Interface to expose state
+***************************************************************************/
+
+static int pstate_show(struct seq_file * file, void * v)
+{
+    unsigned int cpu;
+    unsigned int numcpus = num_online_cpus();
+
+    seq_printf(file, "V3VEE DVFS Status\n\n");
+
+    for (cpu=0;cpucur_hw_pstate,
+                   s->mode==V3_PSTATE_HOST_CONTROL ? "host" :
+                   s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" :
+                   s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" :
+                   s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN");
+        if (s->have_cpufreq) {
+            seq_printf(file," external ");
+        }
+        if (machine_state.arch==AMD || machine_state.arch==INTEL) {
+            seq_printf(file,"direct ");
+        }
+        seq_printf(file,"internal ] ");
+        if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) {
+            seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz);
+        }
+        if (s->mode==V3_PSTATE_DIRECT_CONTROL) {
+            seq_printf(file,"(min=%u max=%u cur=%u) ", (uint32_t)s->min_pstate, (uint32_t)s->max_pstate, (uint32_t)s->cur_pstate);
+        }
+        seq_printf(file,"\n");
+    }
+    return 0;
+}
+
+static int pstate_open(struct inode * inode, struct file * file)
+{
+    return single_open(file, pstate_show, NULL);
+}
+
+static struct file_operations pstate_fops = {
+    .owner = THIS_MODULE,
+    .open = pstate_open,
+    .read = seq_read,
+    .llseek = seq_lseek,
+    .release = seq_release
+};
+
+/* Create the read-only v3-dvfs proc entry; returns 0 on success, -1 on failure */
+int pstate_proc_setup(void)
+{
+    struct proc_dir_entry *proc;
+
+    proc = create_proc_entry("v3-dvfs",0444, palacios_get_procdir());
+
+    if (!proc) {
+        ERROR("Failed to create proc entry for p-state control\n");
+        return -1;
+    }
+
+    proc->proc_fops = &pstate_fops;
+
+    return 0;
+}
+
+void pstate_proc_teardown(void)
+{
+    remove_proc_entry("v3-dvfs",palacios_get_procdir());
+}
+
+/********************************************************************
+  User interface (ioctls)
+********************************************************************/
+
+/*
+ * palacios_xcall() invokes its target as void (*)(void *).  These thunks
+ * give every target exactly that type, so no function is ever called
+ * through a cast to an incompatible function pointer type.
+ */
+static void dvfs_xcall_acquire_external(void *unused)
+{
+    palacios_pstate_ctrl_acquire_external();
+}
+
+static void dvfs_xcall_acquire_direct(void *unused)
+{
+    palacios_pstate_ctrl_acquire_direct();
+}
+
+static void dvfs_xcall_release(void *unused)
+{
+    palacios_pstate_ctrl_release();
+}
+
+static void dvfs_xcall_set_freq(void *freq_khz)
+{
+    palacios_pstate_ctrl_set_freq((uint64_t)freq_khz);
+}
+
+/*
+ * Handle a V3_DVFS_CTRL ioctl: copy the request from user space, check
+ * the target pcore, and run the requested operation on that core via
+ * palacios_xcall().  Returns 0 on success and -EFAULT on a failed copy,
+ * an out-of-range pcore, or an unknown command / acquire type.
+ */
+static int dvfs_ctrl(unsigned int cmd, unsigned long arg)
+{
+    struct v3_dvfs_ctrl_request r;
+    int ret = 0;
+
+    if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) {
+        ERROR("Failed to copy DVFS request from user\n");
+        return -EFAULT;
+    }
+
+    if (r.pcore >= num_online_cpus()) {
+        ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore);
+        return -EFAULT;
+    }
+
+    switch (r.cmd) {
+        case V3_DVFS_ACQUIRE:
+            switch (r.acq_type) {
+                case V3_DVFS_EXTERNAL:
+                    palacios_xcall(r.pcore,dvfs_xcall_acquire_external,0);
+                    break;
+                case V3_DVFS_DIRECT:
+                    palacios_xcall(r.pcore,dvfs_xcall_acquire_direct,0);
+                    break;
+                default:
+                    ERROR("Unknown DVFS acquire type %u\n",r.acq_type);
+                    ret = -EFAULT;
+                    break;
+            }
+            break;
+        case V3_DVFS_RELEASE:
+            palacios_xcall(r.pcore,dvfs_xcall_release,0);
+            break;
+        case V3_DVFS_SETFREQ:
+            palacios_xcall(r.pcore,dvfs_xcall_set_freq,(void*)r.freq_khz);
+            break;
+        case V3_DVFS_SETPSTATE:
+            palacios_xcall(r.pcore,palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate);
+            break;
+        default:
+            ERROR("Unknown DVFS command %u\n",r.cmd);
+            ret = -EFAULT;
+            break;
+    }
+
+    return ret;
+}
+
+void pstate_user_setup(void)
+{
+    add_global_ctrl(V3_DVFS_CTRL, dvfs_ctrl);
+}
+
+void pstate_user_teardown(void)
+{
+    remove_global_ctrl(V3_DVFS_CTRL);
+}
+
+static struct v3_host_pstate_ctrl_iface hooks = {
+    .get_chars = palacios_pstate_ctrl_get_chars,
+    .acquire = palacios_pstate_ctrl_acquire,
+    .release = palacios_pstate_ctrl_release,
+    .set_pstate = palacios_pstate_ctrl_set_pstate,
+    .get_pstate = palacios_pstate_ctrl_get_pstate,
+    .set_freq = palacios_pstate_ctrl_set_freq,
+    .get_freq = palacios_pstate_ctrl_get_freq,
+};
+
+static int pstate_ctrl_init(void)
+{
+    unsigned int cpu;
+    unsigned int numcpus = num_online_cpus();
+
+    pstate_arch_setup();
+
+    for (cpu=0;cpu
+ * all rights reserved.
+ *
+ * Author: Kyle C. Hale
+ *         Shiva Rao
+ *         Peter Dinda
+ *
+ * This is free software.  you are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */ + + + +#include + +// These functions are available for use within the module +// They affect the current core + +void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c); + +void palacios_pstate_ctrl_acquire(uint32_t type); +void palacios_pstate_ctrl_release(void); + + +uint8_t palacios_pstate_ctrl_get_pstate(void); +void palacios_pstate_ctrl_set_pstate(uint8_t p); + +uint64_t palacios_pstate_ctrl_get_freq(void); +void palacios_pstate_ctrl_set_freq(uint64_t f_khz); + + +// This structure is how the user space commands us +struct v3_dvfs_ctrl_request { + enum {V3_DVFS_ACQUIRE, // Take control over a pcore from host + V3_DVFS_RELEASE, // Release control of a pcore to host + V3_DVFS_SETFREQ, // Set frequency of acquired pcore + V3_DVFS_SETPSTATE} cmd; // Set pstate of acquired pcore + enum {V3_DVFS_EXTERNAL, + V3_DVFS_DIRECT } acq_type; // External for setting freq using Linux + // Direct for setting pstate directly using module + uint32_t pcore; // Which core we mean + uint64_t freq_khz; // for setfreq + uint8_t pstate; // for setpstate +}; + +#endif diff --git a/linux_module/palacios-stubs.c b/linux_module/palacios-stubs.c index 5ef9c5c..ba9da58 100644 --- a/linux_module/palacios-stubs.c +++ b/linux_module/palacios-stubs.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -305,7 +306,7 @@ palacios_alloc(unsigned int size) { // this function is used extensively throughout palacios and the linux // module, both in places where interrupts are off and where they are on // a GFP_KERNEL call, when done with interrupts off can lead to DEADLOCK - if (irqs_disabled()) { + if (irqs_disabled() || in_atomic()) { return palacios_alloc_extended(size,GFP_ATOMIC,-1); } else { return palacios_alloc_extended(size,GFP_KERNEL,-1); @@ -357,7 +358,7 @@ palacios_paddr_to_vaddr( /** * Runs a function on the specified CPU. 
*/ -static void +void palacios_xcall( int cpu_id, void (*fn)(void *arg), @@ -834,7 +835,7 @@ void palacios_used_fpu(void) struct thread_info *cur = current_thread_info(); // We assume we are not preemptible here... - cur->status |= TS_USEDFPU; + cur->status |= 1; clts(); // After this, FP Save should be handled by Linux if it // switches to a different task and that task uses FPU diff --git a/linux_module/palacios.h b/linux_module/palacios.h index f254522..1b5cf44 100644 --- a/linux_module/palacios.h +++ b/linux_module/palacios.h @@ -18,6 +18,9 @@ #define V3_ADD_PCI_HW_DEV 55 #define V3_ADD_PCI_USER_DEV 56 +#define V3_DVFS_CTRL 60 + + /* VM Specific IOCTLs */ #define V3_VM_CONSOLE_CONNECT 20 #define V3_VM_STREAM_CONNECT 21 @@ -163,6 +166,7 @@ void *palacios_valloc(unsigned int size); // use instead of vmalloc void palacios_vfree(void *); // use instead of vfree void *palacios_vaddr_to_paddr(void *vaddr); void *palacios_paddr_to_vaddr(void *paddr); +void palacios_xcall(int cpu_id, void (*fn)(void *arg), void *arg); void *palacios_create_and_start_kernel_thread(int (*fn)(void * arg), void *arg, char *thread_name); void *palacios_create_thread_on_cpu(int cpu_id, int (*fn)(void * arg), void *arg, char *thread_name); void palacios_start_thread(void *thread_ptr);