Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


P-State (DVFS) Enhancements
[palacios.git] / linux_module / iface-pstate-ctrl.c
1 /*
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2014, the V3VEE Project <http://www.v3vee.org>
11  * all rights reserved.
12  *
13  * Author: Kyle C. Hale <kh@u.northwestern.edu>
14  *         Shiva Rao <shiva.rao.717@gmail.com>
15  *         Peter Dinda <pdinda@northwestern.edu>
16  *
17  * This is free software.  you are permitted to use,
18  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
19  */
20
21 #include <linux/uaccess.h>
22 #include <linux/seq_file.h>
23 #include <linux/proc_fs.h>
24 #include <linux/cpufreq.h>
25 #include <linux/kernel.h>
26 #include <linux/kmod.h>
27 #include <linux/string.h>
28 #include <asm/processor.h>
29 #include <asm/msr.h>
30 #include <asm/msr-index.h>
31
32 // Used to determine the appropriate pstates values on Intel
33 #include <linux/acpi.h>
34 #include <acpi/processor.h>
35
36 #include <interfaces/vmm_pstate_ctrl.h>
37
38 #include "palacios.h"
39 #include "iface-pstate-ctrl.h"
40
41 #include "linux-exts.h"
42
43 /*
44    This P-STATE control implementation includes:
45
46    - Direct control of Intel and AMD processor pstates
47    - External control of processor states via Linux (unimplemented)
48    - Internal control of processor states in Palacios (handoff from Linux)
49
50    Additionally, it provides a user-space interface for manipulating
51    p-state regardless of the host's functionality.  This includes
52    an ioctl for commanding the implementation and a /proc file for 
53    showing current status and capabilities.
54
55    What we mean by "pstate" here is the processor's internal
56    configuration.   For AMD, this is defined as being the same as
57    the ACPI-defined p-state.  For Intel, it is not.  There, it is the 
58    contents of the perf ctl MSR, which, often, is the frequency id 
59    and voltage id (the multipliers).
60
61 */
62
63
64 #define PALACIOS_GOVNAME "v3vee"
65 #define MAX_PATH_LEN     128
66 #define MAX_GOV_NAME_LEN 16
67
68
/* Mode value meaning the host keeps control of the core's p-state. */
#define V3_PSTATE_HOST_CONTROL 0

/*
 * Per-core p-state bookkeeping (one instance per CPU).
 */
struct pstate_core_info {
    /*
     * V3_PSTATE_HOST_CONTROL, or one of the Palacios interface modes:
     * V3_PSTATE_EXTERNAL_CONTROL, V3_PSTATE_DIRECT_CONTROL,
     * V3_PSTATE_INTERNAL_CONTROL.
     */
    uint32_t mode;

    /* Meaningful while in the DIRECT mode */
    uint8_t cur_pstate;
    uint8_t max_pstate;
    uint8_t min_pstate;

    uint8_t cur_hw_pstate;

    /* Meaningful while in the EXTERNAL mode */
    uint64_t cur_freq_khz;
    uint64_t max_freq_khz;
    uint64_t min_freq_khz;

    /* Intel-specific state */
    uint8_t prior_speedstep;
    uint8_t turbo_disabled;
    uint8_t no_turbo;

    int have_cpufreq;

    /* Linux's governor name, stashed when we make a mode switch */
    char * linux_governor;

    /* Kept so the frequency in effect when we started can be restored */
    uint64_t original_hz;
};
103
104
105 static DEFINE_PER_CPU(struct pstate_core_info, core_state);
106
107
108
/*
 * Architecture-specific operations used to assert DIRECT control
 * over the core p-states.
 */
struct pstate_core_funcs {
    void     (*arch_init)(void);            /* per-core setup */
    void     (*arch_deinit)(void);          /* per-core teardown */
    uint64_t (*get_min_pstate)(void);
    uint64_t (*get_max_pstate)(void);
    uint64_t (*get_pstate)(void);           /* read current p-state */
    void     (*set_pstate)(uint64_t pstate);/* request a p-state */
};
118
/*
 * Machine-wide p-state capabilities, filled in by the
 * supports_pstates_*() probes and pstate_arch_setup().
 */
struct pstate_machine_info {
    enum {INTEL, AMD, OTHER } arch;
    int supports_pstates;

    /* AMD feature flags */
    int have_pstate;
    int have_coreboost;
    int have_feedback;

    /* Intel feature flags */
    int have_speedstep;
    int have_opportunistic; /* "Turbo Boost" or "IDA" */
    int have_policy_hint;
    int have_hwp;           /* hardware-controlled performance states */
    int have_hdc;           /* hardware duty cycling */
    int have_mwait_ext;     /* mwait power extensions */
    int have_mwait_int;     /* mwait wakes on interrupt */

    /* Common to both vendors */
    int have_pstate_hw_coord;  /* mperf/aperf */

    /* Operations used for DIRECT control (NULL when unsupported) */
    struct pstate_core_funcs *funcs;
};
145
146 static struct pstate_machine_info machine_state;
147
148
149 /****************************************************
150   AMD  DIRECT CONTROL
151  ***************************************************/
152
/*
 * AMD P-state MSRs.
 * See AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557
 */
#define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061  /* limits  */
#define MSR_PSTATE_CTL_REG_AMD   0xc0010062  /* control */
#define MSR_PSTATE_STAT_REG_AMD  0xc0010063  /* status  */
157
/* Overlay for MSR_PSTATE_LIMIT_REG_AMD. */
struct p_state_limit_reg_amd {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            /* lowest P-state value (highest perf.) supported currently;
               this can change at runtime */
            uint8_t  pstate_limit : 4;
            /* highest P-state value supported (lowest perf) */
            uint8_t  pstate_max   : 4;
            uint64_t rsvd         : 56;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
168
169
/* Overlay for MSR_PSTATE_STAT_REG_AMD. */
struct p_state_stat_reg_amd {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            uint8_t  pstate  : 4;   /* low four bits of the register */
            uint64_t rsvd    : 60;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
179
180
/* Overlay for MSR_PSTATE_CTL_REG_AMD. */
struct p_state_ctl_reg_amd {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            uint8_t  cmd  : 4;      /* requested p-state */
            uint64_t rsvd : 60;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
190
191
192 /* CPUID Fn8000_0007_EDX[HwPstate(7)] = 1 */
193 static uint8_t supports_pstates_amd (void)
194 {
195     uint32_t eax, ebx, ecx, edx;
196
197     cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
198     machine_state.have_pstate = !!(edx & (1 << 7));
199     machine_state.have_coreboost = !!(edx & (1<<9));
200     machine_state.have_feedback = !!(edx & (1<<11));
201
202     cpuid(0x6, &eax, &ebx, &ecx, &edx);
203     machine_state.have_pstate_hw_coord =  !!(ecx & 1); 
204
205     INFO("P-State: AMD: Pstates=%d Coreboost=%d Feedback=%d PstateHWCoord=%d\n",
206             machine_state.have_pstate, 
207             machine_state.have_coreboost, 
208             machine_state.have_feedback,
209             machine_state.have_pstate_hw_coord);
210
211     return machine_state.have_pstate;
212
213
214 }
215
216
/* Per-core DIRECT-control setup hook for AMD. */
static void init_arch_amd(void)
{
    /* KCH: intentionally empty, nothing to do on AMD */
}
221
222
/* Per-core DIRECT-control teardown hook for AMD. */
static void deinit_arch_amd(void)
{
    /* KCH: intentionally empty, nothing to do on AMD */
}
227
228
229 static uint64_t get_pstate_amd(void) 
230 {
231     struct p_state_stat_reg_amd pstat;
232
233     rdmsrl(MSR_PSTATE_STAT_REG_AMD, pstat.val);
234
235     get_cpu_var(core_state).cur_pstate=pstat.reg.pstate;
236     put_cpu_var(core_state);
237
238     return pstat.reg.pstate;
239 }
240
241
242 static void set_pstate_amd(uint64_t p)
243 {
244     struct p_state_ctl_reg_amd pctl;
245     pctl.val = 0;
246     pctl.reg.cmd = p;
247
248     wrmsrl(MSR_PSTATE_CTL_REG_AMD, pctl.val);
249
250     get_cpu_var(core_state).cur_pstate=p;
251     put_cpu_var(core_state);
252 }
253
254
255 /*
256  * NOTE: HW may change this value at runtime
257  */
258 static uint64_t get_max_pstate_amd(void)
259 {
260     struct p_state_limit_reg_amd plimits;
261
262     rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
263
264     return plimits.reg.pstate_max;
265 }
266
267
268 static uint64_t get_min_pstate_amd(void)
269 {
270     struct p_state_limit_reg_amd plimits;
271
272     rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
273
274     return plimits.reg.pstate_limit;
275 }
276
277
278 static struct pstate_core_funcs amd_funcs =
279 {
280     .arch_init        = init_arch_amd,
281     .arch_deinit      = deinit_arch_amd,
282     .get_pstate       = get_pstate_amd,
283     .set_pstate       = set_pstate_amd,
284     .get_max_pstate   = get_max_pstate_amd,
285     .get_min_pstate   = get_min_pstate_amd,
286 };
287
288
289
290 /***********************************************************
291   INTEL DIRECT CONTROL
292  **********************************************************/
293
294
295 /*
296    This implementation uses SpeedStep, but does check
297    to see if the other features (MPERF/APERF, Turbo/IDA, HWP)
298    are available.
299    */
300
/*
 * Intel MSR addresses used by this file.
 * See Intel System Programmer's Manual Vol. 3B, 14-2
 */
#define MSR_MPERF_IA32              0x000000e7
#define MSR_APERF_IA32              0x000000e8
#define MSR_MISC_ENABLE_IA32        0x000001a0
#define MSR_NHM_TURBO_RATIO_LIMIT   0x000001ad
#define MSR_PLATFORM_INFO_IA32      0x000000ce
#define MSR_PERF_CTL_IA32           0x00000199
#define MSR_PERF_STAT_IA32          0x00000198
/* Name keeps its historical spelling ("ENERY") so existing users still build */
#define MSR_ENERY_PERF_BIAS_IA32    0x000001b0
310
311
312 /* Note that the actual  meaning of the pstate
313    in the control and status registers is actually
314    implementation dependent, unlike AMD.   The "official"
315    way to figure out the mapping from pstate to 
316    these values is via ACPI.  What is written in the register
317    is an "id" of an operation point
318
319    "Often", the 16 bit field consists of a high order byte
320    which is the frequency (the multiplier) and the low order
321    byte is the voltage. 
322    */
/* Overlay for MSR_PERF_CTL_IA32 (read/write). */
struct perf_ctl_reg_intel {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            /*
             * Target operating point.  This is not the ACPI pstate:
             * Intel treats the value as opaque.  On many implementations
             * it appears to be frequency_id : voltage_id, with
             * frequency_id typically being the multiplier.
             */
            uint16_t pstate                 : 16;
            uint16_t reserved               : 16;
            /*
             * Set to 1 to *disengage* dynamic acceleration.
             * "IDA" and "Turbo" share this interface.
             */
            uint16_t dynamic_accel_disable  : 1;
            uint32_t reserved2              : 31;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
343
/* Overlay for MSR_PERF_STAT_IA32 (read-only). */
struct perf_stat_reg_intel {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            uint16_t pstate                 : 16;  /* current operating point */
            uint64_t reserved               : 48;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
355
/* Overlay for the energy/performance bias MSR (read/write). */
struct enery_perf_bias_reg_intel {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            uint8_t  policy_hint            : 4;   /* current hint */
            uint64_t reserved               : 60;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
367
/*
 * Overlay for MSR_PLATFORM_INFO.
 *
 * The inner bit-field struct must itself be packed.  The packed attribute
 * on the enclosing union does not reach into it, and without it the
 * compiler will not let rsvd3, min_ratio or rsvd4 straddle the boundary
 * of their declared type.  They would then be pushed to later storage
 * units, the struct would grow beyond 64 bits, and min_ratio would no
 * longer line up with the register.
 */
struct turbo_mode_info_reg_intel {
    union {
        uint64_t val;   /* raw MSR contents */
        struct {
            uint8_t  rsvd0                  : 8;   /* bits  7:0  */
            uint8_t  max_noturbo_ratio      : 8;   /* bits 15:8  */
            uint8_t  rsvd1                  : 7;   /* bits 22:16 */
            uint8_t  ppin_cap               : 1;   /* bit  23    */
            uint8_t  rsvd2                  : 4;   /* bits 27:24 */
            uint8_t  ratio_limit            : 1;   /* bit  28    */
            uint8_t  tdc_tdp_limit          : 1;   /* bit  29    */
            uint16_t rsvd3                  : 10;  /* bits 39:30 */
            uint8_t  min_ratio              : 8;   /* bits 47:40 */
            uint16_t rsvd4                  : 16;  /* bits 63:48 */
        } __attribute__((packed)) reg;
    } __attribute__((packed));
} __attribute__((packed));
386
/*
 * One entry of the pstate -> control-value map.  It replicates the
 * critical information in Linux's struct acpi_processor_px, to make
 * it easier to port to other OSes.
 */
struct intel_pstate_info {
    uint64_t freq;  /* KHz */
    uint64_t ctrl;  /* value to write into the _CTL MSR to get this */
};
393
/* Fallback table, used when the mapping cannot be built locally */
static struct intel_pstate_info *intel_pstate_to_ctrl_internal = NULL;
static int intel_num_pstates_internal = 0;

/* Active table: either the internal array or a constructed array */
static struct intel_pstate_info *intel_pstate_to_ctrl = NULL;
static int intel_num_pstates = 0;
401
402
403 /* CPUID.01:ECX.AES(7) */
404 static uint8_t supports_pstates_intel(void)
405 {
406     /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H).
407     */
408     uint32_t eax, ebx, ecx, edx;
409
410     cpuid(0x1, &eax, &ebx, &ecx, &edx);
411     machine_state.have_speedstep =  !!(ecx & (1 << 7));
412
413     cpuid(0x6, &eax, &ebx, &ecx, &edx);
414     machine_state.have_pstate_hw_coord =  !!(ecx & 1); // ?
415     machine_state.have_opportunistic =  !!(eax & 1<<1);
416     machine_state.have_policy_hint = !!(ecx & 1<<3);
417     machine_state.have_hwp = !!(eax & 1<<7);
418     machine_state.have_hdc = !!(eax & 1<<13);
419
420     cpuid(0x5, &eax, &ebx, &ecx, &edx);
421     machine_state.have_mwait_ext =  !!(ecx & 1);
422     machine_state.have_mwait_int =  !!(ecx & 1<<1);
423
424
425     INFO("P-State: Intel: Speedstep=%d, PstateHWCoord=%d, Opportunistic=%d PolicyHint=%d HWP=%d HDC=%d, MwaitExt=%d MwaitInt=%d \n",
426             machine_state.have_speedstep, 
427             machine_state.have_pstate_hw_coord, 
428             machine_state.have_opportunistic,
429             machine_state.have_policy_hint,
430             machine_state.have_hwp,
431             machine_state.have_hdc,
432             machine_state.have_mwait_ext,
433             machine_state.have_mwait_int );
434
435
436     if (machine_state.have_speedstep) {
437         uint32_t i;
438         // Build mapping table (from "pstate" (0..) to ctrl value for MSR
439         if (!(get_cpu_var(processors)) || !(get_cpu_var(processors)->performance) ) { 
440             put_cpu_var(processors);
441             // no acpi...  revert to internal table
442             intel_pstate_to_ctrl=intel_pstate_to_ctrl_internal;
443             intel_num_pstates=intel_num_pstates_internal;
444         } else {
445             intel_num_pstates = get_cpu_var(processors)->performance->state_count;
446             if (intel_num_pstates) { 
447                 intel_pstate_to_ctrl = palacios_alloc(sizeof(struct intel_pstate_info)*intel_num_pstates);
448                 if (!intel_pstate_to_ctrl) { 
449                     ERROR("P-State: Cannot allocate space for mapping...\n");
450                     intel_num_pstates=0;
451                 }
452                 for (i=0;i<intel_num_pstates;i++) { 
453                     intel_pstate_to_ctrl[i].freq = get_cpu_var(processors)->performance->states[i].core_frequency*1000;
454                     intel_pstate_to_ctrl[i].ctrl = get_cpu_var(processors)->performance->states[i].control;
455                 }
456                     
457             } else {
458                 ERROR("P-State: Strange, machine has ACPI DVFS but no states...\n");
459             }
460         }
461         put_cpu_var(processors);
462         INFO("P-State: Intel - State Mapping (%u states) follows\n",intel_num_pstates);
463         for (i=0;i<intel_num_pstates;i++) {
464             INFO("P-State: Intel Mapping %u:  freq=%llu  ctrl=%llx\n",
465                  i, intel_pstate_to_ctrl[i].freq,intel_pstate_to_ctrl[i].ctrl);
466         }
467     } else {
468         INFO("P-State: Intel:  No speedstep here\n");
469     }
470         
471
472     return machine_state.have_speedstep;
473 }
474
475
476 static void init_arch_intel(void)
477 {
478     uint64_t val;
479
480     rdmsrl(MSR_MISC_ENABLE_IA32, val);
481
482     //INFO("P-State: prior ENABLE=%llx\n",val);
483
484     // store prior speedstep setting
485     get_cpu_var(core_state).prior_speedstep=(val >> 16) & 0x1;
486     put_cpu_var(core_state);
487
488     // enable speedstep (probably already on)
489     val |= 1 << 16;
490     wrmsrl(MSR_MISC_ENABLE_IA32, val);
491
492     //INFO("P-State: write ENABLE=%llx\n",val);
493
494 }
495
496 static void deinit_arch_intel(void)
497 {
498     uint64_t val;
499
500     rdmsrl(MSR_MISC_ENABLE_IA32, val);
501
502     //INFO("P-State: deinit: ENABLE=%llx\n",val);
503
504     val &= ~(1ULL << 16);
505     val |= get_cpu_var(core_state).prior_speedstep << 16;
506     put_cpu_var(core_state);
507
508     wrmsrl(MSR_MISC_ENABLE_IA32, val);
509
510     //INFO("P-state: deinit ENABLE=%llx\n",val);
511
512 }
513
514 /* TODO: Intel P-states require sampling at intervals... */
515 static uint64_t get_pstate_intel(void)
516 {
517     uint64_t val;
518
519     rdmsrl(MSR_PERF_STAT_IA32,val);
520
521     //INFO("P-State: Get: 0x%llx\n", val);
522
523     // should check if turbo is active, in which case 
524     // this value is not the whole story
525
526     return val;
527 }
528
529 static void set_pstate_intel(uint64_t p)
530 {
531     uint64_t val;
532     uint64_t ctrl;
533
534     if (intel_num_pstates==0) { 
535         return ;
536     } else {
537         if (p>=intel_num_pstates) { 
538             p=intel_num_pstates-1;
539         }
540     }
541
542     ctrl=intel_pstate_to_ctrl[p].ctrl;
543
544     /* ...Intel IDA (dynamic acceleration)
545        if (c->no_turbo && !c->turbo_disabled) {
546        val |= 1 << 32;
547        }
548        */
549     // leave all bits along expect for the likely
550     // fid bits
551
552     rdmsrl(MSR_PERF_CTL_IA32, val);
553     INFO("P-State: Pre-Set: 0x%llx\n", val);
554
555     val &= ~0xffffULL;
556     val |= ctrl & 0xffffULL;
557
558     INFO("P-State: Set: 0x%llx\n", val);
559
560     wrmsrl(MSR_PERF_CTL_IA32, val);
561
562     get_cpu_var(core_state).cur_pstate = p;
563     put_cpu_var(core_state);
564 }
565
566
/* The smallest Intel p-state index is always 0 (first table entry). */
static uint64_t get_min_pstate_intel(void)
{
    const uint64_t first_index = 0;

    return first_index;
}
571
572
573
574 static uint64_t get_max_pstate_intel (void)
575 {
576     if (intel_num_pstates==0) { 
577         return 0;
578     } else {
579         return intel_num_pstates-1;
580     }
581 }
582
583 static struct pstate_core_funcs intel_funcs =
584 {
585     .arch_init        = init_arch_intel,
586     .arch_deinit      = deinit_arch_intel,
587     .get_pstate       = get_pstate_intel,
588     .set_pstate       = set_pstate_intel,
589     .get_max_pstate   = get_max_pstate_intel,
590     .get_min_pstate   = get_min_pstate_intel,
591 };
592
593
594
595 /***********************************************
596   Arch determination and setup
597  ***********************************************/
598
599 static inline void cpuid_string (uint32_t id, uint32_t dest[4]) 
600 {
601     asm volatile("cpuid"
602             :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
603             :"a"(id));
604 }
605
606
/*
 * Fetch the 12-character CPU vendor string from CPUID leaf 0.
 *
 * @name: output buffer of at least 13 bytes; NUL-terminated on return.
 *
 * Returns the EAX value reported by leaf 0.
 */
static int get_cpu_vendor (char name[13])
{
    uint32_t dest[4];
    uint32_t maxid;

    cpuid_string(0,dest);
    maxid=dest[0];

    /*
     * The vendor string is EBX, EDX, ECX in that order.  Copy bytewise
     * rather than storing through a cast uint32_t pointer: the char
     * buffer carries no alignment guarantee and the cast violates the
     * aliasing rules.
     */
    memcpy(name,     &dest[1], sizeof(uint32_t));
    memcpy(name + 4, &dest[3], sizeof(uint32_t));
    memcpy(name + 8, &dest[2], sizeof(uint32_t));
    name[12]=0;

    return maxid;
}
621
622
/* Nonzero iff the CPUID vendor string is "GenuineIntel". */
static int is_intel (void)
{
    char vendor[13];

    get_cpu_vendor(vendor);
    return strcmp(vendor,"GenuineIntel") == 0;
}
629
630
/* Nonzero iff the CPUID vendor string is "AuthenticAMD". */
static int is_amd (void)
{
    char vendor[13];

    get_cpu_vendor(vendor);
    return strcmp(vendor,"AuthenticAMD") == 0;
}
637
638 static int pstate_arch_setup(void)
639 {
640
641     if (is_amd()) {
642         machine_state.arch = AMD;
643         machine_state.funcs = &amd_funcs;
644         machine_state.supports_pstates = supports_pstates_amd();
645         INFO("PSTATE: P-State initialized for AMD\n");
646     } else if (is_intel()) {
647         machine_state.arch  = INTEL;
648         machine_state.funcs = &intel_funcs;
649         machine_state.supports_pstates = supports_pstates_intel();
650         INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n");
651         return 0;
652
653     } else {
654         machine_state.arch = OTHER;
655         machine_state.funcs = NULL;
656         machine_state.supports_pstates = 0;
657         INFO("PSTATE: P-state control: No support for direct control on this architecture\n");
658         return 0;
659     }
660
661     return 0;
662 }
663
664
665
666 /******************************************************************
667   Linux Interface
668  *****************************************************************/
669
670
671 /* 
672  * This stub governor is simply a placeholder for preventing 
673  * frequency changes from the Linux side. For now, we simply leave
674  * the frequency as is when we acquire control. 
675  */
676 static int governor_run(struct cpufreq_policy *policy, unsigned int event)
677 {
678
679     switch (event) {
680         /* we can't use cpufreq_driver_target here as it can result
681          * in a circular dependency, so we'll just do nothing.
682          */
683         case CPUFREQ_GOV_START:
684         case CPUFREQ_GOV_STOP:
685         case CPUFREQ_GOV_LIMITS:
686             /* do nothing */
687             break;
688         default:
689             ERROR("Undefined governor command\n");
690             return -1;
691     }                           
692
693     return 0;
694 }
695
696
697 static struct cpufreq_governor stub_governor = 
698 {
699     .name = PALACIOS_GOVNAME,
700     .governor = governor_run,
701     .owner = THIS_MODULE,
702 };
703
704
705 static inline void pstate_register_linux_governor(void)
706 {
707     cpufreq_register_governor(&stub_governor);
708 }
709
710
711 static inline void pstate_unregister_linux_governor(void)
712 {
713     cpufreq_unregister_governor(&stub_governor);
714 }
715
716
717 static int get_current_governor(char **buf, unsigned int cpu)
718 {
719     struct cpufreq_policy * policy = palacios_alloc(sizeof(struct cpufreq_policy));
720     char * govname = NULL;
721
722     if (!policy) {
723         ERROR("could not allocate cpufreq_policy\n");
724         return -1;
725     }
726         
727     if (cpufreq_get_policy(policy, cpu) != 0) {
728         ERROR("Could not get current cpufreq policy\n");
729         goto out_err;
730     }
731
732     /* We're in interrupt context, should probably not wait here */
733     govname = palacios_alloc(MAX_GOV_NAME_LEN);
734     if (!govname) {
735         ERROR("Could not allocate space for governor name\n");
736         goto out_err;
737     }
738
739     strncpy(govname, policy->governor->name, MAX_GOV_NAME_LEN);
740
741     get_cpu_var(core_state).linux_governor = govname;
742     put_cpu_var(core_state);
743
744     *buf = govname;
745
746     palacios_free(policy);
747
748     return 0;
749
750 out_err:
751     palacios_free(policy);
752     return -1;
753 }
754
755
756 /* passed to the userspacehelper interface for cleanup */
757 static void gov_switch_cleanup(struct subprocess_info * s)
758 {
759     palacios_free(s->argv[2]);
760     palacios_free(s->argv);
761 }
762
763
764 /* 
765  * Switch governors
766  * @s - the governor to switch to 
767  */
768 static int governor_switch(char * s, unsigned int cpu)
769 {
770     char * path_str = NULL;
771     char ** argv = NULL; 
772
773     static char * envp[] = {
774         "HOME=/",
775         "TERM=linux",
776         "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
777
778
779     argv = palacios_alloc(4*sizeof(char*));
780     if (!argv) {
781         ERROR("Couldn't allocate argv struct\n");
782         return -1;
783     }
784
785     path_str = palacios_alloc(MAX_PATH_LEN);
786     if (!path_str) {
787         ERROR("Couldn't allocate path string\n");
788         goto out_freeargv;
789     }
790     memset(path_str, 0, MAX_PATH_LEN);
791
792     snprintf(path_str, MAX_PATH_LEN, "echo %s > /sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor", s, cpu);
793
794     argv[0] = "/bin/sh";
795     argv[1] = "-c";
796     argv[2] = path_str;
797     argv[3] = NULL;
798
799     /* KCH: we can't wait here to actually see if we succeeded, we're in interrupt context */
800     return call_usermodehelper_fns("/bin/sh", argv, envp, UMH_NO_WAIT, NULL, gov_switch_cleanup, NULL);
801
802 out_freeargv:
803     palacios_free(argv);
804     return -1;
805 }
806
807
808 static inline void free_linux_governor(void)
809 {
810     palacios_free(get_cpu_var(core_state).linux_governor);
811     put_cpu_var(core_state);
812 }
813
814
815 static int linux_setup_palacios_governor(void)
816 {
817     char * gov;
818     unsigned int cpu = get_cpu();
819
820     /* KCH:  we assume the v3vee governor is already 
821      * registered with kernel by this point 
822      */
823
824     if (get_current_governor(&gov, cpu) < 0) {
825         ERROR("Could not get current governor\n");
826         return -1;
827     }
828
829     DEBUG("saving current governor (%s)\n", gov);
830
831     get_cpu_var(core_state).linux_governor = gov;
832     put_cpu_var(core_state);
833     
834     DEBUG("setting the new governor (%s)\n", PALACIOS_GOVNAME);
835
836     /* set the new one to ours */
837     if (governor_switch(PALACIOS_GOVNAME, cpu) < 0) {
838         ERROR("Could not set governor to (%s)\n", PALACIOS_GOVNAME);
839         return -1;
840     }
841
842     return 0;
843 }
844
845
846
847 static int linux_get_pstate(void)
848 {
849     struct cpufreq_policy * policy = NULL;
850     struct cpufreq_frequency_table *table;
851     int cpu = get_cpu(); 
852     unsigned int i = 0;
853     unsigned int count = 0;
854
855     policy = palacios_alloc(sizeof(struct cpufreq_policy));
856     if (!policy) {
857         ERROR("Could not allocate policy struct\n");
858         return -1;
859     }
860
861     cpufreq_get_policy(policy, cpu);
862     table = cpufreq_frequency_get_table(cpu);
863
864     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
865
866         if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
867             continue;
868         }
869
870         if (table[i].frequency == policy->cur) {
871             break;
872         }
873
874         count++;
875     }
876
877     palacios_free(policy);
878
879     put_cpu();
880     return count;
881 }
882
883
884 static int linux_get_freq(void)
885 {
886     struct cpufreq_policy * policy = NULL;
887     int cpu = get_cpu();
888
889     policy = palacios_alloc(sizeof(struct cpufreq_policy));
890     if (!policy) {
891         ERROR("Could not allocate policy struct\n");
892         return -1;
893     }
894
895     if (cpufreq_get_policy(policy, cpu)) {
896         ERROR("Could not get current policy\n");
897         return -1;
898     }
899
900     return policy->cur;
901 }
902
903
904 static int linux_set_pstate(uint8_t p)
905 {
906     struct cpufreq_policy * policy = NULL;
907     struct cpufreq_frequency_table *table;
908     int cpu = get_cpu();
909     unsigned int i = 0;
910     unsigned int count = 0;
911     int state_set = 0;
912     int last_valid = 0;
913
914     policy = palacios_alloc(sizeof(struct cpufreq_policy));
915     if (!policy) {
916         ERROR("Could not allocate policy struct\n");
917         return -1;
918     }
919
920     if (cpufreq_get_policy(policy, cpu)) {
921         ERROR("Could not get current policy\n");
922         goto out_err;
923     }
924     table = cpufreq_frequency_get_table(cpu);
925
926     for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
927
928         if (table[i].frequency == CPUFREQ_ENTRY_INVALID) {
929             continue;
930         }
931
932         if (count == p) {
933             cpufreq_driver_target(policy, table[i].frequency, CPUFREQ_RELATION_H);
934             state_set = 1;
935         }
936
937         count++;
938         last_valid = i;
939     }
940
941     /* we need to deal with the case in which we get a number > max pstate */
942     if (!state_set) {
943         cpufreq_driver_target(policy, table[last_valid].frequency, CPUFREQ_RELATION_H);
944     }
945
946     palacios_free(policy);
947     return 0;
948
949 out_err:
950     palacios_free(policy);
951     return -1;
952 }
953
954
955 static int linux_set_freq(uint64_t f)
956 {
957     struct cpufreq_policy * policy = NULL;
958     int cpu = get_cpu();
959     uint64_t freq;
960
961     policy = palacios_alloc(sizeof(struct cpufreq_policy));
962     if (!policy) {
963         ERROR("Could not allocate policy struct\n");
964         return -1;
965     }
966
967     cpufreq_get_policy(policy, cpu);
968
969     if (f < policy->min) {
970         freq = policy->min;
971     } else if (f > policy->max) {
972         freq = policy->max;
973     } else {
974         freq = f;
975     }
976
977     cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_H);
978
979     palacios_free(policy);
980     return 0;
981 }
982
983
984 static int linux_restore_defaults(void)
985 {
986     unsigned int cpu = get_cpu();
987     char * gov = NULL;
988
989     gov = get_cpu_var(core_state).linux_governor;
990     put_cpu_var(core_state);
991
992     DEBUG("restoring previous governor (%s)\n", gov);
993
994     if (governor_switch(gov, cpu) < 0) {
995         ERROR("Could not restore governor to (%s)\n", gov);
996         goto out_err;
997     }
998
999     free_linux_governor();
1000     return 0;
1001
1002 out_err:
1003     free_linux_governor();
1004     return -1;
1005 }
1006
1007
1008
1009 /******************************************************************
1010   Generic Interface as provided to Palacios and to the rest of the
1011   module
1012  ******************************************************************/
1013
1014 static void init_core(void)
1015 {
1016     unsigned cpu;
1017     struct cpufreq_policy *p;
1018     unsigned int i;
1019
1020
1021     DEBUG("P-State Core Init\n");
1022
1023     get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1024     get_cpu_var(core_state).cur_pstate = 0;
1025
1026     if (machine_state.funcs) {
1027         get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate();
1028         get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate();
1029     } else {
1030         get_cpu_var(core_state).min_pstate = 0;
1031         get_cpu_var(core_state).max_pstate = 0;
1032     }
1033
1034
1035     cpu = get_cpu(); put_cpu();
1036
1037     p = cpufreq_cpu_get(cpu);
1038
1039     if (!p) { 
1040         get_cpu_var(core_state).have_cpufreq = 0;
1041         get_cpu_var(core_state).min_freq_khz=0;
1042         get_cpu_var(core_state).max_freq_khz=0;
1043         get_cpu_var(core_state).cur_freq_khz=0;
1044     } else {
1045         get_cpu_var(core_state).have_cpufreq = 1;
1046         get_cpu_var(core_state).min_freq_khz=p->min;
1047         get_cpu_var(core_state).max_freq_khz=p->max;
1048         get_cpu_var(core_state).cur_freq_khz=p->cur;
1049     }
1050     
1051     cpufreq_cpu_put(p);
1052
1053     put_cpu_var(core_state);
1054
1055     for (i=0;i<get_cpu_var(processors)->performance->state_count; i++) { 
1056         INFO("P-State: %u: freq=%llu ctrl=%llx",
1057                 i, 
1058                 get_cpu_var(processors)->performance->states[i].core_frequency*1000,
1059                 get_cpu_var(processors)->performance->states[i].control);
1060    }
1061    put_cpu_var(processors);
1062 }
1063
1064
// Defined further down; needed here by deinit_core()
void palacios_pstate_ctrl_release(void);


/*
 * Per-core teardown: hand p-state control of the executing core back to
 * the host if Palacios currently holds it.
 */
static void deinit_core(void)
{
    DEBUG("P-State Core Deinit\n");

    palacios_pstate_ctrl_release();
}
1073
1074
1075
1076 void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c) 
1077 {
1078     memset(c,0,sizeof(struct v3_cpu_pstate_chars));
1079
1080
1081     c->features = V3_PSTATE_INTERNAL_CONTROL;
1082
1083     if (get_cpu_var(core_state).have_cpufreq) {
1084         c->features |= V3_PSTATE_EXTERNAL_CONTROL;
1085     }
1086
1087     if (machine_state.arch==AMD || machine_state.arch==INTEL) { 
1088         c->features |= V3_PSTATE_DIRECT_CONTROL;
1089     }
1090     c->cur_mode = get_cpu_var(core_state).mode;
1091     c->min_pstate = get_cpu_var(core_state).min_pstate;
1092     c->max_pstate = get_cpu_var(core_state).max_pstate;
1093     c->cur_pstate = get_cpu_var(core_state).cur_pstate;
1094     c->min_freq_khz = get_cpu_var(core_state).min_freq_khz;
1095     c->max_freq_khz = get_cpu_var(core_state).max_freq_khz;
1096     c->cur_freq_khz = get_cpu_var(core_state).cur_freq_khz;
1097
1098     put_cpu_var(core_state);
1099
1100
1101
1102 }
1103
1104
1105 uint64_t palacios_pstate_ctrl_get_pstate(void)
1106 {
1107     if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { 
1108         put_cpu_var(core_state);
1109         return machine_state.funcs->get_pstate();
1110     } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1111         put_cpu_var(core_state);
1112         return linux_get_pstate();
1113     } else {
1114         put_cpu_var(core_state);
1115         return 0;
1116     }
1117 }
1118
1119
1120 void palacios_pstate_ctrl_set_pstate(uint64_t p)
1121 {
1122     if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { 
1123         put_cpu_var(core_state);
1124         machine_state.funcs->set_pstate(p);
1125     } else if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) {
1126         put_cpu_var(core_state);
1127         linux_set_pstate(p);
1128     } 
1129 }
1130
1131
1132 void palacios_pstate_ctrl_set_pstate_wrapper(void *p)
1133 {
1134     palacios_pstate_ctrl_set_pstate((uint8_t)(uint64_t)p);
1135 }
1136
1137
1138 uint64_t palacios_pstate_ctrl_get_freq(void)
1139 {
1140     if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { 
1141         put_cpu_var(core_state);
1142         return linux_get_freq();
1143     } else {
1144         put_cpu_var(core_state);
1145         return 0;
1146     }
1147 }
1148
1149
1150 void palacios_pstate_ctrl_set_freq(uint64_t p)
1151 {
1152     if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { 
1153         put_cpu_var(core_state);
1154         linux_set_freq(p);
1155     } 
1156     put_cpu_var(core_state);
1157 }
1158
1159
1160 static int switch_to_external(void)
1161 {
1162     DEBUG("switch from host control to external\n");
1163
1164     if (!(get_cpu_var(core_state).have_cpufreq)) {
1165         put_cpu_var(core_state);
1166         ERROR("No cpufreq  - cannot switch to external...\n");
1167         return -1;
1168     }
1169     put_cpu_var(core_state);
1170
1171     linux_setup_palacios_governor();
1172
1173     get_cpu_var(core_state).mode=V3_PSTATE_EXTERNAL_CONTROL;
1174     put_cpu_var(core_state);
1175
1176     return 0;
1177 }
1178
1179
1180 static int switch_to_direct(void)
1181 {
1182     DEBUG("switch from host control to direct\n");
1183
1184     if (get_cpu_var(core_state).have_cpufreq) { 
1185         put_cpu_var(core_state);
1186         DEBUG("switch to direct from cpufreq\n");
1187
1188         // The implementation would set the policy and governor to peg cpu
1189         // regardless of load
1190         linux_setup_palacios_governor();
1191     }
1192
1193     if (machine_state.funcs && machine_state.funcs->arch_init) {
1194         get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL;
1195
1196         machine_state.funcs->arch_init();
1197
1198         put_cpu_var(core_state);
1199     }
1200
1201     return 0;
1202 }
1203
1204
1205 static int switch_to_internal(void)
1206 {
1207     DEBUG("switch from host control to internal\n");
1208
1209     if (get_cpu_var(core_state).have_cpufreq) { 
1210         put_cpu_var(core_state);
1211         DEBUG("switch to internal on machine with cpu freq\n");
1212         linux_setup_palacios_governor();
1213     }
1214
1215     get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL;
1216
1217     put_cpu_var(core_state);
1218
1219     return 0;
1220 }
1221
1222
1223 static int switch_from_external(void)
1224 {
1225     if (!(get_cpu_var(core_state).have_cpufreq)) {
1226         put_cpu_var(core_state);
1227         ERROR("No cpufreq  - how did we get here... external...\n");
1228         return -1;
1229     }
1230
1231     DEBUG("Switching back to host control from external\n");
1232
1233     if (get_cpu_var(core_state).have_cpufreq) { 
1234         linux_restore_defaults();
1235     }
1236
1237     get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
1238
1239     put_cpu_var(core_state);
1240
1241     return 0;
1242 }
1243
1244
1245 static int switch_from_direct(void)
1246 {
1247
1248     DEBUG("Switching back to host control from direct\n");
1249
1250     // Set maximum performance, just in case there is no host control
1251     machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate);
1252     machine_state.funcs->arch_deinit();
1253
1254     if (get_cpu_var(core_state).have_cpufreq) { 
1255         linux_restore_defaults();
1256     }
1257
1258     get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1259
1260     put_cpu_var(core_state);
1261
1262     return 0;
1263 }
1264
1265
1266 static int switch_from_internal(void)
1267 {
1268     DEBUG("Switching back to host control from internal\n");
1269
1270     if (get_cpu_var(core_state).have_cpufreq) { 
1271         // ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n");
1272         // The implementation would switch back to default policy and governor
1273         linux_restore_defaults();
1274     }
1275
1276     get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
1277
1278     put_cpu_var(core_state);
1279
1280     return 0;
1281 }
1282
1283
1284
1285 void palacios_pstate_ctrl_acquire(uint32_t type)
1286 {
1287     if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) { 
1288         palacios_pstate_ctrl_release();
1289     }
1290
1291     put_cpu_var(core_state);
1292
1293     switch (type) { 
1294         case V3_PSTATE_EXTERNAL_CONTROL:
1295             switch_to_external();
1296             break;
1297         case V3_PSTATE_DIRECT_CONTROL:
1298             switch_to_direct();
1299             break;
1300         case V3_PSTATE_INTERNAL_CONTROL:
1301             switch_to_internal();
1302             break;
1303         default:
1304             ERROR("Unknown pstate control type %u\n",type);
1305             break;
1306     }
1307
1308 }
1309
1310 // Wrappers for xcalls
1311 static void palacios_pstate_ctrl_acquire_external(void)
1312 {
1313     palacios_pstate_ctrl_acquire(V3_PSTATE_EXTERNAL_CONTROL);
1314 }
1315
1316 static void palacios_pstate_ctrl_acquire_direct(void)
1317 {
1318     palacios_pstate_ctrl_acquire(V3_PSTATE_DIRECT_CONTROL);
1319 }
1320
1321
1322 void palacios_pstate_ctrl_release(void)
1323 {
1324     if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) { 
1325         put_cpu_var(core_state);
1326         return;
1327     }
1328
1329     switch (get_cpu_var(core_state).mode) { 
1330         case V3_PSTATE_EXTERNAL_CONTROL:
1331             switch_from_external();
1332             break;
1333         case V3_PSTATE_DIRECT_CONTROL:
1334             switch_from_direct();
1335             break;
1336         case V3_PSTATE_INTERNAL_CONTROL:
1337             switch_from_internal();
1338             break;
1339         default:
1340             ERROR("Unknown pstate control type %u\n",core_state.mode);
1341             break;
1342     }
1343
1344     put_cpu_var(core_state);
1345
1346 }
1347
1348
1349 static void update_hw_pstate(void *arg)
1350 {
1351     if (machine_state.funcs && machine_state.funcs->get_pstate) {
1352         get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate();
1353         put_cpu_var(core_state);
1354     } else {
1355         get_cpu_var(core_state).cur_hw_pstate = 0;
1356         put_cpu_var(core_state);
1357     }
1358 }
1359
1360
1361 /***************************************************************************
1362   PROC Interface to expose state
1363  ***************************************************************************/
1364
1365 static int pstate_show(struct seq_file * file, void * v)
1366 {
1367     unsigned int cpu;
1368     unsigned int numcpus = num_online_cpus();
1369
1370     seq_printf(file, "V3VEE DVFS Status\n\n");
1371
1372     for (cpu=0;cpu<numcpus;cpu++) { 
1373         palacios_xcall(cpu,update_hw_pstate,0);
1374     }
1375
1376     seq_printf(file, "Arch:\t%s\nPStates:\t%s\n\n",
1377             machine_state.arch==INTEL ? "Intel" : 
1378             machine_state.arch==AMD ? "AMD" : "Other",
1379             machine_state.supports_pstates ? "Yes" : "No");
1380
1381     for (cpu=0;cpu<numcpus;cpu++) { 
1382         struct pstate_core_info *s = &per_cpu(core_state,cpu);
1383         seq_printf(file,"pcore %u: hw pstate 0x%x mode %s of [ host ",cpu,
1384                 s->cur_hw_pstate,
1385                 s->mode==V3_PSTATE_HOST_CONTROL ? "host" :
1386                 s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" :
1387                 s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" : 
1388                 s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN");
1389         if (s->have_cpufreq) { 
1390             seq_printf(file,"external ");
1391         }
1392         if (machine_state.supports_pstates) {
1393             seq_printf(file,"direct ");
1394         }
1395         seq_printf(file,"internal ] ");
1396         if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) { 
1397             seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz);
1398         } 
1399         if (s->mode==V3_PSTATE_DIRECT_CONTROL) { 
1400             seq_printf(file,"(min=%u max=%u cur=%u) ", (uint32_t)s->min_pstate, (uint32_t)s->max_pstate, (uint32_t)s->cur_pstate);
1401         }
1402         seq_printf(file,"\n");
1403     }
1404     return 0;
1405 }
1406
1407 static int pstate_open(struct inode * inode, struct file * file) 
1408 {
1409     return single_open(file, pstate_show, NULL);
1410 }
1411
1412
1413 static struct file_operations pstate_fops = {
1414     .owner = THIS_MODULE,
1415     .open = pstate_open, 
1416     .read = seq_read,
1417     .llseek = seq_lseek,
1418     .release = seq_release
1419 };
1420
1421 int pstate_proc_setup(void)
1422 {
1423     struct proc_dir_entry *proc;
1424
1425     proc = create_proc_entry("v3-dvfs",0444, palacios_get_procdir());
1426
1427     if (!proc) { 
1428         ERROR("Failed to create proc entry for p-state control\n");
1429         return -1;
1430     }
1431
1432     proc->proc_fops = &pstate_fops;
1433
1434     return 0;
1435 }
1436
/* Remove the "v3-dvfs" entry from the Palacios proc directory. */
void pstate_proc_teardown(void)
{
    struct proc_dir_entry *dir = palacios_get_procdir();

    remove_proc_entry("v3-dvfs", dir);
}
1441
1442 /********************************************************************
1443   User interface (ioctls)
1444  ********************************************************************/
1445
1446 static int dvfs_ctrl(unsigned int cmd, unsigned long arg) 
1447 {
1448     struct v3_dvfs_ctrl_request r;
1449
1450     if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) {
1451         ERROR("Failed to copy DVFS request from user\n");
1452         return -EFAULT;
1453     }
1454
1455     if (r.pcore >= num_online_cpus()) {
1456         ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore);
1457         return -EFAULT;
1458     }
1459
1460     switch (r.cmd) {
1461         case V3_DVFS_ACQUIRE: {
1462                                   switch (r.acq_type) { 
1463                                       case V3_DVFS_EXTERNAL:
1464                                           palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external, NULL);
1465                                           return 0;
1466                                           break;
1467                                       case V3_DVFS_DIRECT:
1468                                           palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct, NULL);
1469                                           return 0;
1470                                           break;
1471                                       default:
1472                                           ERROR("Unknown DVFS acquire type %u\n",r.acq_type);
1473                                           return -EFAULT;
1474                                   }
1475                               }
1476                               break;
1477         case V3_DVFS_RELEASE: {
1478                                   palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release, NULL);
1479                                   return 0;
1480                               }
1481                               break;
1482         case V3_DVFS_SETFREQ: {
1483                                   palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz);
1484                                   return 0;
1485                               }
1486                               break;
1487         case V3_DVFS_SETPSTATE: {
1488                                     palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate);
1489                                     return 0;
1490                                 }
1491         default: {
1492                      ERROR("Unknown DVFS command %u\n",r.cmd);
1493                      return -EFAULT;
1494                  }
1495                  break;
1496     }
1497 }
1498
1499
1500 void pstate_user_setup(void)
1501 {
1502     add_global_ctrl(V3_DVFS_CTRL, dvfs_ctrl);
1503 }
1504
1505
1506 void pstate_user_teardown(void)
1507 {
1508     remove_global_ctrl(V3_DVFS_CTRL);
1509 }
1510
1511 static struct v3_host_pstate_ctrl_iface hooks = {
1512     .get_chars = palacios_pstate_ctrl_get_chars,
1513     .acquire = palacios_pstate_ctrl_acquire,
1514     .release = palacios_pstate_ctrl_release,
1515     .set_pstate = palacios_pstate_ctrl_set_pstate,
1516     .get_pstate = palacios_pstate_ctrl_get_pstate,
1517     .set_freq = palacios_pstate_ctrl_set_freq,
1518     .get_freq = palacios_pstate_ctrl_get_freq,
1519 };
1520
1521
1522
1523 static int pstate_ctrl_init(void) 
1524 {
1525     unsigned int cpu;
1526     unsigned int numcpus = num_online_cpus();
1527
1528     pstate_arch_setup();
1529
1530     for (cpu=0;cpu<numcpus;cpu++) { 
1531         palacios_xcall(cpu,(void ((*)(void*)))init_core,0);
1532     }
1533
1534     V3_Init_Pstate_Ctrl(&hooks);  
1535
1536     if (pstate_proc_setup()) { 
1537         ERROR("Unable to initialize P-State Control\n");
1538         return -1;
1539     }
1540
1541     pstate_user_setup();
1542
1543     pstate_register_linux_governor();
1544
1545     INFO("P-State Control Initialized\n");
1546
1547     return 0;
1548 }
1549
1550 static int pstate_ctrl_deinit(void)
1551 {
1552     unsigned int cpu;
1553     unsigned int numcpus=num_online_cpus();
1554
1555     pstate_unregister_linux_governor();
1556
1557     pstate_user_teardown();
1558
1559     pstate_proc_teardown();
1560
1561     // release pstate control if we have it, and we need to do this on each processor
1562     for (cpu=0;cpu<numcpus;cpu++) { 
1563         palacios_xcall(cpu,(void (*)(void *))deinit_core,0);
1564     }
1565
1566
1567     // Free any mapping table we built for Intel
1568     if (intel_pstate_to_ctrl && intel_pstate_to_ctrl != intel_pstate_to_ctrl_internal) { 
1569         palacios_free(intel_pstate_to_ctrl);
1570     }
1571
1572
1573     return 0;
1574 }
1575
1576
1577 static struct linux_ext pstate_ext = {
1578     .name = "PSTATE_CTRL",
1579     .init = pstate_ctrl_init,
1580     .deinit = pstate_ctrl_deinit,
1581     .guest_init = NULL,
1582     .guest_deinit = NULL,
1583 };
1584
1585
1586 register_extension(&pstate_ext);
1587
1588
1589