Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Additional functionality for pstate-ctrl
[palacios.git] / linux_module / iface-pstate-ctrl.c
1 /*
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2014, the V3VEE Project <http://www.v3vee.org>
11  * all rights reserved.
12  *
13  * Author: Kyle C. Hale <kh@u.northwestern.edu>
14  *         Shiva Rao <shiva.rao.717@gmail.com>
15  *         Peter Dinda <pdinda@northwestern.edu>
16  *
17  * This is free software.  you are permitted to use,
18  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
19  */
20
21 #include <linux/uaccess.h>
22 #include <linux/seq_file.h>
23 #include <linux/proc_fs.h>
24 #include <linux/cpufreq.h>
25 #include <asm/processor.h>
26 #include <asm/msr.h>
27 #include <asm/msr-index.h>
28
29 #include <interfaces/vmm_pstate_ctrl.h>
30
31 #include "palacios.h"
32 #include "iface-pstate-ctrl.h"
33
34 #include "linux-exts.h"
35
36 /*
37   This P-STATE control implementation includes:
38
39   - Direct control of Intel and AMD processor pstates
40   - External control of processor states via Linux (unimplemented)
41   - Internal control of processor states in Palacios (handoff from Linux)
42
43   Additionally, it provides a user-space interface for manipulating
44   p-state regardless of the host's functionality.  This includes
45   an ioctl for commanding the implementation and a /proc file for 
46   showing current status and capabilities.
47
48 */
49
50
51
52
// Mode value for "the host OS owns this core's p-state".
// The remaining mode values come from the Palacios interface:
//   V3_PSTATE_EXTERNAL_CONTROL
//   V3_PSTATE_DIRECT_CONTROL
//   V3_PSTATE_INTERNAL_CONTROL
#define V3_PSTATE_HOST_CONTROL 0

/*
 * Per-core p-state bookkeeping.
 */
struct pstate_core_info {
    // Who currently controls this core (one of the values listed above)
    uint32_t mode;

    // Meaningful while the core is under DIRECT control
    uint8_t  cur_pstate;
    uint8_t  max_pstate;
    uint8_t  min_pstate;

    // Last p-state read back from the hardware
    uint8_t  cur_hw_pstate;

    // Meaningful while the core is under EXTERNAL control
    uint64_t cur_freq_khz;
    uint64_t max_freq_khz;
    uint64_t min_freq_khz;

    // Intel-specific state
    uint8_t  prior_speedstep;   // SpeedStep enable bit saved before we change it
    uint8_t  turbo_disabled;
    uint8_t  no_turbo;

    // Nonzero when a cpufreq policy was found for this core
    int      have_cpufreq;
};
82
83
84 static DEFINE_PER_CPU(struct pstate_core_info, core_state); // one pstate_core_info instance per CPU
85
86
/*
 * Architecture-specific operations used to assert DIRECT control
 * over a core's p-state.  Each hook acts on the core it runs on.
 */
struct pstate_core_funcs {
    // one-time setup / teardown when DIRECT control is taken / dropped
    void    (*arch_init)(void);
    void    (*arch_deinit)(void);

    // p-state range reported by the hardware
    uint8_t (*get_min_pstate)(void);
    uint8_t (*get_max_pstate)(void);

    // read / write the current p-state
    uint8_t (*get_pstate)(void);
    void    (*set_pstate)(uint8_t pstate);
};
96
/*
 * Machine-wide (not per-core) description of the processor vendor and
 * the power-management features discovered through CPUID.
 */
struct pstate_machine_info {
    enum {INTEL, AMD, OTHER } arch;
    int supports_pstates;

    // AMD feature flags
    int have_pstate;
    int have_coreboost;
    int have_feedback;

    // Intel feature flags
    int have_speedstep;
    int have_opportunistic; // "Turbo Boost" or "IDA"
    int have_policy_hint;
    int have_hwp;           // hardware-controlled performance states
    int have_hdc;           // hardware duty cycling
    int have_mwait_ext;     // mwait power extensions
    int have_mwait_int;     // mwait wakes on interrupt

    // Common to both vendors
    int have_pstate_hw_coord;  // mperf/aperf

    // Operations used for DIRECT control (NULL when unsupported)
    struct pstate_core_funcs *funcs;
};
123
124 static struct pstate_machine_info machine_state; // single machine-wide instance
125
126
127 /****************************************************
128    AMD  DIRECT CONTROL
129 ***************************************************/
130
/*
 * AMD P-state MSRs.
 * See AMD Programmer's Manual Vol 2 (Rev 3, 2013), Sec. 17.1, pp.557
 */
#define MSR_PSTATE_LIMIT_REG_AMD 0xc0010061  /* current limit / max value */
#define MSR_PSTATE_CTL_REG_AMD   0xc0010062  /* p-state change command    */
#define MSR_PSTATE_STAT_REG_AMD  0xc0010063  /* current p-state           */
135
/* Bit-level view of MSR_PSTATE_LIMIT_REG_AMD. */
struct p_state_limit_reg_amd {
    union {
        uint64_t val;
        struct {
            /* lowest P-state value (highest perf.) supported currently;
               this can change at runtime */
            uint8_t  pstate_limit : 4;
            /* highest P-state value supported (lowest perf) */
            uint8_t  pstate_max   : 4;
            uint64_t rsvd         : 56;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
146
147
/* Bit-level view of MSR_PSTATE_STAT_REG_AMD. */
struct p_state_stat_reg_amd {
    union {
        uint64_t val;
        struct {
            /* p-state field occupying the low four bits */
            uint8_t  pstate  : 4;
            uint64_t rsvd    : 60;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
157
158
/* Bit-level view of MSR_PSTATE_CTL_REG_AMD. */
struct p_state_ctl_reg_amd {
    union {
        uint64_t val;
        struct {
            /* command field occupying the low four bits */
            uint8_t  cmd  : 4;
            uint64_t rsvd : 60;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
168
169
170 /* CPUID Fn8000_0007_EDX[HwPstate(7)] = 1 */
171 static uint8_t supports_pstates_amd (void)
172 {
173     uint32_t eax, ebx, ecx, edx;
174
175     cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
176     machine_state.have_pstate = !!(edx & (1 << 7));
177     machine_state.have_coreboost = !!(edx & (1<<9));
178     machine_state.have_feedback = !!(edx & (1<<11));
179
180     cpuid(0x6, &eax, &ebx, &ecx, &edx);
181     machine_state.have_pstate_hw_coord =  !!(ecx & 1); 
182
183     INFO("P-State: AMD: Pstates=%d Coreboost=%d Feedback=%d PstateHWCoord=%d\n",
184          machine_state.have_pstate, 
185          machine_state.have_coreboost, 
186          machine_state.have_feedback,
187          machine_state.have_pstate_hw_coord);
188     
189     return machine_state.have_pstate;
190     
191     
192 }
193
/* Per-core setup hook for AMD DIRECT control. */
static void init_arch_amd(void)
{
    /* KCH: intentionally empty - AMD needs no setup */
}
198
/* Per-core teardown hook for AMD DIRECT control. */
static void deinit_arch_amd(void)
{
    /* KCH: intentionally empty - AMD needs no teardown */
}
203
204 static uint8_t get_pstate_amd(void) 
205 {
206     struct p_state_stat_reg_amd pstat;
207
208     rdmsrl(MSR_PSTATE_STAT_REG_AMD, pstat.val);
209
210     get_cpu_var(core_state).cur_pstate=pstat.reg.pstate;
211     put_cpu_var(core_state);
212
213     return pstat.reg.pstate;
214 }
215
216 static void set_pstate_amd(uint8_t p)
217 {
218     struct p_state_ctl_reg_amd pctl;
219     pctl.val = 0;
220     pctl.reg.cmd = p;
221
222     wrmsrl(MSR_PSTATE_CTL_REG_AMD, pctl.val);
223
224     get_cpu_var(core_state).cur_pstate=p;
225     put_cpu_var(core_state);
226 }
227
228 /*
229  * NOTE: HW may change this value at runtime
230  */
231 static uint8_t get_max_pstate_amd(void)
232 {
233     struct p_state_limit_reg_amd plimits;
234
235     rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
236
237     return plimits.reg.pstate_max;
238 }
239
240
241 static uint8_t get_min_pstate_amd(void)
242 {
243     struct p_state_limit_reg_amd plimits;
244
245     rdmsrl(MSR_PSTATE_LIMIT_REG_AMD, plimits.val);
246
247     return plimits.reg.pstate_limit;
248 }
249
250
251 static struct pstate_core_funcs amd_funcs =
252 {
253     .arch_init        = init_arch_amd,
254     .arch_deinit      = deinit_arch_amd,
255     .get_pstate       = get_pstate_amd,
256     .set_pstate       = set_pstate_amd,
257     .get_max_pstate   = get_max_pstate_amd,
258     .get_min_pstate   = get_min_pstate_amd,
259 };
260
261
262
263 /***********************************************************
264   INTEL DIRECT CONTROL
265 **********************************************************/
266
267
268 /*
269   This implementation uses SpeedStep, but does check
270   to see if the other features (MPERF/APERF, Turbo/IDA, HWP)
271   are available.
272 */
273
/*
 * Intel MSRs used (or reserved for use) by this file.
 * See Intel System Programmer's Manual Vol. 3B, 14-2
 */
#define MSR_MPERF_IA32              0x000000e7
#define MSR_APERF_IA32              0x000000e8
#define MSR_MISC_ENABLE_IA32        0x000001a0
#define MSR_NHM_TURBO_RATIO_LIMIT   0x000001ad
#define MSR_PLATFORM_INFO_IA32      0x000000ce
#define MSR_PERF_CTL_IA32           0x00000199
#define MSR_PERF_STAT_IA32          0x00000198
#define MSR_ENERY_PERF_BIAS_IA32    0x000001b0
283
284
285 /* Note that the meaning of the pstate
286    in the control and status registers is
287    implementation dependent, unlike AMD.   The "official"
288    way to figure out the mapping from pstate to 
289    these values is via ACPI.  What is written in the register
290    is an "id" of an operating point.
291
292    "Often", the 16 bit field consists of a high order byte
293    which is the frequency (the multiplier) and the low order
294    byte is the voltage. 
295 */
// Bit-level view of MSR_PERF_CTL_IA32 (read/write)
struct perf_ctl_reg_intel {
    union {
        uint64_t val;
        struct {
            // Target operating point.  This is not the ACPI pstate:
            // Intel treats the value as opaque.  On many
            // implementations it appears to be
            //     frequency_id : voltage_id
            // with frequency_id typically being the multiplier.
            uint16_t pstate                 : 16;
            uint16_t reserved               : 16;
            // Write 1 to *disengage* dynamic acceleration.
            // "IDA" and "Turbo" share this interface.
            uint16_t dynamic_accel_disable  : 1;
            uint32_t reserved2              : 31;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
316
// Bit-level view of MSR_PERF_STAT_IA32 (read-only)
struct perf_stat_reg_intel {
    union {
        uint64_t val;
        struct {
            // current operating point
            uint16_t pstate                 : 16;
            uint64_t reserved               : 48;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
328
// Bit-level view of MSR_ENERGY_PERF_BIAS_IA32 (read/write)
struct enery_perf_bias_reg_intel {
    union {
        uint64_t val;
        struct {
            // current policy hint (low four bits)
            uint8_t  policy_hint            : 4;
            uint64_t reserved               : 60;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
340
// Bit-level view of MSR_PLATFORM_INFO
//
// Every field is declared with a 64-bit base type on purpose.  The
// packed attribute on the enclosing union does not pack the inner
// struct, and with mixed 8/16-bit base types the compiler moves any
// bit-field that would straddle its base type's storage unit to the
// next unit.  That shifted rsvd3 and min_ratio away from their
// hardware bit positions and grew the struct beyond 8 bytes.  With a
// single 64-bit storage unit the fields sit exactly at:
//     max_noturbo_ratio  bits 15:8
//     ppin_cap           bit  23
//     ratio_limit        bit  28
//     tdc_tdp_limit      bit  29
//     min_ratio          bits 47:40
struct turbo_mode_info_reg_intel {
    union {
        uint64_t val;
        struct {
            uint64_t rsvd0                  : 8;
            uint64_t max_noturbo_ratio      : 8;
            uint64_t rsvd1                  : 7;
            uint64_t ppin_cap               : 1;
            uint64_t rsvd2                  : 4;
            uint64_t ratio_limit            : 1;
            uint64_t tdc_tdp_limit          : 1;
            uint64_t rsvd3                  : 10;
            uint64_t min_ratio              : 8;
            uint64_t rsvd4                  : 16;
        } reg;
    } __attribute__((packed));
} __attribute__((packed));
359             
360
361 /* CPUID.01:ECX.AES(7) */
362 static uint8_t supports_pstates_intel(void)
363 {
364     /* NOTE: CPUID.06H:ECX.SETBH[bit 3] is set and it also implies the presence of a new architectural MSR called IA32_ENERGY_PERF_BIAS (1B0H).
365      */
366     uint32_t eax, ebx, ecx, edx;
367
368     cpuid(0x1, &eax, &ebx, &ecx, &edx);
369     machine_state.have_speedstep =  !!(ecx & (1 << 7));
370
371     cpuid(0x6, &eax, &ebx, &ecx, &edx);
372     machine_state.have_pstate_hw_coord =  !!(ecx & 1); // ?
373     machine_state.have_opportunistic =  !!(eax & 1<<1);
374     machine_state.have_policy_hint = !!(ecx & 1<<3);
375     machine_state.have_hwp = !!(eax & 1<<7);
376     machine_state.have_hdc = !!(eax & 1<<13);
377
378     cpuid(0x5, &eax, &ebx, &ecx, &edx);
379     machine_state.have_mwait_ext =  !!(ecx & 1);
380     machine_state.have_mwait_int =  !!(ecx & 1<<1);
381
382
383     INFO("P-State: Intel: Speedstep=%d, PstateHWCoord=%d, Opportunistic=%d PolicyHint=%d HWP=%d HDC=%d, MwaitExt=%d MwaitInt=%d \n",
384          machine_state.have_speedstep, 
385          machine_state.have_pstate_hw_coord, 
386          machine_state.have_opportunistic,
387          machine_state.have_policy_hint,
388          machine_state.have_hwp,
389          machine_state.have_hdc,
390          machine_state.have_mwait_ext,
391          machine_state.have_mwait_int );
392
393     return machine_state.have_speedstep;
394 }
395
396
397 static void init_arch_intel(void)
398 {
399     uint64_t val;
400
401     rdmsrl(MSR_MISC_ENABLE_IA32, val);
402
403     // store prior speedstep setting
404     get_cpu_var(core_state).prior_speedstep=(val >> 16) & 0x1;
405     put_cpu_var(core_state);
406
407     // enable speedstep (probably already on)
408     val |= 1 << 16;
409     wrmsrl(MSR_MISC_ENABLE_IA32, val);
410
411 }
412
413 static void deinit_arch_intel(void)
414 {
415     uint64_t val;
416
417     rdmsrl(MSR_MISC_ENABLE_IA32, val);
418
419     val &= ~(1ULL << 16);
420     val |= get_cpu_var(core_state).prior_speedstep << 16;
421     put_cpu_var(core_state);
422
423     wrmsrl(MSR_MISC_ENABLE_IA32, val);
424     
425 }
426
427 /* TODO: Intel P-states require sampling at intervals... */
428 static uint8_t get_pstate_intel(void)
429 {
430     uint64_t val;
431     uint16_t pstate;
432
433     rdmsrl(MSR_PERF_STAT_IA32,val);
434
435     pstate = val & 0xffff;
436
437     INFO("P-State: Get: 0x%llx\n", val);
438
439     // Assume top byte is the FID
440     //if (pstate & 0xff ) { 
441     //  ERROR("P-State: Intel returns confusing pstate %u\n",pstate);
442     //}
443
444     // should check if turbo is active, in which case 
445     // this value is not the whole story
446
447     return (uint8_t) (pstate>>8);
448 }
449     
450 static void set_pstate_intel(uint8_t p)
451 {
452     uint64_t val;
453
454     /* ...Intel IDA (dynamic acceleration)
455     if (c->no_turbo && !c->turbo_disabled) {
456         val |= 1 << 32;
457     }
458     */
459     // leave all bits along expect for the likely
460     // fid bits
461
462     rdmsrl(MSR_PERF_CTL_IA32, val);
463     val &= ~0xff00ULL;
464     val |= ((uint64_t)p)<<8;
465
466     INFO("P-State: Set: 0x%llx\n", val);
467
468     wrmsrl(MSR_PERF_CTL_IA32, val);
469
470     get_cpu_var(core_state).cur_pstate = p;
471     put_cpu_var(core_state);
472 }
473
474
475 static uint8_t get_min_pstate_intel(void)
476 {
477     struct turbo_mode_info_reg_intel t;
478
479     rdmsrl(MSR_PLATFORM_INFO_IA32, t.val);
480
481     return t.reg.min_ratio;
482 }
483
484
485
486 static uint8_t get_max_pstate_intel (void)
487 {
488     struct turbo_mode_info_reg_intel t;
489     
490     rdmsrl(MSR_PLATFORM_INFO_IA32, t.val);
491
492     return t.reg.max_noturbo_ratio;
493 }
494
495 static struct pstate_core_funcs intel_funcs =
496 {
497     .arch_init        = init_arch_intel,
498     .arch_deinit      = deinit_arch_intel,
499     .get_pstate       = get_pstate_intel,
500     .set_pstate       = set_pstate_intel,
501     .get_max_pstate   = get_max_pstate_intel,
502     .get_min_pstate   = get_min_pstate_intel,
503 };
504
505
506
507 /***********************************************
508   Arch determination and setup
509 ***********************************************/
510  
511 static inline void cpuid_string (uint32_t id, uint32_t dest[4]) 
512 {
513     asm volatile("cpuid"
514                  :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
515                  :"a"(id));
516 }
517     
518
/*
 * Fetch the 12-character CPU vendor string via CPUID leaf 0.
 *
 * name: caller-provided buffer of at least 13 bytes; on return it holds
 *       the NUL-terminated vendor string (register order EBX, EDX, ECX).
 *
 * Returns the EAX value reported by CPUID leaf 0.
 *
 * The register words are copied with memcpy rather than stored through
 * a uint32_t pointer cast of the char buffer, so the function makes no
 * assumption about the buffer's alignment or aliasing.
 */
static int get_cpu_vendor (char name[13])
{
    uint32_t regs[4];

    cpuid_string(0, regs);

    memcpy(name + 0, &regs[1], sizeof(uint32_t));   /* EBX */
    memcpy(name + 4, &regs[3], sizeof(uint32_t));   /* EDX */
    memcpy(name + 8, &regs[2], sizeof(uint32_t));   /* ECX */
    name[12] = 0;

    return regs[0];
}
533
534
/* Returns 1 when the CPU vendor string is "GenuineIntel", else 0. */
static int is_intel (void)
{
  char vendor[13];

  get_cpu_vendor(vendor);

  return strcmp(vendor, "GenuineIntel") == 0;
}
541
542
/* Returns 1 when the CPU vendor string is "AuthenticAMD", else 0. */
static int is_amd (void)
{
  char vendor[13];

  get_cpu_vendor(vendor);

  return strcmp(vendor, "AuthenticAMD") == 0;
}
549
550 static int pstate_arch_setup(void)
551 {
552     
553     if (is_amd()) {
554         machine_state.arch = AMD;
555         machine_state.funcs = &amd_funcs;
556         machine_state.supports_pstates = supports_pstates_amd();
557         INFO("PSTATE: P-State initialized for AMD\n");
558     } else if (is_intel()) {
559         machine_state.arch  = INTEL;
560         machine_state.funcs = &intel_funcs;
561         machine_state.supports_pstates = supports_pstates_intel();
562         INFO("PSTATE: P-State initialized for INTEL (Work in progress...)\n");
563         return 0;
564         
565     } else {
566         machine_state.arch = OTHER;
567         machine_state.funcs = NULL;
568         machine_state.supports_pstates = 0;
569         INFO("PSTATE: P-state control: No support for direct control on this architecture\n");
570         return 0;
571     }
572     
573     return 0;
574 }
575
576
577
578 /******************************************************************
579   Linux Interface
580 *****************************************************************/
581
582 #if 0 /* disabled: unfinished sketch of the Linux cpufreq path; not compiled */
583 // The purpose of the stub governor is to pretend to keep
584 // the processor at the maximum frequency, while we manipulate the
585 // processor core directly
586 static int governor_run(struct cpufreq_policy *policy, unsigned int event)
587 {
588     switch (event) {
589         case CPUFREQ_GOV_START:
590         case CPUFREQ_GOV_STOP:
591             cpu_freq_driver_target(policy, policy->max_freq);
592
593         case CPUFREQ_GOV_LIMITS:
594     }                           
595 }
596
597 static struct cpufreq_governor stub_governor = 
598 {
599     .name="PALACIOS_STUB",
600     .governor=governor_run,
601     .owner=.THIS_MODULE,
602 }
603
604 static void linux_init(void)
605 {
606     // get_policy
607     //
608     // change to userspace governor - or change to our do nothing governor? (call set_speed)
609     // stash the old governor
610     // tell governor to do max freq
611
612 }
613
614 static void linux_deinit(void)
615 {
616 }
617
618 static uint8_t linux_get_pstate(void)
619 {
620     return 0;
621 }
622
623 static void linux_set_pstate(uint8_t p)
624 {
625 }
626
627 static void linux_restore_defaults(void)
628 {
629 }
630
631 #endif
632
633
634 /******************************************************************
635   Generic Interface as provided to Palacios and to the rest of the
636   module
637 ******************************************************************/
638
639 static void init_core(void)
640 {
641     unsigned cpu;
642     struct cpufreq_policy *p;
643
644
645     DEBUG("P-State Core Init\n");
646
647     get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
648     get_cpu_var(core_state).cur_pstate = 0;
649     
650     if (machine_state.funcs) {
651         get_cpu_var(core_state).min_pstate = machine_state.funcs->get_min_pstate();
652         get_cpu_var(core_state).max_pstate = machine_state.funcs->get_max_pstate();
653     } else {
654         get_cpu_var(core_state).min_pstate = 0;
655         get_cpu_var(core_state).max_pstate = 0;
656     }
657
658
659     cpu = get_cpu(); put_cpu();
660
661     p = cpufreq_cpu_get(cpu);
662
663     if (!p) { 
664         get_cpu_var(core_state).have_cpufreq = 0;
665         get_cpu_var(core_state).min_freq_khz=0;
666         get_cpu_var(core_state).max_freq_khz=0;
667         get_cpu_var(core_state).cur_freq_khz=0;
668     } else {
669         get_cpu_var(core_state).have_cpufreq = 1;
670         get_cpu_var(core_state).min_freq_khz=p->min;
671         get_cpu_var(core_state).max_freq_khz=p->max;
672         get_cpu_var(core_state).cur_freq_khz=p->cur;
673         cpufreq_cpu_put(p);
674     }
675
676     put_cpu_var(core_state);
677         
678 }
679
680
681 void palacios_pstate_ctrl_release(void); // forward declaration: used by deinit_core() before its definition
682
683
/*
 * Per-core teardown: give up whatever p-state control this core
 * currently holds.
 */
static void deinit_core(void)
{
    DEBUG("P-State Core Deinit\n");

    palacios_pstate_ctrl_release();
}
689
690
691
692 void palacios_pstate_ctrl_get_chars(struct v3_cpu_pstate_chars *c) 
693 {
694     memset(c,0,sizeof(struct v3_cpu_pstate_chars));
695    
696
697     c->features = V3_PSTATE_INTERNAL_CONTROL;
698
699     if (get_cpu_var(core_state).have_cpufreq) {
700         c->features |= V3_PSTATE_EXTERNAL_CONTROL;
701     }
702
703     if (machine_state.arch==AMD || machine_state.arch==INTEL) { 
704         c->features |= V3_PSTATE_DIRECT_CONTROL;
705     }
706     c->cur_mode = get_cpu_var(core_state).mode;
707     c->min_pstate = get_cpu_var(core_state).min_pstate;
708     c->max_pstate = get_cpu_var(core_state).max_pstate;
709     c->cur_pstate = get_cpu_var(core_state).cur_pstate;
710     c->min_freq_khz = get_cpu_var(core_state).min_freq_khz;
711     c->max_freq_khz = get_cpu_var(core_state).max_freq_khz;
712     c->cur_freq_khz = get_cpu_var(core_state).cur_freq_khz;
713
714     put_cpu_var(core_state);
715
716     
717     
718 }
719
720
721 uint8_t palacios_pstate_ctrl_get_pstate(void)
722 {
723     if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { 
724         put_cpu_var(core_state);
725         return machine_state.funcs->get_pstate();
726     } else {
727         put_cpu_var(core_state);
728         return 0;
729     }
730 }
731
732 void palacios_pstate_ctrl_set_pstate(uint8_t p)
733 {
734     if (get_cpu_var(core_state).mode==V3_PSTATE_DIRECT_CONTROL) { 
735         put_cpu_var(core_state);
736         machine_state.funcs->set_pstate(p);
737     } 
738 }
739
740
/*
 * Cross-call adapter: the requested p-state arrives packed into the
 * void* argument and is unpacked before being applied.
 */
void palacios_pstate_ctrl_set_pstate_wrapper(void *p)
{
    uint8_t pstate = (uint8_t)(uint64_t)p;

    palacios_pstate_ctrl_set_pstate(pstate);
}
745
746 uint64_t palacios_pstate_ctrl_get_freq(void)
747 {
748     if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { 
749         put_cpu_var(core_state);
750         ERROR("Unimplemented get freq\n");
751         return 0;
752     } else {
753         put_cpu_var(core_state);
754         return 0;
755     }
756 }
757
758 void palacios_pstate_ctrl_set_freq(uint64_t p)
759 {
760     if (get_cpu_var(core_state).mode==V3_PSTATE_EXTERNAL_CONTROL) { 
761         put_cpu_var(core_state);
762         ERROR("Unimplemented set freq\n");
763     } 
764     put_cpu_var(core_state);
765
766 }
767
768
769 static void switch_to_external(void)
770 {
771     if (!(get_cpu_var(core_state).have_cpufreq)) {
772         put_cpu_var(core_state);
773         ERROR("No cpufreq  - cannot switch to external...\n");
774         return;
775     }
776     put_cpu_var(core_state);
777
778     ERROR("Unimplemented switch to external...\n");
779 }
780  
781 static void switch_to_direct(void)
782 {
783     if (get_cpu_var(core_state).have_cpufreq) { 
784         put_cpu_var(core_state);
785         ERROR("Unimplemented: switch to direct on machine with cpu freq\n");
786         // The implementation would set the policy and governor to peg cpu
787         // regardless of load
788     }
789
790     if (machine_state.funcs && machine_state.funcs->arch_init) {
791        get_cpu_var(core_state).mode=V3_PSTATE_DIRECT_CONTROL;
792     
793        machine_state.funcs->arch_init();
794
795        put_cpu_var(core_state);
796     }
797
798 }
799     
800
801 static void switch_to_internal(void)
802 {
803     if (get_cpu_var(core_state).have_cpufreq) { 
804         put_cpu_var(core_state);
805         ERROR("Unimplemented: switch to internal on machine with cpu freq\n");
806         return;
807         // The implementation would set the policy and governor to peg cpu
808         // regardless of load - exactly like direct
809     }
810
811     get_cpu_var(core_state).mode=V3_PSTATE_INTERNAL_CONTROL;
812     
813     put_cpu_var(core_state);
814
815     return;
816 }
817
818
819 static void switch_from_external(void)
820 {
821     if (!(get_cpu_var(core_state).have_cpufreq)) {
822         put_cpu_var(core_state);
823         ERROR("No cpufreq  - how did we get here... external...\n");
824         return;
825     }
826
827     ERROR("Unimplemented switch from external...\n");
828     
829     get_cpu_var(core_state).mode = V3_PSTATE_HOST_CONTROL;
830
831     put_cpu_var(core_state);
832
833 }
834  
835 static void switch_from_direct(void)
836 {
837      
838     if (get_cpu_var(core_state).have_cpufreq) { 
839         put_cpu_var(core_state);
840         ERROR("Unimplemented: switch from direct on machine with cpu freq - will just pretend to do so\n");
841         // The implementation would switch back to default policy and governor
842     }
843
844     get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
845
846
847     machine_state.funcs->set_pstate(get_cpu_var(core_state).min_pstate);
848
849     machine_state.funcs->arch_deinit();
850
851     put_cpu_var(core_state);
852 }
853     
854
855 static void switch_from_internal(void)
856 {
857     if (get_cpu_var(core_state).have_cpufreq) { 
858         put_cpu_var(core_state);
859         ERROR("Unimplemented: switch from internal on machine with cpu freq - will just pretend to do so\n");
860         // The implementation would switch back to default policy and governor
861     }
862
863     get_cpu_var(core_state).mode=V3_PSTATE_HOST_CONTROL;
864
865     put_cpu_var(core_state);
866     
867     return;
868 }
869
870
871
872 void palacios_pstate_ctrl_acquire(uint32_t type)
873 {
874     if (get_cpu_var(core_state).mode != V3_PSTATE_HOST_CONTROL) { 
875         palacios_pstate_ctrl_release();
876     }
877
878     put_cpu_var(core_state);
879
880     switch (type) { 
881         case V3_PSTATE_EXTERNAL_CONTROL:
882             switch_to_external();
883             break;
884         case V3_PSTATE_DIRECT_CONTROL:
885             switch_to_direct();
886             break;
887         case V3_PSTATE_INTERNAL_CONTROL:
888             switch_to_internal();
889             break;
890         default:
891             ERROR("Unknown pstate control type %u\n",type);
892             break;
893     }
894
895 }
896
897 // Wrappers for xcalls
898 static void palacios_pstate_ctrl_acquire_external(void)
899 {
900     palacios_pstate_ctrl_acquire(V3_PSTATE_EXTERNAL_CONTROL);
901 }
902
903 static void palacios_pstate_ctrl_acquire_direct(void)
904 {
905     palacios_pstate_ctrl_acquire(V3_PSTATE_DIRECT_CONTROL);
906 }
907
908
909 void palacios_pstate_ctrl_release(void)
910 {
911
912     if (get_cpu_var(core_state).mode == V3_PSTATE_HOST_CONTROL) { 
913         put_cpu_var(core_state);
914         return;
915     }
916
917     switch (get_cpu_var(core_state).mode) { 
918         case V3_PSTATE_EXTERNAL_CONTROL:
919             switch_from_external();
920             break;
921         case V3_PSTATE_DIRECT_CONTROL:
922             switch_from_direct();
923             break;
924         case V3_PSTATE_INTERNAL_CONTROL:
925             switch_from_internal();
926             break;
927         default:
928             ERROR("Unknown pstate control type %u\n",core_state.mode);
929             break;
930     }
931
932     put_cpu_var(core_state);
933     
934 }
935
936
937 static void update_hw_pstate(void *arg)
938 {
939     if (machine_state.funcs && machine_state.funcs->get_pstate) {
940         get_cpu_var(core_state).cur_hw_pstate = machine_state.funcs->get_pstate();
941         put_cpu_var(core_state);
942     } else {
943         get_cpu_var(core_state).cur_hw_pstate = 0;
944         put_cpu_var(core_state);
945     }
946 }
947
948
949 /***************************************************************************
950   PROC Interface to expose state
951 ***************************************************************************/
952
953 static int pstate_show(struct seq_file * file, void * v)
954 {
955     unsigned int cpu;
956     unsigned int numcpus = num_online_cpus();
957
958     seq_printf(file, "V3VEE DVFS Status\n\n");
959
960     for (cpu=0;cpu<numcpus;cpu++) { 
961         palacios_xcall(cpu,update_hw_pstate,0);
962     }
963     
964     seq_printf(file, "Arch:\t%s\nPStates:\t%s\n\n",
965                machine_state.arch==INTEL ? "Intel" : 
966                machine_state.arch==AMD ? "AMD" : "Other",
967                machine_state.supports_pstates ? "Yes" : "No");
968                
969     for (cpu=0;cpu<numcpus;cpu++) { 
970         struct pstate_core_info *s = &per_cpu(core_state,cpu);
971         seq_printf(file,"pcore %u: hw pstate %u mode %s of [ host ",cpu,
972                    s->cur_hw_pstate,
973                    s->mode==V3_PSTATE_HOST_CONTROL ? "host" :
974                    s->mode==V3_PSTATE_EXTERNAL_CONTROL ? "external" :
975                    s->mode==V3_PSTATE_DIRECT_CONTROL ? "direct" : 
976                    s->mode==V3_PSTATE_INTERNAL_CONTROL ? "internal" : "UNKNOWN");
977         if (s->have_cpufreq) { 
978             seq_printf(file,"external ");
979         }
980         if (machine_state.supports_pstates) {
981             seq_printf(file,"direct ");
982         }
983         seq_printf(file,"internal ] ");
984         if (s->mode==V3_PSTATE_EXTERNAL_CONTROL) { 
985             seq_printf(file,"(min=%llu max=%llu cur=%llu) ", s->min_freq_khz, s->max_freq_khz, s->cur_freq_khz);
986         } 
987         if (s->mode==V3_PSTATE_DIRECT_CONTROL) { 
988             seq_printf(file,"(min=%u max=%u cur=%u) ", (uint32_t)s->min_pstate, (uint32_t)s->max_pstate, (uint32_t)s->cur_pstate);
989         }
990         seq_printf(file,"\n");
991     }
992     return 0;
993 }
994
995 static int pstate_open(struct inode * inode, struct file * file) 
996 {
997     return single_open(file, pstate_show, NULL);
998 }
999
1000
1001 static struct file_operations pstate_fops = {
1002     .owner = THIS_MODULE,
1003     .open = pstate_open, 
1004     .read = seq_read,
1005     .llseek = seq_lseek,
1006     .release = seq_release
1007 };
1008
1009 int pstate_proc_setup(void)
1010 {
1011     struct proc_dir_entry *proc;
1012     
1013     proc = create_proc_entry("v3-dvfs",0444, palacios_get_procdir());
1014
1015     if (!proc) { 
1016         ERROR("Failed to create proc entry for p-state control\n");
1017         return -1;
1018     }
1019     
1020     proc->proc_fops = &pstate_fops;
1021     
1022     return 0;
1023 }
1024   
/* Remove the "v3-dvfs" entry from the Palacios /proc directory. */
void pstate_proc_teardown(void)
{
    struct proc_dir_entry *dir = palacios_get_procdir();

    remove_proc_entry("v3-dvfs", dir);
}
1029
1030 /********************************************************************
1031   User interface (ioctls)
1032 ********************************************************************/
1033
1034 static int dvfs_ctrl(unsigned int cmd, unsigned long arg) 
1035 {
1036     struct v3_dvfs_ctrl_request r;
1037
1038     if (copy_from_user(&r,(void __user*)arg,sizeof(struct v3_dvfs_ctrl_request))) {
1039         ERROR("Failed to copy DVFS request from user\n");
1040         return -EFAULT;
1041     }
1042
1043     if (r.pcore >= num_online_cpus()) {
1044         ERROR("Cannot apply DVFS request to pcore %u\n",r.pcore);
1045         return -EFAULT;
1046     }
1047
1048     switch (r.cmd) {
1049         case V3_DVFS_ACQUIRE: {
1050             switch (r.acq_type) { 
1051                 case V3_DVFS_EXTERNAL:
1052                     palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_external,0);
1053                     return 0;
1054                     break;
1055                 case V3_DVFS_DIRECT:
1056                     palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_acquire_direct,0);
1057                     return 0;
1058                     break;
1059                 default:
1060                     ERROR("Unknown DVFS acquire type %u\n",r.acq_type);
1061                     return -EFAULT;
1062             }
1063         }
1064             break;
1065         case V3_DVFS_RELEASE: {
1066             palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_release,0);
1067             return 0;
1068         }
1069             break;
1070         case V3_DVFS_SETFREQ: {
1071             palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_freq,(void*)r.freq_khz);
1072             return 0;
1073         }
1074             break;
1075         case V3_DVFS_SETPSTATE: {
1076             palacios_xcall(r.pcore,(void (*)(void*))palacios_pstate_ctrl_set_pstate_wrapper,(void*)(uint64_t)r.pstate);
1077             return 0;
1078         }
1079         default: {
1080             ERROR("Unknown DVFS command %u\n",r.cmd);
1081             return -EFAULT;
1082         }
1083             break;
1084     }
1085 }
1086
1087
1088 void pstate_user_setup(void)
1089 {
1090     add_global_ctrl(V3_DVFS_CTRL, dvfs_ctrl);
1091 }
1092
1093
1094 void pstate_user_teardown(void)
1095 {
1096     remove_global_ctrl(V3_DVFS_CTRL);
1097 }
1098
1099 static struct v3_host_pstate_ctrl_iface hooks = {
1100     .get_chars = palacios_pstate_ctrl_get_chars,
1101     .acquire = palacios_pstate_ctrl_acquire,
1102     .release = palacios_pstate_ctrl_release,
1103     .set_pstate = palacios_pstate_ctrl_set_pstate,
1104     .get_pstate = palacios_pstate_ctrl_get_pstate,
1105     .set_freq = palacios_pstate_ctrl_set_freq,
1106     .get_freq = palacios_pstate_ctrl_get_freq,
1107 };
1108
1109
1110     
1111 static int pstate_ctrl_init(void) 
1112 {
1113     unsigned int cpu;
1114     unsigned int numcpus = num_online_cpus();
1115
1116     pstate_arch_setup();
1117
1118     for (cpu=0;cpu<numcpus;cpu++) { 
1119         palacios_xcall(cpu,(void ((*)(void*)))init_core,0);
1120     }
1121
1122     V3_Init_Pstate_Ctrl(&hooks);  
1123
1124     if (pstate_proc_setup()) { 
1125         ERROR("Unable to initialize P-State Control\n");
1126         return -1;
1127     }
1128
1129     pstate_user_setup();
1130
1131     INFO("P-State Control Initialized\n");
1132
1133     return 0;
1134 }
1135
1136 static int pstate_ctrl_deinit(void)
1137 {
1138     unsigned int cpu;
1139     unsigned int numcpus=num_online_cpus();
1140
1141
1142     pstate_user_teardown();
1143
1144     pstate_proc_teardown();
1145
1146     // release pstate control if we have it, and we need to do this on each processor
1147     for (cpu=0;cpu<numcpus;cpu++) { 
1148         palacios_xcall(cpu,(void (*)(void *))deinit_core,0);
1149     }
1150
1151     return 0;
1152 }
1153         
1154
1155 static struct linux_ext pstate_ext = {
1156     .name = "PSTATE_CTRL",
1157     .init = pstate_ctrl_init,
1158     .deinit = pstate_ctrl_deinit,
1159     .guest_init = NULL,
1160     .guest_deinit = NULL,
1161 };
1162
1163
1164 register_extension(&pstate_ext);
1165
1166
1167