2 * Palacios cache information interface
5 * (c) Peter Dinda, 2015
9 #include <linux/file.h>
10 #include <linux/uaccess.h>
11 #include <linux/namei.h>
12 #include <linux/poll.h>
13 #include <linux/anon_inodes.h>
16 #include "util-hashtable.h"
17 #include "linux-exts.h"
20 #define sint64_t int64_t
22 #include <linux/spinlock.h>
23 #include <asm/uaccess.h>
24 #include <linux/inet.h>
25 #include <linux/kthread.h>
26 #include <linux/netdevice.h>
29 #include <linux/string.h>
30 #include <linux/preempt.h>
31 #include <linux/sched.h>
32 #include <linux/list.h>
33 #include <linux/syscalls.h>
35 #include <linux/module.h>
36 #include <linux/kernel.h>
37 #include <linux/socket.h>
38 #include <linux/net.h>
39 #include <linux/slab.h>
41 #include <palacios/vmm.h>
42 #include <interfaces/vmm_cache_info.h>
46 This is a simple implementation of the Palacios cache info
/*
 * cpuid_string - inline-asm helper that stores four register outputs in dest[].
 *
 * The visible output constraints bind dest[0] to EAX, dest[1] to EBX,
 * dest[2] to ECX and dest[3] to EDX.
 *
 * NOTE(review): the instruction template and the input constraints are not
 * visible in this excerpt; presumably this executes CPUID with `id` as the
 * leaf number - confirm against the full source.
 */
51 static inline void cpuid_string(u32 id, u32 dest[4]) {
53 :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3))
/*
 * get_cpu_vendor - assemble the CPU vendor string into name[].
 *
 * name: caller-supplied 13-byte buffer; the first 12 bytes are written here
 *       as three 32-bit words.
 *
 * NOTE(review): how dest[] is filled and how name[] is terminated are not
 * visible in this excerpt; presumably dest[] comes from cpuid_string() -
 * confirm against the full source.
 */
58 static int get_cpu_vendor(char name[13])
// The words are copied in the order dest[1], dest[3], dest[2]; with the
// register layout used by cpuid_string() that is EBX, EDX, ECX.
65 ((u32*)name)[0]=dest[1];
66 ((u32*)name)[1]=dest[3];
67 ((u32*)name)[2]=dest[2];
/*
 * is_intel - returns non-zero when the vendor string in `name` is exactly
 * "GenuineIntel", and 0 otherwise.
 *
 * NOTE(review): the code that fills `name` is not visible in this excerpt;
 * presumably it calls get_cpu_vendor() - confirm.
 */
73 static int is_intel(void)
77 return !strcmp(name,"GenuineIntel");
/*
 * is_amd - returns non-zero when the vendor string in `name` is exactly
 * "AuthenticAMD", and 0 otherwise.
 *
 * NOTE(review): the code that fills `name` is not visible in this excerpt;
 * presumably it calls get_cpu_vendor() - confirm.
 */
80 static int is_amd(void)
84 return !strcmp(name,"AuthenticAMD");
/*
 * decode_amd_l2l3_assoc - translate an encoded associativity value into the
 * value returned to the caller.
 *
 * val: the encoded associativity field.
 *
 * An error is logged for an encoding that is not recognised.
 *
 * NOTE(review): the decode table and the value returned for an unknown
 * encoding are not visible in this excerpt.
 */
87 static uint32_t decode_amd_l2l3_assoc(uint32_t val)
112 ERROR("Unknown associativity encoding %x\n",val);
117 static int get_cache_level_amd_legacy(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
119 uint32_t eax, ebx, ecx, edx;
120 uint32_t l1_dtlb_24_assoc;
121 uint32_t l1_dtlb_24_entries;
122 uint32_t l1_itlb_24_assoc;
123 uint32_t l1_itlb_24_entries;
124 uint32_t l1_dtlb_4k_assoc;
125 uint32_t l1_dtlb_4k_entries;
126 uint32_t l1_itlb_4k_assoc;
127 uint32_t l1_itlb_4k_entries;
128 uint32_t l1_dcache_size;
129 uint32_t l1_dcache_assoc;
130 uint32_t l1_dcache_linespertag;
131 uint32_t l1_dcache_linesize;
132 uint32_t l1_icache_size;
133 uint32_t l1_icache_assoc;
134 uint32_t l1_icache_linespertag;
135 uint32_t l1_icache_linesize;
136 uint32_t l2_dtlb_24_assoc;
137 uint32_t l2_dtlb_24_entries;
138 uint32_t l2_itlb_24_assoc;
139 uint32_t l2_itlb_24_entries;
140 uint32_t l2_dtlb_4k_assoc;
141 uint32_t l2_dtlb_4k_entries;
142 uint32_t l2_itlb_4k_assoc;
143 uint32_t l2_itlb_4k_entries;
144 uint32_t l2_cache_size;
145 uint32_t l2_cache_assoc;
146 uint32_t l2_cache_linespertag;
147 uint32_t l2_cache_linesize;
148 uint32_t l3_cache_size;
149 uint32_t l3_cache_assoc;
150 uint32_t l3_cache_linespertag;
151 uint32_t l3_cache_linesize;
153 // L1 caches and tlbs
154 cpuid(0x80000005,&eax,&ebx,&ecx,&edx);
156 l1_dtlb_24_assoc = (eax >> 24) & 0xff;
157 l1_dtlb_24_entries = (eax >> 16) & 0xff;
158 l1_itlb_24_assoc = (eax >> 8) & 0xff;
159 l1_itlb_24_entries = (eax) & 0xff;
160 l1_dtlb_4k_assoc = (ebx >> 24) & 0xff;
161 l1_dtlb_4k_entries = (ebx >> 16) & 0xff;
162 l1_itlb_4k_assoc = (ebx >> 8) & 0xff;
163 l1_itlb_4k_entries = (ebx) & 0xff;
164 l1_dcache_size = ((ecx >> 24) & 0xff) * 1024;
165 l1_dcache_assoc = (ecx >> 16) & 0xff;
166 l1_dcache_linespertag = (ecx >> 8) & 0xff;
167 l1_dcache_linesize = ((ecx) & 0xff) * l1_dcache_linespertag;
168 l1_icache_size = ((edx >> 24) & 0xff) * 1024;
169 l1_icache_assoc = (edx >> 16) & 0xff;
170 l1_icache_linespertag = (edx >> 8) & 0xff;
171 l1_icache_linesize = ((edx) & 0xff) * l1_icache_linespertag;
174 // L2 caches and tlbs plus L3 cache
175 cpuid(0x80000006,&eax,&ebx,&ecx,&edx);
177 l2_dtlb_24_assoc = decode_amd_l2l3_assoc((eax >> 28) & 0xf);
178 l2_dtlb_24_entries = (eax >> 16) & 0xfff;
179 l2_itlb_24_assoc = decode_amd_l2l3_assoc((eax >> 12) & 0xf);
180 l2_itlb_24_entries = (eax) & 0xfff;
181 l2_dtlb_4k_assoc = decode_amd_l2l3_assoc((ebx >> 28) & 0xf);
182 l2_dtlb_4k_entries = (ebx >> 16) & 0xfff;
183 l2_itlb_4k_assoc = decode_amd_l2l3_assoc((ebx >> 12) & 0xf);
184 l2_itlb_4k_entries = (ebx) & 0xfff;
185 l2_cache_size = ((ecx >> 16) & 0xffff) * 1024;
186 l2_cache_assoc = decode_amd_l2l3_assoc((ecx >> 12) & 0xf);
187 l2_cache_linespertag = (ecx >> 8) & 0xf;
188 l2_cache_linesize = ((ecx) & 0xff) * l1_dcache_linespertag;
189 l3_cache_size = ((edx >> 18) & 0x3fff) * 1024 * 512;
190 l3_cache_assoc = decode_amd_l2l3_assoc((edx >> 12) & 0xf);
191 l3_cache_linespertag = (edx >> 8) & 0xf;
192 l3_cache_linesize = ((edx) & 0xff) * l3_cache_linespertag;
195 INFO("L1 ITLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
196 l1_itlb_24_assoc,l1_itlb_24_entries,l1_itlb_4k_assoc,l1_itlb_4k_entries);
197 INFO("L2 ITLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
198 l2_itlb_24_assoc,l2_itlb_24_entries,l2_itlb_4k_assoc,l2_itlb_4k_entries);
200 INFO("L1 DTLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
201 l1_dtlb_24_assoc,l1_dtlb_24_entries,l1_dtlb_4k_assoc,l1_dtlb_4k_entries);
202 INFO("L2 DTLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
203 l2_dtlb_24_assoc,l2_dtlb_24_entries,l2_dtlb_4k_assoc,l2_dtlb_4k_entries);
205 INFO("L1 ICACHE: %u size, %u assoc, %u linesize %u linespertag\n",
206 l1_icache_size,l1_icache_assoc,l1_icache_linesize,l1_icache_linespertag);
208 INFO("L1 DCACHE: %u size, %u assoc, %u linesize %u linespertag\n",
209 l1_dcache_size,l1_dcache_assoc,l1_dcache_linesize,l1_dcache_linespertag);
211 INFO("L2 CACHE: %u size, %u assoc, %u linesize %u linespertag\n",
212 l2_cache_size,l2_cache_assoc,l2_cache_linesize,l2_cache_linespertag);
214 INFO("L3 CACHE: %u size, %u assoc, %u linesize %u linespertag\n",
215 l3_cache_size,l3_cache_assoc,l3_cache_linesize,l3_cache_linespertag);
225 c->associativity=0; // does not exist unless we say otherwise
231 c->size = l1_icache_size;
232 c->blocksize = l1_icache_linesize;
233 c->associativity = l1_icache_assoc == 0xff ? -1 : l1_icache_assoc;
239 c->size = l1_dcache_size;
240 c->blocksize = l1_dcache_linesize;
241 c->associativity = l1_dcache_assoc == 0xff ? -1 : l1_dcache_assoc;
245 case V3_CACHE_COMBINED:
247 c->size = l2_cache_size;
248 c->blocksize = l2_cache_linesize;
249 c->associativity = l2_cache_assoc;
250 } else if (level==3) {
251 c->size = l3_cache_size;
252 c->blocksize = l3_cache_linesize;
253 c->associativity = l3_cache_assoc;
254 } else if (level==-1) {
255 // find highest level combined cache that is enabled
256 if (l3_cache_assoc) {
257 c->size = l3_cache_size;
258 c->blocksize = l3_cache_linesize;
259 c->associativity = l3_cache_assoc;
261 c->size = l2_cache_size;
262 c->blocksize = l2_cache_linesize;
263 c->associativity = l2_cache_assoc;
270 c->size = l1_itlb_4k_entries;
271 c->associativity = l1_itlb_4k_assoc == 0xff ? -1 : l1_itlb_4k_assoc;
272 } else if (level==2) {
273 c->size = l2_itlb_4k_entries;
274 c->associativity = l2_itlb_4k_assoc;
280 c->size = l1_dtlb_4k_entries;
281 c->associativity = l1_dtlb_4k_assoc == 0xff ? -1 : l1_dtlb_4k_assoc;
282 } else if (level==2) {
283 c->size = l2_dtlb_4k_entries;
284 c->associativity = l2_dtlb_4k_assoc;
288 case V3_TLB_COMBINED:
289 // no combined TLB exposed on this machine;
293 ERROR("Don't know how to handle cache info request type %x\n",type);
/*
 * get_cache_level_amd - AMD entry point for a cache/TLB query.
 *
 * Checks that the processor reports at least extended CPUID leaf 0x80000006,
 * logs a warning when bit 22 of ECX from leaf 0x80000001 is set, and then
 * hands the query to get_cache_level_amd_legacy().
 *
 * type, level, c: passed through unchanged to get_cache_level_amd_legacy().
 *
 * Returns whatever get_cache_level_amd_legacy() returns on the normal path.
 * NOTE(review): the value returned after the "does not support" error is not
 * visible in this excerpt.
 */
301 static int get_cache_level_amd(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
303 uint32_t eax, ebx, ecx, edx;
// EAX of leaf 0x80000000 is compared against the highest leaf needed below.
305 cpuid(0x80000000,&eax,&ebx,&ecx,&edx);
307 if (eax < 0x80000006) {
308 ERROR("AMD processor does not support even legacy cache info\n");
312 cpuid(0x80000001,&eax,&ebx,&ecx,&edx);
// Bit 22 of ECX is treated as the cache-topology feature flag; the legacy
// leaves are still used, so only an informational message is emitted.
314 if ((ecx >> 22) & 0x1) {
315 INFO("AMD Processor has Cache Topology Support - Legacy results may be inaccurate\n");
318 return get_cache_level_amd_legacy(type,level,c);
// Upper bound on the number of CPUID leaf 4 sub-leaves that are scanned.
321 #define INTEL_MAX_CACHE 256

/*
 * get_cache_level_intel_det - answer a cache query by enumerating the
 * sub-leaves of CPUID leaf 4.
 *
 * type:  requested cache kind; any of the three V3_TLB_* kinds is rejected
 *        with an error.
 * level: requested level; -1 asks for the highest level found among entries
 *        of the requested kind (c->level tracks the maximum seen so far).
 * c:     result structure; level, blocksize and associativity are written in
 *        the visible lines.
 *
 * NOTE(review): the assignments of ctype and csets, the loop-exit condition,
 * the first half of the level match and all return statements are not
 * visible in this excerpt.
 */
323 static int get_cache_level_intel_det(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
326 uint32_t eax, ebx, ecx, edx;
327 uint32_t ctype, clevel, cassoc, cparts, csets, clinesize, csize;
329 if (type==V3_TLB_CODE || type==V3_TLB_DATA || type==V3_TLB_COMBINED) {
330 ERROR("TLB query unsupported on Intel\n");
336 c->level = 0; // max level found so far
339 for (i=0;i<INTEL_MAX_CACHE;i++) {
341 cpuid_count(4,i,&eax,&ebx,&ecx,&edx);
// Level is EAX[7:5].  Associativity, partitions and line size are stored
// minus one in EBX[31:22], EBX[21:12] and EBX[11:0].  When EAX bit 9 is set
// the associativity is reported as -1 (that bit is the fully-associative
// flag per the Intel SDM).
350 clevel = (eax >> 5) & 0x7;
351 cassoc = eax & 0x200 ? -1 : ((ebx>>22) & 0x3ff) + 1 ;
352 cparts = ((ebx >> 12) & 0x3ff) + 1;
353 clinesize = (ebx & 0xfff) + 1;
// NOTE(review): when cassoc is -1 this unsigned product wraps, so csize is
// not a meaningful byte count in that case.
355 csize = cassoc * cparts * clinesize * csets;
357 INFO("Cache: index %u type %u level %u assoc %u parts %u linesize %u sets %u size %u\n",
358 i,ctype,clevel,cassoc,cparts,clinesize,csets,csize);
// Match on kind: ctype 1 pairs with data, 2 with code, 3 with combined.
// For level == -1 an entry is taken only if it is deeper than the best so far.
361 (((ctype==1 && type==V3_CACHE_DATA) ||
362 (ctype==2 && type==V3_CACHE_CODE) ||
363 (ctype==3 && type==V3_CACHE_COMBINED)) &&
365 (level==-1 && clevel>c->level)))) {
369 c->blocksize = clinesize;
370 c->associativity = cassoc;
// Reaching INTEL_MAX_CACHE means the loop ran to its bound without exiting early.
374 if (i==INTEL_MAX_CACHE) {
/*
 * get_cache_level_intel - Intel entry point for a cache/TLB query.
 *
 * Reads CPUID leaf 0, reports an error when the deterministic cache
 * parameters function is not supported, and otherwise hands the query to
 * get_cache_level_intel_det().
 *
 * type, level, c: passed through unchanged to get_cache_level_intel_det().
 *
 * NOTE(review): the exact support test and the value returned on the error
 * path are not visible in this excerpt.
 */
382 static int get_cache_level_intel(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
384 uint32_t eax, ebx, ecx, edx;
386 cpuid(0,&eax,&ebx,&ecx,&edx);
389 ERROR("Intel Processor does not support deterministic cache parameters function\n");
393 return get_cache_level_intel_det(type,level,c);
/*
 * get_cache_level - vendor dispatch for cache/TLB queries; this is the
 * function installed in the Palacios hook table.
 *
 * type, level, c: forwarded unchanged to the vendor-specific implementation.
 *
 * Intel processors go to get_cache_level_intel(); any other vendor that is
 * not handled ends in an error message.
 *
 * NOTE(review): the condition guarding the get_cache_level_amd() call
 * (presumably is_amd()) and the value returned for an unknown vendor are not
 * visible in this excerpt.
 */
396 static int get_cache_level(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
399 return get_cache_level_amd(type,level,c);
400 } else if (is_intel()) {
401 return get_cache_level_intel(type,level,c);
403 ERROR("Cannot get cache information for unknown architecture\n");
411 /***************************************************************************************************
412 Hooks to palacios and initialization
413 *************************************************************************************************/
// Hook table handed to Palacios by init_cache_info(); its get_cache_level
// entry points at the vendor-dispatching get_cache_level() in this file.
416 static struct v3_cache_info_iface hooks = {
417 .get_cache_level = get_cache_level,
/*
 * init_cache_info - extension init callback (installed as .init in
 * cache_info_ext).
 *
 * Makes one probing call to get_cache_level(); if that call returns non-zero
 * an error is logged.  Otherwise the hook table is registered with
 * V3_Init_Cache_Info() and a confirmation message is logged.
 *
 * NOTE(review): the probe passes 0 for the result pointer, so it relies on
 * the vendor implementations tolerating a NULL `c` - confirm.  The return
 * values of this function are not visible in this excerpt.
 */
421 static int init_cache_info(void)
424 // just to see what's there - this should enumerate all
425 // and fail immediately otherwise
426 if (get_cache_level(-1,0,0)) {
427 ERROR("Cannot intialized cache information\n");
431 V3_Init_Cache_Info(&hooks);
433 INFO("cache_info inited\n");
/*
 * deinit_cache_info - extension deinit callback (installed as .deinit in
 * cache_info_ext).  The visible code only logs that the interface was
 * deinitialized.
 *
 * NOTE(review): the return value is not visible in this excerpt.
 */
439 static int deinit_cache_info(void)
441 INFO("cache_info deinited\n");
// Extension descriptor: the name under which this interface is registered,
// plus its init and deinit callbacks.
450 static struct linux_ext cache_info_ext = {
451 .name = "CACHE_INFO_INTERFACE",
452 .init = init_cache_info,
453 .deinit = deinit_cache_info,
// Hands the descriptor to the extension framework declared in linux-exts.h.
457 register_extension(&cache_info_ext);