Cache information interface and implementation for AMD and Intel on Linux

diff --git a/linux_module/Makefile b/linux_module/Makefile

index d89f187..86abff6 100644 (file)
--- a/linux_module/Makefile
+++ b/linux_module/Makefile
@@ -38,6 +38,7 @@ v3vee-$(V3_CONFIG_EXT_MACH_CHECK) += mcheck.o
 
 v3vee-$(V3_CONFIG_MEM_TRACK) += memtrack.o
 
+v3vee-$(V3_CONFIG_CACHE_INFO) += iface-cache_info.o
 v3vee-$(V3_CONFIG_HOST_PMU) += iface-pmu.o
 v3vee-$(V3_CONFIG_HOST_PWRSTAT) += iface-pwrstat.o
 v3vee-$(V3_CONFIG_HOST_PSTATE_CTRL) += iface-pstate-ctrl.o
diff --git a/linux_module/iface-cache_info.c b/linux_module/iface-cache_info.c

new file mode 100644 (file)

index 0000000..d2b44ee
--- /dev/null
+++ b/linux_module/iface-cache_info.c
@@ -0,0 +1,457 @@
+/*
+ * Palacios cache information interface
+ *
+ *
+ * (c) Peter Dinda, 2015
+ */
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/namei.h>
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+
+#include "palacios.h"
+#include "util-hashtable.h"
+#include "linux-exts.h"
+#include "vm.h"
+
+#define sint64_t int64_t
+
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/kthread.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/string.h>
+#include <linux/preempt.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/slab.h>
+
+#include <palacios/vmm.h>
+#include <interfaces/vmm_cache_info.h>
+
+
+/*
+  This is a simple implementation of the Palacios cache info interface
+  
+*/
+
+
+/*
+ * Execute the CPUID instruction with `id` in EAX and store the resulting
+ * EAX, EBX, ECX and EDX values in dest[0] through dest[3], in that order.
+ */
+static inline void cpuid_string(u32 id, u32 dest[4]) {
+  asm volatile("cpuid"
+              : "=a"(dest[0]), "=b"(dest[1]), "=c"(dest[2]), "=d"(dest[3])
+              : "a"(id));
+}
+
+
+/*
+ * Read the CPU vendor identification string using CPUID leaf 0.
+ *
+ * name: receives 12 vendor characters (the EBX, EDX and ECX register
+ *       contents, in that order) followed by a terminating NUL byte.
+ *
+ * Returns the EAX value reported by CPUID leaf 0.
+ */
+static int get_cpu_vendor(char name[13])
+{
+  u32 regs[4];
+
+  cpuid_string(0,regs);
+
+  // Copy the register bytes with memcpy instead of storing through a
+  // casted u32 pointer, so nothing is assumed about the alignment of name.
+  memcpy(name,     &regs[1], sizeof(regs[1]));  // EBX
+  memcpy(name + 4, &regs[3], sizeof(regs[3]));  // EDX
+  memcpy(name + 8, &regs[2], sizeof(regs[2]));  // ECX
+  name[12]=0;
+
+  return regs[0];
+}
+
+/* Returns 1 if the CPUID vendor string is "GenuineIntel", 0 otherwise. */
+static int is_intel(void)
+{
+  char vendor[13];
+
+  get_cpu_vendor(vendor);
+
+  return strcmp(vendor,"GenuineIntel") == 0;
+}
+
+/* Returns 1 if the CPUID vendor string is "AuthenticAMD", 0 otherwise. */
+static int is_amd(void)
+{
+  char vendor[13];
+
+  get_cpu_vendor(vendor);
+
+  return strcmp(vendor,"AuthenticAMD") == 0;
+}
+
+/*
+ * Translate the 4-bit associativity encoding used by the L2/L3 fields of
+ * CPUID leaf 0x80000006 into a way count.
+ *
+ * Returns the number of ways, (uint32_t)-1 for encoding 0xf, and 0 for
+ * encoding 0.  Any encoding not listed below is logged as an error and
+ * also yields 0.
+ */
+static uint32_t decode_amd_l2l3_assoc(uint32_t val)
+{
+    // Encodings whose way count differs from the encoded value;
+    // entries left at 0 are encodings this function does not know.
+    static const uint32_t ways[16] = {
+       [0x6] = 8,
+       [0x8] = 16,
+       [0xa] = 32,
+       [0xb] = 48,
+       [0xc] = 64,
+       [0xd] = 96,
+       [0xe] = 128,
+       [0xf] = (uint32_t)-1,
+    };
+
+    // 0, 1, 2 and 4 decode to themselves
+    if (val <= 2 || val == 4) {
+       return val;
+    }
+
+    if (val < 16 && ways[val] != 0) {
+       return ways[val];
+    }
+
+    ERROR("Unknown associativity encoding %x\n",val);
+    return 0;
+}
+
+/*
+ * Query AMD cache and TLB parameters using the legacy CPUID leaves
+ * 0x80000005 (L1 caches and TLBs) and 0x80000006 (L2 caches and TLBs,
+ * plus the L3 cache).
+ *
+ * Everything that is decoded is always logged with INFO.
+ *
+ * type:  which structure the caller is asking about
+ * level: 1, 2 or 3; for V3_CACHE_COMBINED, -1 (0xffffffff) selects the
+ *        L3 cache if its associativity is nonzero and the L2 cache
+ *        otherwise
+ * c:     output; may be NULL, in which case the function only logs
+ *
+ * On return c->size, c->blocksize and c->associativity are all 0 when
+ * the requested type/level combination is not reported here.  For TLBs
+ * only the 4KB-page figures are returned and blocksize stays 0.
+ *
+ * Returns 0 on success, -1 if type is not a known v3_cache_type_t.
+ */
+static int get_cache_level_amd_legacy(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
+{
+    uint32_t eax, ebx, ecx, edx;
+    uint32_t l1_dtlb_24_assoc;
+    uint32_t l1_dtlb_24_entries;
+    uint32_t l1_itlb_24_assoc;
+    uint32_t l1_itlb_24_entries;
+    uint32_t l1_dtlb_4k_assoc;
+    uint32_t l1_dtlb_4k_entries;
+    uint32_t l1_itlb_4k_assoc;
+    uint32_t l1_itlb_4k_entries;
+    uint32_t l1_dcache_size;
+    uint32_t l1_dcache_assoc;
+    uint32_t l1_dcache_linespertag;
+    uint32_t l1_dcache_linesize;
+    uint32_t l1_icache_size;
+    uint32_t l1_icache_assoc;
+    uint32_t l1_icache_linespertag;
+    uint32_t l1_icache_linesize;
+    uint32_t l2_dtlb_24_assoc;
+    uint32_t l2_dtlb_24_entries;
+    uint32_t l2_itlb_24_assoc;
+    uint32_t l2_itlb_24_entries;
+    uint32_t l2_dtlb_4k_assoc;
+    uint32_t l2_dtlb_4k_entries;
+    uint32_t l2_itlb_4k_assoc;
+    uint32_t l2_itlb_4k_entries;
+    uint32_t l2_cache_size;
+    uint32_t l2_cache_assoc;
+    uint32_t l2_cache_linespertag;
+    uint32_t l2_cache_linesize;
+    uint32_t l3_cache_size;
+    uint32_t l3_cache_assoc;
+    uint32_t l3_cache_linespertag;
+    uint32_t l3_cache_linesize;
+
+    // L1 caches and tlbs
+    // eax: 2/4MB-page TLBs, ebx: 4KB-page TLBs, ecx: dcache, edx: icache
+    cpuid(0x80000005,&eax,&ebx,&ecx,&edx);
+
+    l1_dtlb_24_assoc = (eax >> 24) & 0xff;
+    l1_dtlb_24_entries = (eax >> 16) & 0xff;
+    l1_itlb_24_assoc = (eax >> 8) & 0xff;
+    l1_itlb_24_entries = (eax) & 0xff;
+    l1_dtlb_4k_assoc = (ebx >> 24) & 0xff;
+    l1_dtlb_4k_entries = (ebx >> 16) & 0xff;
+    l1_itlb_4k_assoc = (ebx >> 8) & 0xff;
+    l1_itlb_4k_entries = (ebx) & 0xff;
+    l1_dcache_size = ((ecx >> 24) & 0xff) * 1024;   // reported in KB
+    l1_dcache_assoc = (ecx >> 16) & 0xff;
+    l1_dcache_linespertag = (ecx >> 8) & 0xff;
+    l1_dcache_linesize = ((ecx) & 0xff) * l1_dcache_linespertag;
+    l1_icache_size = ((edx >> 24) & 0xff) * 1024;   // reported in KB
+    l1_icache_assoc = (edx >> 16) & 0xff;
+    l1_icache_linespertag = (edx >> 8) & 0xff;
+    l1_icache_linesize = ((edx) & 0xff) * l1_icache_linespertag;
+
+
+    // L2 caches and tlbs plus L3 cache
+    // Associativity here is a 4-bit encoding, not a plain way count
+    cpuid(0x80000006,&eax,&ebx,&ecx,&edx);
+
+    l2_dtlb_24_assoc = decode_amd_l2l3_assoc((eax >> 28) & 0xf);
+    l2_dtlb_24_entries = (eax >> 16) & 0xfff;
+    l2_itlb_24_assoc = decode_amd_l2l3_assoc((eax >> 12) & 0xf);
+    l2_itlb_24_entries = (eax) & 0xfff;
+    l2_dtlb_4k_assoc = decode_amd_l2l3_assoc((ebx >> 28) & 0xf);
+    l2_dtlb_4k_entries = (ebx >> 16) & 0xfff;
+    l2_itlb_4k_assoc = decode_amd_l2l3_assoc((ebx >> 12) & 0xf);
+    l2_itlb_4k_entries = (ebx) & 0xfff;
+    l2_cache_size = ((ecx >> 16) & 0xffff) * 1024;  // reported in KB
+    l2_cache_assoc = decode_amd_l2l3_assoc((ecx >> 12) & 0xf);
+    l2_cache_linespertag = (ecx >> 8) & 0xf;
+    // Use the L2 lines-per-tag field here (the L1 dcache value was used
+    // previously), matching how the L1 and L3 line sizes are derived.
+    l2_cache_linesize = ((ecx) & 0xff) * l2_cache_linespertag;
+    l3_cache_size = ((edx >> 18) & 0x3fff) * 1024 * 512;  // reported in 512KB units
+    l3_cache_assoc = decode_amd_l2l3_assoc((edx >> 12) & 0xf);
+    l3_cache_linespertag = (edx >> 8) & 0xf;
+    l3_cache_linesize = ((edx) & 0xff) * l3_cache_linespertag;
+    
+    
+    INFO("L1 ITLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
+        l1_itlb_24_assoc,l1_itlb_24_entries,l1_itlb_4k_assoc,l1_itlb_4k_entries);
+    INFO("L2 ITLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
+        l2_itlb_24_assoc,l2_itlb_24_entries,l2_itlb_4k_assoc,l2_itlb_4k_entries);
+    
+    INFO("L1 DTLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
+        l1_dtlb_24_assoc,l1_dtlb_24_entries,l1_dtlb_4k_assoc,l1_dtlb_4k_entries);
+    INFO("L2 DTLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n",
+        l2_dtlb_24_assoc,l2_dtlb_24_entries,l2_dtlb_4k_assoc,l2_dtlb_4k_entries);
+    
+    INFO("L1 ICACHE: %u size, %u assoc, %u linesize %u linespertag\n",
+        l1_icache_size,l1_icache_assoc,l1_icache_linesize,l1_icache_linespertag);
+    
+    INFO("L1 DCACHE: %u size, %u assoc, %u linesize %u linespertag\n",
+        l1_dcache_size,l1_dcache_assoc,l1_dcache_linesize,l1_dcache_linespertag);
+    
+    INFO("L2 CACHE: %u size, %u assoc, %u linesize %u linespertag\n",
+        l2_cache_size,l2_cache_assoc,l2_cache_linesize,l2_cache_linespertag);
+    
+    INFO("L3 CACHE: %u size, %u assoc, %u linesize %u linespertag\n",
+        l3_cache_size,l3_cache_assoc,l3_cache_linesize,l3_cache_linespertag);
+    
+    if (!c) { 
+       // debug
+       return 0;
+    }
+
+    c->type=type;
+    c->level=level;
+    c->size=0;          // never hand back an uninitialized size
+    c->blocksize=0;
+    c->associativity=0; // does not exist unless we say otherwise
+    
+    switch (type) {
+
+       case V3_CACHE_CODE: 
+           if (level==1) { 
+               c->size = l1_icache_size;
+               c->blocksize = l1_icache_linesize;
+               // L1 fields use 0xff to mean fully associative
+               c->associativity = l1_icache_assoc == 0xff ? -1 : l1_icache_assoc;
+           }           
+           break;
+
+       case V3_CACHE_DATA: 
+           if (level==1) { 
+               c->size = l1_dcache_size;
+               c->blocksize = l1_dcache_linesize;
+               c->associativity = l1_dcache_assoc == 0xff ? -1 : l1_dcache_assoc;
+           } 
+           break;
+           
+       case V3_CACHE_COMBINED: 
+           if (level==2) { 
+               c->size = l2_cache_size;
+               c->blocksize = l2_cache_linesize;
+               c->associativity = l2_cache_assoc;
+           } else if (level==3) { 
+               c->size = l3_cache_size;
+               c->blocksize = l3_cache_linesize;
+               c->associativity = l3_cache_assoc;
+           } else if (level==-1) { 
+               // find highest level combined cache that is enabled
+               if (l3_cache_assoc) { 
+                   c->size = l3_cache_size;
+                   c->blocksize = l3_cache_linesize;
+                   c->associativity = l3_cache_assoc;
+               } else {
+                   c->size = l2_cache_size;
+                   c->blocksize = l2_cache_linesize;
+                   c->associativity = l2_cache_assoc;
+               }
+           }
+           break;
+
+       case V3_TLB_CODE: 
+           if (level==1) { 
+               c->size = l1_itlb_4k_entries;
+               c->associativity = l1_itlb_4k_assoc == 0xff ? -1 : l1_itlb_4k_assoc;
+           } else if (level==2) { 
+               c->size = l2_itlb_4k_entries;
+               c->associativity = l2_itlb_4k_assoc;
+           }
+           break;
+           
+       case V3_TLB_DATA: 
+           if (level==1) { 
+               c->size = l1_dtlb_4k_entries;
+               c->associativity = l1_dtlb_4k_assoc == 0xff ? -1 : l1_dtlb_4k_assoc;
+           } else if (level==2) { 
+               c->size = l2_dtlb_4k_entries;
+               c->associativity = l2_dtlb_4k_assoc;
+           }
+           break;
+       
+       case V3_TLB_COMBINED: 
+           // no combined TLB exposed on this machine;
+           break;
+           
+       default:
+           ERROR("Don't know how to handle cache info request type %x\n",type);
+           return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * AMD entry point for cache queries.
+ *
+ * Checks that the extended CPUID range reaches leaf 0x80000006, logs a
+ * note when bit 22 of ECX from leaf 0x80000001 is set, and then hands the
+ * request to get_cache_level_amd_legacy().
+ *
+ * Returns -1 if leaf 0x80000006 is not available, otherwise whatever the
+ * legacy query returns.
+ */
+static int get_cache_level_amd(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
+{
+    uint32_t ra, rb, rc, rd;
+
+    // leaf 0x80000000 reports the highest extended leaf in eax
+    cpuid(0x80000000,&ra,&rb,&rc,&rd);
+    if (ra < 0x80000006) {
+        ERROR("AMD processor does not support even legacy cache info\n");
+        return -1;
+    }
+
+    cpuid(0x80000001,&ra,&rb,&rc,&rd);
+    if (rc & (1U << 22)) {
+        INFO("AMD Processor has Cache Topology Support - Legacy results may be inaccurate\n");
+    }
+
+    return get_cache_level_amd_legacy(type,level,c);
+}
+
+#define INTEL_MAX_CACHE 256
+
+/*
+ * Query Intel cache parameters by walking the subleaves of CPUID leaf 4
+ * (deterministic cache parameters).  Every cache found is logged.
+ *
+ * type:  V3_CACHE_DATA, V3_CACHE_CODE or V3_CACHE_COMBINED; TLB types are
+ *        rejected
+ * level: exact level to report, or -1 (0xffffffff) for the highest level
+ *        cache of the requested type
+ * c:     output; may be NULL, in which case the caches are only logged.
+ *        If no cache matches, level, size, blocksize and associativity
+ *        are all left at 0.
+ *
+ * Returns 0 on success, -1 for a TLB query or if the enumeration does not
+ * terminate within INTEL_MAX_CACHE subleaves.
+ */
+static int get_cache_level_intel_det(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
+{
+    uint32_t i;
+    uint32_t eax, ebx, ecx, edx;
+    uint32_t ctype, clevel, cways, cassoc, cparts, csets, clinesize, csize;
+
+    if (type==V3_TLB_CODE || type==V3_TLB_DATA || type==V3_TLB_COMBINED) { 
+       ERROR("TLB query unsupported on Intel\n");
+       return -1;
+    }
+
+    if (c) { 
+       c->type = type;
+       c->level = 0;  // max level found so far
+       // nothing found yet; same "nonexistent" defaults as the AMD path
+       c->size = 0;
+       c->blocksize = 0;
+       c->associativity = 0;
+    }
+    
+    for (i=0;i<INTEL_MAX_CACHE;i++) {
+
+       cpuid_count(4,i,&eax,&ebx,&ecx,&edx);
+
+       ctype = eax & 0x1f;
+
+       if (!ctype) { 
+           // no more caches
+           break;
+       }
+
+       clevel = (eax >> 5) & 0x7;
+       cways = ((ebx >> 22) & 0x3ff) + 1;
+       // bit 9 of eax flags a fully associative cache, reported as -1
+       cassoc = (eax & 0x200) ? (uint32_t)-1 : cways;
+       cparts = ((ebx >> 12) & 0x3ff) + 1;
+       clinesize = (ebx & 0xfff) + 1;
+       csets = ecx + 1;
+       // The size must be computed from the real way count; the -1
+       // marker used for fully associative caches would corrupt it.
+       csize = cways * cparts * clinesize * csets;
+
+       INFO("Cache: index %u type %u level %u assoc %u parts %u linesize %u sets %u size %u\n",
+            i,ctype,clevel,cassoc,cparts,clinesize,csets,csize);
+
+       // CPUID cache types: 1 = data, 2 = instruction, 3 = unified
+       if (c &&
+           (((ctype==1 && type==V3_CACHE_DATA) ||
+             (ctype==2 && type==V3_CACHE_CODE) ||
+             (ctype==3 && type==V3_CACHE_COMBINED)) &&
+            ((clevel==level) || 
+             (level==-1 && clevel>c->level)))) { 
+           
+           c->level = clevel;
+           c->size = csize;
+           c->blocksize = clinesize;
+           c->associativity = cassoc;
+       } 
+    }
+
+    if (i==INTEL_MAX_CACHE) { 
+       // never saw the terminating "no more caches" subleaf
+       return -1;
+    } else {
+       return 0;
+    }
+}
+
+
+/*
+ * Intel entry point for cache queries.
+ *
+ * Fails with -1 when the EAX value from CPUID leaf 0 is below 4, i.e. the
+ * deterministic cache parameters leaf cannot be used; otherwise returns
+ * the result of get_cache_level_intel_det().
+ */
+static int get_cache_level_intel(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
+{
+    uint32_t max_leaf, rb, rc, rd;
+
+    cpuid(0,&max_leaf,&rb,&rc,&rd);
+
+    if (max_leaf >= 4) {
+        return get_cache_level_intel_det(type,level,c);
+    }
+
+    ERROR("Intel Processor does not support deterministic cache parameters function\n");
+    return -1;
+}
+
+/*
+ * Vendor dispatch for cache queries: forwards the request to the AMD or
+ * Intel implementation according to the CPUID vendor string.
+ *
+ * Returns the vendor implementation's result, or -1 when the vendor is
+ * neither AMD nor Intel.
+ */
+static int get_cache_level(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c)
+{
+    if (is_amd()) {
+        return get_cache_level_amd(type,level,c);
+    }
+
+    if (is_intel()) {
+        return get_cache_level_intel(type,level,c);
+    }
+
+    ERROR("Cannot get cache information for unknown architecture\n");
+    return -1;
+}
+
+
+/***************************************************************************************************
+  Hooks to palacios and initialization
+*************************************************************************************************/
+
+    
+// Callback table passed to V3_Init_Cache_Info() by init_cache_info()
+static struct v3_cache_info_iface hooks = {
+    .get_cache_level = get_cache_level,
+};
+
+
+/*
+ * Extension init hook: verify that cache information can be read on this
+ * host and, if so, register the query callback with Palacios.
+ *
+ * Returns 0 on success, -1 if the probe query fails.
+ */
+static int init_cache_info(void)
+{
+
+    // just to see what's there - this should enumerate all
+    // and fail immediately otherwise.  With a NULL output pointer the
+    // vendor back ends only log what they find.
+    if (get_cache_level(-1,0,NULL)) { 
+       ERROR("Cannot initialize cache information\n");
+       return -1;
+    }
+
+    V3_Init_Cache_Info(&hooks);
+
+    INFO("cache_info inited\n");
+
+    return 0;
+
+}
+
+/*
+ * Extension deinit hook: only logs a message; always returns 0.
+ */
+static int deinit_cache_info(void)
+{
+    INFO("cache_info deinited\n");
+
+    return 0;
+}
+
+
+
+
+
+// Extension descriptor naming this interface and its init/deinit hooks
+static struct linux_ext cache_info_ext = {
+    .name = "CACHE_INFO_INTERFACE",
+    .init = init_cache_info,
+    .deinit = deinit_cache_info,
+};
+
+
+// NOTE(review): presumably adds the descriptor to the module's extension
+// list so the hooks run at module load/unload - confirm in linux-exts.h
+register_extension(&cache_info_ext);
diff --git a/palacios/include/interfaces/vmm_cache_info.h b/palacios/include/interfaces/vmm_cache_info.h

new file mode 100644 (file)

index 0000000..c29cd3c
--- /dev/null
+++ b/palacios/include/interfaces/vmm_cache_info.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National
+ * Science Foundation and the Department of Energy.
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org>
+ * All rights reserved.
+ *
+ * Author: Peter Dinda (pdinda@northwestern.edu)
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#ifndef __VMM_CACHE_INFO
+#define __VMM_CACHE_INFO
+
+#include <palacios/vmm_types.h>
+
+
+/* Kind of cache or TLB a cache-info query refers to */
+typedef enum {
+    V3_CACHE_CODE,      /* instruction cache */
+    V3_CACHE_DATA,      /* data cache */
+    V3_CACHE_COMBINED,  /* combined code and data cache */
+    V3_TLB_CODE,        /* instruction TLB */
+    V3_TLB_DATA,        /* data TLB */
+    V3_TLB_COMBINED     /* combined code and data TLB */
+} v3_cache_type_t;
+
+/* Result of a cache-info query for a single cache or TLB */
+struct v3_cache_info {
+    v3_cache_type_t type;   // kind of cache or TLB described
+    uint32_t level;         // level (1 => L1, 2 => L2, etc.)
+    uint32_t size;          // size in bytes (cache) or entries (tlb)
+    uint32_t blocksize;     // block size in bytes (caches, ignore for tlbs)
+    uint32_t associativity; // n-way, etc. (for 4K pages in case of TLB)
+                            // -1 (i.e. 0xffffffff, as the field is unsigned)
+                            // for fully assoc, 0 for disabled/nonexistent
+                            
+};
+    
+
+/* Callbacks the host supplies to answer cache-info queries */
+struct v3_cache_info_iface {
+    // level 1 => L1 ("closest"), level 2=> L2, etc.
+    // level 0xffffffff => last level shared cache
+    // Fills *info for the requested type and level.
+    // The Linux implementation in this change returns 0 on success and
+    // -1 on failure.
+    int (*get_cache_level)(v3_cache_type_t type, uint32_t level, struct v3_cache_info *info);
+};
+
+
+/* Called by the host to register its cache-info callbacks with Palacios */
+extern void V3_Init_Cache_Info(struct v3_cache_info_iface * palacios_cache_info);
+
+#ifdef __V3VEE__
+
+/*
+ * Palacios-internal query: forwards to the host's get_cache_level
+ * callback.  Returns -1 if no callback has been registered, otherwise
+ * the callback's return value.
+ */
+int v3_get_cache_info(v3_cache_type_t type, uint32_t level, struct v3_cache_info *info);
+
+#endif
+
+#endif
diff --git a/palacios/src/interfaces/Kconfig b/palacios/src/interfaces/Kconfig

index d7671a0..72442f9 100644 (file)
--- a/palacios/src/interfaces/Kconfig
+++ b/palacios/src/interfaces/Kconfig
@@ -101,4 +101,11 @@ config HOST_LAZY_FPU_SWITCH
        help
                Select this if your host provides lazy context switch support
                 for floating point state and you would like Palacios to use it
+
+config CACHE_INFO
+       bool "Host provides information about cache structure"
+       default n
+       help
+               Select this if your host provides information about its
+                cache structure and you would like Palacios to use it
 endmenu
diff --git a/palacios/src/interfaces/Makefile b/palacios/src/interfaces/Makefile

index 63a393a..1657366 100644 (file)
--- a/palacios/src/interfaces/Makefile
+++ b/palacios/src/interfaces/Makefile
@@ -12,6 +12,7 @@ obj-$(V3_CONFIG_HOST_PMU) += vmm_pmu.o
 obj-$(V3_CONFIG_HOST_PWRSTAT) += vmm_pwrstat.o
 obj-$(V3_CONFIG_HOST_PSTATE_CTRL) += vmm_pstate_ctrl.o
 obj-$(V3_CONFIG_HOST_LAZY_FPU_SWITCH) += vmm_lazy_fpu.o
+obj-$(V3_CONFIG_CACHE_INFO) += vmm_cache_info.o
 
 obj-y += null.o
 obj-y += vmm_numa.o
diff --git a/palacios/src/interfaces/vmm_cache_info.c b/palacios/src/interfaces/vmm_cache_info.c

new file mode 100644 (file)

index 0000000..6c87b01
--- /dev/null
+++ b/palacios/src/interfaces/vmm_cache_info.c
@@ -0,0 +1,38 @@
+/*
+ * This file is part of the Palacios Virtual Machine Monitor developed
+ * by the V3VEE Project with funding from the United States National 
+ * Science Foundation and the Department of Energy.  
+ *
+ * The V3VEE Project is a joint project between Northwestern University
+ * and the University of New Mexico.  You can find out more at 
+ * http://www.v3vee.org
+ *
+ * Copyright (c) 2015, Peter Dinda <pdinda@northwestern.edu>
+ * Copyright (c) 2015, The V3VEE Project <http://www.v3vee.org> 
+ * All rights reserved.
+ *
+ * Author: Peter Dinda <pdinda@northwestern.edu>
+ *
+ * This is free software.  You are permitted to use,
+ * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
+ */
+
+#include <palacios/vmm.h>
+#include <interfaces/vmm_cache_info.h>
+
+/* Host-provided callback table; stays 0 until V3_Init_Cache_Info() runs */
+static struct v3_cache_info_iface *cache_info=0;
+
+/*
+ * Record the host's cache-info callback table for later use by
+ * v3_get_cache_info() and log that the interface is available.
+ * The pointer is stored as-is, not copied.
+ */
+void V3_Init_Cache_Info(struct v3_cache_info_iface * palacios_cache_info)
+{
+    cache_info=palacios_cache_info;
+    V3_Print(VM_NONE,VCORE_NONE,"Cache information interface inited\n");
+}
+
+/*
+ * Ask the host for information about one cache or TLB.
+ *
+ * Returns -1 when no host interface (or no get_cache_level callback) has
+ * been registered; otherwise returns the callback's result.
+ */
+int v3_get_cache_info(v3_cache_type_t type, uint32_t level, struct v3_cache_info *info)
+{
+    if (!cache_info || !cache_info->get_cache_level) {
+        return -1;
+    }
+
+    return cache_info->get_cache_level(type,level,info);
+}
linux_module/Makefile		patch \| blob \| history
linux_module/iface-cache_info.c	[new file with mode: 0644]	patch \| blob
palacios/include/interfaces/vmm_cache_info.h	[new file with mode: 0644]	patch \| blob
palacios/src/interfaces/Kconfig		patch \| blob \| history
palacios/src/interfaces/Makefile		patch \| blob \| history
palacios/src/interfaces/vmm_cache_info.c	[new file with mode: 0644]	patch \| blob