From: Peter Dinda Date: Mon, 13 Jul 2015 21:17:35 +0000 (-0500) Subject: Cache information interface and implementation for AMD and Intel on Linux X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=da0b4bb80de755529f47b9ca57ccb0c2cefae15b Cache information interface and implementation for AMD and Intel on Linux --- diff --git a/linux_module/Makefile b/linux_module/Makefile index d89f187..86abff6 100644 --- a/linux_module/Makefile +++ b/linux_module/Makefile @@ -38,6 +38,7 @@ v3vee-$(V3_CONFIG_EXT_MACH_CHECK) += mcheck.o v3vee-$(V3_CONFIG_MEM_TRACK) += memtrack.o +v3vee-$(V3_CONFIG_CACHE_INFO) += iface-cache_info.o v3vee-$(V3_CONFIG_HOST_PMU) += iface-pmu.o v3vee-$(V3_CONFIG_HOST_PWRSTAT) += iface-pwrstat.o v3vee-$(V3_CONFIG_HOST_PSTATE_CTRL) += iface-pstate-ctrl.o diff --git a/linux_module/iface-cache_info.c b/linux_module/iface-cache_info.c new file mode 100644 index 0000000..d2b44ee --- /dev/null +++ b/linux_module/iface-cache_info.c @@ -0,0 +1,457 @@ +/* + * Palacios cache information interface + * + * + * (c) Peter Dinda, 2015 + */ + +#include +#include +#include +#include +#include +#include + +#include "palacios.h" +#include "util-hashtable.h" +#include "linux-exts.h" +#include "vm.h" + +#define sint64_t int64_t + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + + +/* + This is a simple implementation of the Palacios cache info + +*/ + + +static inline void cpuid_string(u32 id, u32 dest[4]) { + asm volatile("cpuid" + :"=a"(*dest),"=b"(*(dest+1)),"=c"(*(dest+2)),"=d"(*(dest+3)) + :"a"(id)); +} + + +static int get_cpu_vendor(char name[13]) +{ + u32 dest[4]; + u32 maxid; + + cpuid_string(0,dest); + maxid=dest[0]; + ((u32*)name)[0]=dest[1]; + ((u32*)name)[1]=dest[3]; + ((u32*)name)[2]=dest[2]; + name[12]=0; + + return maxid; +} + +static int is_intel(void) +{ + char name[13]; + get_cpu_vendor(name); + return !strcmp(name,"GenuineIntel"); +} + +static int is_amd(void) +{ + char name[13]; + get_cpu_vendor(name); + return !strcmp(name,"AuthenticAMD"); +} + +static uint32_t decode_amd_l2l3_assoc(uint32_t val) +{ + switch (val) { + case 0: + case 1: + case 2: + case 4: + return val; + case 6: + return 8; + case 8: + return 16; + case 0xa: + return 32; + case 0xb: + return 48; + case 0xc: + return 64; + case 0xd: + return 96; + case 0xe: + return 128; + case 0xf: + return (uint32_t)-1; + default: + ERROR("Unknown associativity encoding %x\n",val); + return 0; + } +} + +static int get_cache_level_amd_legacy(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c) +{ + uint32_t eax, ebx, ecx, edx; + uint32_t l1_dtlb_24_assoc; + uint32_t l1_dtlb_24_entries; + uint32_t l1_itlb_24_assoc; + uint32_t l1_itlb_24_entries; + uint32_t l1_dtlb_4k_assoc; + uint32_t l1_dtlb_4k_entries; + uint32_t l1_itlb_4k_assoc; + uint32_t l1_itlb_4k_entries; + uint32_t l1_dcache_size; + uint32_t l1_dcache_assoc; + uint32_t l1_dcache_linespertag; + uint32_t l1_dcache_linesize; + uint32_t l1_icache_size; + uint32_t l1_icache_assoc; + uint32_t l1_icache_linespertag; + uint32_t l1_icache_linesize; + uint32_t l2_dtlb_24_assoc; + uint32_t l2_dtlb_24_entries; + uint32_t l2_itlb_24_assoc; + uint32_t l2_itlb_24_entries; + uint32_t l2_dtlb_4k_assoc; + uint32_t l2_dtlb_4k_entries; + uint32_t l2_itlb_4k_assoc; + uint32_t l2_itlb_4k_entries; + uint32_t l2_cache_size; + uint32_t l2_cache_assoc; + uint32_t l2_cache_linespertag; + uint32_t l2_cache_linesize; + uint32_t l3_cache_size; + uint32_t l3_cache_assoc; + uint32_t l3_cache_linespertag; + uint32_t l3_cache_linesize; + + // L1 caches and tlbs + cpuid(0x80000005,&eax,&ebx,&ecx,&edx); + + l1_dtlb_24_assoc = (eax >> 24) & 0xff; + l1_dtlb_24_entries = (eax >> 16) & 0xff; + l1_itlb_24_assoc = (eax >> 8) & 0xff; + l1_itlb_24_entries = (eax) & 0xff; + l1_dtlb_4k_assoc = (ebx >> 24) & 0xff; + l1_dtlb_4k_entries = (ebx >> 16) & 0xff; + l1_itlb_4k_assoc = (ebx >> 8) & 0xff; + l1_itlb_4k_entries = (ebx) & 0xff; + l1_dcache_size = ((ecx >> 24) & 0xff) * 1024; + l1_dcache_assoc = (ecx >> 16) & 0xff; + l1_dcache_linespertag = (ecx >> 8) & 0xff; + l1_dcache_linesize = ((ecx) & 0xff) * l1_dcache_linespertag; + l1_icache_size = ((edx >> 24) & 0xff) * 1024; + l1_icache_assoc = (edx >> 16) & 0xff; + l1_icache_linespertag = (edx >> 8) & 0xff; + l1_icache_linesize = ((edx) & 0xff) * l1_icache_linespertag; + + + // L2 caches and tlbs plus L3 cache + cpuid(0x80000006,&eax,&ebx,&ecx,&edx); + + l2_dtlb_24_assoc = decode_amd_l2l3_assoc((eax >> 28) & 0xf); + l2_dtlb_24_entries = (eax >> 16) & 0xfff; + l2_itlb_24_assoc = decode_amd_l2l3_assoc((eax >> 12) & 0xf); + l2_itlb_24_entries = (eax) & 0xfff; + l2_dtlb_4k_assoc = decode_amd_l2l3_assoc((ebx >> 28) & 0xf); + l2_dtlb_4k_entries = (ebx >> 16) & 0xfff; + l2_itlb_4k_assoc = decode_amd_l2l3_assoc((ebx >> 12) & 0xf); + l2_itlb_4k_entries = (ebx) & 0xfff; + l2_cache_size = ((ecx >> 16) & 0xffff) * 1024; + l2_cache_assoc = decode_amd_l2l3_assoc((ecx >> 12) & 0xf); + l2_cache_linespertag = (ecx >> 8) & 0xf; + l2_cache_linesize = ((ecx) & 0xff) * l1_dcache_linespertag; + l3_cache_size = ((edx >> 18) & 0x3fff) * 1024 * 512; + l3_cache_assoc = decode_amd_l2l3_assoc((edx >> 12) & 0xf); + l3_cache_linespertag = (edx >> 8) & 0xf; + l3_cache_linesize = ((edx) & 0xff) * l3_cache_linespertag; + + + INFO("L1 ITLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n", + l1_itlb_24_assoc,l1_itlb_24_entries,l1_itlb_4k_assoc,l1_itlb_4k_entries); + INFO("L2 ITLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n", + l2_itlb_24_assoc,l2_itlb_24_entries,l2_itlb_4k_assoc,l2_itlb_4k_entries); + + INFO("L1 DTLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n", + l1_dtlb_24_assoc,l1_dtlb_24_entries,l1_dtlb_4k_assoc,l1_dtlb_4k_entries); + INFO("L2 DTLB: 2/4MB: %u assoc, %u entries; 4KB: %u assoc %u entries\n", + l2_dtlb_24_assoc,l2_dtlb_24_entries,l2_dtlb_4k_assoc,l2_dtlb_4k_entries); + + INFO("L1 ICACHE: %u size, %u assoc, %u linesize %u linespertag\n", + l1_icache_size,l1_icache_assoc,l1_icache_linesize,l1_icache_linespertag); + + INFO("L1 DCACHE: %u size, %u assoc, %u linesize %u linespertag\n", + l1_dcache_size,l1_dcache_assoc,l1_dcache_linesize,l1_dcache_linespertag); + + INFO("L2 CACHE: %u size, %u assoc, %u linesize %u linespertag\n", + l2_cache_size,l2_cache_assoc,l2_cache_linesize,l2_cache_linespertag); + + INFO("L3 CACHE: %u size, %u assoc, %u linesize %u linespertag\n", + l3_cache_size,l3_cache_assoc,l3_cache_linesize,l3_cache_linespertag); + + if (!c) { + // debug + return 0; + } + + c->type=type; + c->level=level; + c->blocksize=0; + c->associativity=0; // does not exist unless we say otherwise + + switch (type) { + + case V3_CACHE_CODE: + if (level==1) { + c->size = l1_icache_size; + c->blocksize = l1_icache_linesize; + c->associativity = l1_icache_assoc == 0xff ? -1 : l1_icache_assoc; + } + break; + + case V3_CACHE_DATA: + if (level==1) { + c->size = l1_dcache_size; + c->blocksize = l1_dcache_linesize; + c->associativity = l1_dcache_assoc == 0xff ? -1 : l1_dcache_assoc; + } + break; + + case V3_CACHE_COMBINED: + if (level==2) { + c->size = l2_cache_size; + c->blocksize = l2_cache_linesize; + c->associativity = l2_cache_assoc; + } else if (level==3) { + c->size = l3_cache_size; + c->blocksize = l3_cache_linesize; + c->associativity = l3_cache_assoc; + } else if (level==-1) { + // find highest level combined cache that is enabled + if (l3_cache_assoc) { + c->size = l3_cache_size; + c->blocksize = l3_cache_linesize; + c->associativity = l3_cache_assoc; + } else { + c->size = l2_cache_size; + c->blocksize = l2_cache_linesize; + c->associativity = l2_cache_assoc; + } + } + break; + + case V3_TLB_CODE: + if (level==1) { + c->size = l1_itlb_4k_entries; + c->associativity = l1_itlb_4k_assoc == 0xff ? -1 : l1_itlb_4k_assoc; + } else if (level==2) { + c->size = l2_itlb_4k_entries; + c->associativity = l2_itlb_4k_assoc; + } + break; + + case V3_TLB_DATA: + if (level==1) { + c->size = l1_dtlb_4k_entries; + c->associativity = l1_dtlb_4k_assoc == 0xff ? -1 : l1_dtlb_4k_assoc; + } else if (level==2) { + c->size = l2_dtlb_4k_entries; + c->associativity = l2_dtlb_4k_assoc; + } + break; + + case V3_TLB_COMBINED: + // no combined TLB exposed on this machine; + break; + + default: + ERROR("Don't know how to handle cache info request type %x\n",type); + return -1; + } + + return 0; +} + + +static int get_cache_level_amd(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(0x80000000,&eax,&ebx,&ecx,&edx); + + if (eax < 0x80000006) { + ERROR("AMD processor does not support even legacy cache info\n"); + return -1; + } + + cpuid(0x80000001,&eax,&ebx,&ecx,&edx); + + if ((ecx >> 22) & 0x1) { + INFO("AMD Processor has Cache Topology Support - Legacy results may be inaccurate\n"); + } + + return get_cache_level_amd_legacy(type,level,c); +} + +#define INTEL_MAX_CACHE 256 + +static int get_cache_level_intel_det(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c) +{ + uint32_t i; + uint32_t eax, ebx, ecx, edx; + uint32_t ctype, clevel, cassoc, cparts, csets, clinesize, csize; + + if (type==V3_TLB_CODE || type==V3_TLB_DATA || type==V3_TLB_COMBINED) { + ERROR("TLB query unsupported on Intel\n"); + return -1; + } + + if (c) { + c->type = type; + c->level = 0; // max level found so far + } + + for (i=0;i> 5) & 0x7; + cassoc = eax & 0x200 ? -1 : ((ebx>>22) & 0x3ff) + 1 ; + cparts = ((ebx >> 12) & 0x3ff) + 1; + clinesize = (ebx & 0xfff) + 1; + csets = ecx + 1; + csize = cassoc * cparts * clinesize * csets; + + INFO("Cache: index %u type %u level %u assoc %u parts %u linesize %u sets %u size %u\n", + i,ctype,clevel,cassoc,cparts,clinesize,csets,csize); + + if (c && + (((ctype==1 && type==V3_CACHE_DATA) || + (ctype==2 && type==V3_CACHE_CODE) || + (ctype==3 && type==V3_CACHE_COMBINED)) && + ((clevel==level) || + (level==-1 && clevel>c->level)))) { + + c->level = clevel; + c->size = csize; + c->blocksize = clinesize; + c->associativity = cassoc; + } + } + + if (i==INTEL_MAX_CACHE) { + return -1; + } else { + return 0; + } +} + + +static int get_cache_level_intel(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c) +{ + uint32_t eax, ebx, ecx, edx; + + cpuid(0,&eax,&ebx,&ecx,&edx); + + if (eax < 4) { + ERROR("Intel Processor does not support deterministic cache parameters function\n"); + return -1; + } + + return get_cache_level_intel_det(type,level,c); +} + +static int get_cache_level(v3_cache_type_t type, uint32_t level, struct v3_cache_info *c) +{ + if (is_amd()) { + return get_cache_level_amd(type,level,c); + } else if (is_intel()) { + return get_cache_level_intel(type,level,c); + } else { + ERROR("Cannot get cache information for unknown architecture\n"); + return -1; + } + + +} + + +/*************************************************************************************************** + Hooks to palacios and inititialization +*************************************************************************************************/ + + +static struct v3_cache_info_iface hooks = { + .get_cache_level = get_cache_level, +}; + + +static int init_cache_info(void) +{ + + // just to see what's there - this should enumerate all + // and fail immediately otherwise + if (get_cache_level(-1,0,0)) { + ERROR("Cannot intialized cache information\n"); + return -1; + } + + V3_Init_Cache_Info(&hooks); + + INFO("cache_info inited\n"); + + return 0; + +} + +static int deinit_cache_info(void) +{ + INFO("cache_info deinited\n"); + + return 0; +} + + + + + +static struct linux_ext cache_info_ext = { + .name = "CACHE_INFO_INTERFACE", + .init = init_cache_info, + .deinit = deinit_cache_info, +}; + + +register_extension(&cache_info_ext); diff --git a/palacios/include/interfaces/vmm_cache_info.h b/palacios/include/interfaces/vmm_cache_info.h new file mode 100644 index 0000000..c29cd3c --- /dev/null +++ b/palacios/include/interfaces/vmm_cache_info.h @@ -0,0 +1,53 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2015, The V3VEE Project + * All rights reserved. + * + * Author: Peter Dinda (pdinda@northwestern.edu) + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#ifndef __VMM_CACHE_INFO +#define __VMM_CACHE_INFO + +#include + + +typedef enum {V3_CACHE_CODE, V3_CACHE_DATA, V3_CACHE_COMBINED, V3_TLB_CODE, V3_TLB_DATA, V3_TLB_COMBINED} v3_cache_type_t; + +struct v3_cache_info { + v3_cache_type_t type; + uint32_t level; // level + uint32_t size; // size in bytes (cache) or entries (tlb) + uint32_t blocksize; // block size in bytes (caches, ignore for tlbs) + uint32_t associativity; // n-way, etc. (for 4K pages in case of TLB) + // -1 for fully assoc, 0 for disabled/nonexistent + +}; + + +struct v3_cache_info_iface { + // level 1 => L1 ("closest"), level 2=> L2, etc. + // level 0xffffffff => last level shared cache + int (*get_cache_level)(v3_cache_type_t type, uint32_t level, struct v3_cache_info *info); +}; + + +extern void V3_Init_Cache_Info(struct v3_cache_info_iface * palacios_cache_info); + +#ifdef __V3VEE__ + +int v3_get_cache_info(v3_cache_type_t type, uint32_t level, struct v3_cache_info *info); + +#endif + +#endif diff --git a/palacios/src/interfaces/Kconfig b/palacios/src/interfaces/Kconfig index d7671a0..72442f9 100644 --- a/palacios/src/interfaces/Kconfig +++ b/palacios/src/interfaces/Kconfig @@ -101,4 +101,11 @@ config HOST_LAZY_FPU_SWITCH help Select this if your host provides lazy context switch support for floating point state and you would like Palacios to use it + +config CACHE_INFO + bool "Host provides information about cache structure" + default n + help + Select this if your host provides lazy context switch support + for floating point state and you would like Palacios to use it endmenu diff --git a/palacios/src/interfaces/Makefile b/palacios/src/interfaces/Makefile index 63a393a..1657366 100644 --- a/palacios/src/interfaces/Makefile +++ b/palacios/src/interfaces/Makefile @@ -12,6 +12,7 @@ obj-$(V3_CONFIG_HOST_PMU) += vmm_pmu.o obj-$(V3_CONFIG_HOST_PWRSTAT) += vmm_pwrstat.o obj-$(V3_CONFIG_HOST_PSTATE_CTRL) += vmm_pstate_ctrl.o obj-$(V3_CONFIG_HOST_LAZY_FPU_SWITCH) += vmm_lazy_fpu.o +obj-$(V3_CONFIG_CACHE_INFO) += vmm_cache_info.o obj-y += null.o obj-y += vmm_numa.o diff --git a/palacios/src/interfaces/vmm_cache_info.c b/palacios/src/interfaces/vmm_cache_info.c new file mode 100644 index 0000000..6c87b01 --- /dev/null +++ b/palacios/src/interfaces/vmm_cache_info.c @@ -0,0 +1,38 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2015, Peter Dinda + * Copyright (c) 2015, The V3VEE Project + * All rights reserved. + * + * Author: Peter Dinda + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#include +#include + +static struct v3_cache_info_iface *cache_info=0; + +void V3_Init_Cache_Info(struct v3_cache_info_iface * palacios_cache_info) +{ + cache_info=palacios_cache_info; + V3_Print(VM_NONE,VCORE_NONE,"Cache information interface inited\n"); +} + +int v3_get_cache_info(v3_cache_type_t type, uint32_t level, struct v3_cache_info *info) +{ + if (cache_info && cache_info->get_cache_level) { + return cache_info->get_cache_level(type,level,info); + } else { + return -1; + } +}