2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at http://www.v3vee.org
10 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
11 * All rights reserved.
13 * Copyright (c) 2008, Philip Soltero <psoltero@cs.unm.edu>
14 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
15 * All rights reserved.
17 * Author: Philip Soltero <psoltero@cs.unm.edu>
19 * This is free software. You are permitted to use,
20 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
24 * @file Virtualized machine-check architecture.
 * @author <a HREF="mailto:psoltero@cs.unm.edu">Philip Soltero</a>
29 #include <palacios/vmm.h>
30 #include <palacios/vm_guest.h>
31 #include <palacios/vmm_excp.h>
32 #include <palacios/vmm_lowlevel.h>
33 #include <palacios/vmm_dev_mgr.h>
34 #include <palacios/vmm_string.h>
35 #include <palacios/vmm_cpuid.h>
37 #ifndef CONFIG_DEBUG_MCHECK
39 #define PrintDebug(fmt, args...)
// CPUID functions whose results are hooked by this device.
#define CPUID_0000_0001 0x00000001
#define CPUID_8000_0001 0x80000001

// 6 error reporting banks. This may be configurable in the future.
#define MC_REG_BANKS 6
// Exception vector passed to v3_raise_exception() when an MCE is injected.
#define MCE_INTERRUPT 18

// Prefix prepended to the log messages emitted by this file.
#define MSG_PRE "MCHECK: "

// MSR numbers of the global machine-check registers hooked by this device.
#define MCG_CAP 0x0179
#define MCG_STAT 0x017A
#define MCG_CTRL 0x017B
57 /* I have no idea what Intel was thinking (or maybe they just weren't)
58 * but the MCi registers are completely non-standard across Intel's platforms and are a total mess.
59 * Any derivative of the pentium-M (i.e. all Core CPU lines) completely disregard the
60 * architectural standard that Intel itself created...
61 * For these CPUs: the MC4 MSRs switch locations with the MC3s,
62 * also every MCi below MC3 (including MC4) does not have a MCi_MISC MSR.
64 * So for now, screw it, we'll use AMD's standard
69 Bank 1 : Instruction Cache.
71 Bank 3 : Load Store Unit.
72 Bank 4 : Northbridge and DRAM.
// Base MSR number of each MCi bank, indexed by logical bank number.
// init_state() copies these values into the per-bank state.
static const uint32_t amd_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x040c, 0x0410, 0x0414};
// Same values as the AMD table.
static const uint32_t pentium_6_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x040c, 0x0410, 0x0414};
// Entries 3 and 4 are swapped relative to the AMD table.
static const uint32_t pentium_m_mci_bases[] = {0x0400, 0x0404, 0x0408, 0x0410, 0x040c, 0x0414};
78 static const uint32_t ia32_mci_bases[] = { 0x0400, 0x0404, 0x0408, 0x040c,
79 0x0410, 0x0414, 0x0418, 0x041c,
80 0x0420, 0x0424, 0x0428, 0x042c,
81 0x0430, 0x0434, 0x0438, 0x043c,
82 0x0440, 0x0444, 0x0448, 0x044c,
// Clears the low two bits of an MCi MSR number. `msr & MCi_MASK` is compared against a
// bank's base MSR; `msr & ~MCi_MASK` selects the register within that bank.
#define MCi_MASK 0xfffffffc
94 * MCA status low and high registers, MC4_STAT, MSR0000_0411.
102 uint_t error_code_ext : 5;
103 uint_t error_code : 16;
108 uint_t mca_stat_sub_cache : 2;
109 uint_t reserved_01 : 1;
112 uint_t syndrome2 : 8;
113 uint_t reserved_02 : 1;
114 uint_t err_cpu_val : 1;
122 }__attribute__((packed));
123 }__attribute__((packed));
124 } __attribute__((packed));
127 * MCA address low and high registers, MC4_ADDR, MSR0000_0412.
129 struct mc4_addr_msr {
134 uint64_t addr32 : 36;
135 uint32_t reserved : 28;
136 } __attribute__((packed));
139 } __attribute__((packed));
140 } __attribute__((packed));
143 * Global machine-check capabilities register, MCG_CAP.
150 uint32_t mcg_ctl_p : 1; // CTRL Present
151 uint64_t reserved : 55;
152 } __attribute__((packed));
153 } __attribute__((packed));
154 } __attribute__((packed));
157 * Global machine-check status register, MCG_STAT.
159 struct mcg_stat_msr {
165 uint32_t mcip : 1; // Machine-check in progress.
166 uint64_t reserved : 61;
167 } __attribute__((packed));
168 } __attribute__((packed));
169 } __attribute__((packed));
172 * Global machine-check control register, MCG_CTRL.
178 uint32_t dce : 1; // Data cache register bank enable
179 uint32_t ice : 1; // Instruction cache register bank enable
180 uint32_t bue : 1; // Bus unit register bank enable
181 uint32_t lse : 1; // Load-store register bank enable
182 uint32_t nbe : 1; // Northbridge register bank enable
183 uint32_t fre : 1; // Fixed issue reorder buffer register bank enable
184 uint64_t unused : 58;
185 } __attribute__((packed));
186 } __attribute__((packed));
187 } __attribute__((packed));
190 * A temporary structure for unimplemented machine-check error reporting banks.
200 struct mcheck_state {
201 struct mcg_cap_msr mcg_cap;
202 struct mcg_stat_msr mcg_stat;
203 struct mcg_ctl_msr mcg_ctl;
205 /* Note that these are in logical order not MSR order */
206 /* So MC4 is always at mci_regs[4] even if the MSR is before MC3's */
207 struct mci_bank mci_regs[MC_REG_BANKS];
212 * Handles a guest read of cpuid function 0000_0001 and 8000_0001.
 * All bits are passthrough except for bit 14, the MCA available bit, and bit 7, the MCE available
 * bit.
 *
 * @b Note: The virtual MCA only uses two bits in the entire 256 bit "return value". If other VMM
217 * subsystems or devices require the virtualization of other return value bits, it is suggested that
218 * this hook handler be moved to a common source file where all subsystems and devices can
219 * virtualize the bits they need to.
221 static int cpuid_hook_handler(struct guest_info * const info, const uint32_t cpuid,
222 uint32_t * const eax, uint32_t * const ebx,
223 uint32_t * const ecx, uint32_t * const edx,
224 void * const private_data) {
226 // Most bits are passthrough.
227 v3_cpuid(cpuid, eax, ebx, ecx, edx);
229 // Bit 7, MCE availability
230 // Bit 14, MCA availability
237 void init_state(struct mcheck_state * const state) {
240 memset(state, 0, sizeof(struct mcheck_state));
242 // Set the initial MCI reg base values to the current architecture
243 for (i = 0; i < MC_REG_BANKS; i++) {
244 state->mci_regs[i].base = amd_mci_bases[i];
249 * Handles guest writes to MCG MSRs.
252 int mcg_write_handler(struct guest_info * core, uint32_t msr, struct v3_msr src, void * priv_data) {
253 struct mcheck_state * state = (struct mcheck_state *)priv_data;
257 PrintDebug(MSG_PRE "Ignoring write to MCG_CAP MSR.\n");
261 state->mcg_stat.value = 0;
265 if (!state->mcg_cap.mcg_ctl_p) {
266 PrintDebug(MSG_PRE "Ignoring write to control MSR '0x%x'. Control MSRs not supported.\n", msr);
270 // The upper 58 bits are unused and read-only.
271 state->mcg_ctl.value &= ~0x3f;
272 state->mcg_ctl.value |= src.value & 0x3f;
277 PrintError(MSG_PRE "Reading from invalid MSR: %x\n", msr);
286 * Handles guest reads to MCG MSRs.
289 int mcg_read_handler(struct guest_info * core, uint32_t msr, struct v3_msr * dst, void * priv_data) {
290 struct mcheck_state * state = (struct mcheck_state *)priv_data;
294 dst->value = state->mcg_cap.value;
298 dst->value = state->mcg_stat.value;
302 if (!state->mcg_cap.mcg_ctl_p) {
303 PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr);
307 dst->value = state->mcg_ctl.value;
311 PrintError(MSG_PRE "Reading from invalid MSR: %x\n", msr);
318 static struct mci_bank * get_mci_reg(struct mcheck_state * state, uint32_t msr) {
321 for (i = 0; i < MC_REG_BANKS; i++) {
322 if (state->mci_regs[i].base == (msr & MCi_MASK)) {
323 return &(state->mci_regs[i]);
332 * Handles guest reads to MCi MSRs.
335 int mci_read_handler(struct guest_info * const core,
337 struct v3_msr * const dst,
338 void * const priv_data) {
339 struct mcheck_state * const state = (struct mcheck_state *)priv_data;
340 struct mci_bank * mci = get_mci_reg(state, msr);
342 PrintDebug(MSG_PRE "Reading value '0x%llx' for MSR '0x%x'.\n", dst->value, msr);
345 PrintError(MSG_PRE " MSR read for invalid MCI register 0x%x\n", msr);
349 switch (msr & ~MCi_MASK) {
351 if (!state->mcg_cap.mcg_ctl_p) {
352 PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr);
356 dst->value = mci->ctl.value;
360 dst->value = mci->stat.value;
364 dst->value = mci->addr.value;
368 dst->value = mci->misc.value;
372 PrintError(MSG_PRE "Ignoring read of unhooked MSR '0x%x'. This is a bug.\n", msr);
380 * Handles guest writes to MCi MSRs.
383 int mci_write_handler(struct guest_info * const core,
385 const struct v3_msr src,
386 void * const priv_data) {
387 struct mcheck_state * const state = (struct mcheck_state *)priv_data;
388 struct mci_bank * mci = get_mci_reg(state, msr);
390 PrintDebug(MSG_PRE "Writing value '0x%llx' for MSR '0x%x'.\n", src.value, msr);
392 switch (msr & ~MCi_MASK) {
394 if (!state->mcg_cap.mcg_ctl_p) {
395 PrintDebug(MSG_PRE "Ignoring read of control MSR '0x%x'. Control MSRs not supported.\n", msr);
399 mci->ctl.value = src.value;
403 if (src.value != 0) {
405 PrintError(MSG_PRE "Ignoring write of illegal value '0x%llx'.\n", src.value);
413 mci->addr.value = src.value;
417 V3_Print(MSG_PRE "Ignoring write to read only miscellaneous MSR '0x%x'.\n", msr);
421 PrintError(MSG_PRE "Ignoring write of unhooked MSR '0x%x'. This is a bug.\n", msr);
431 * CPUID functions 0000_0001 and 8000_0001 are hooked to signal MC availability
433 * @return 0 for success and -1 for failure.
436 int hook_cpuids(struct v3_vm_info * const vm,
437 struct mcheck_state * const state) {
440 ret = v3_hook_cpuid(vm, CPUID_0000_0001, cpuid_hook_handler, state);
443 PrintError(MSG_PRE "Failed to hook CPUID function 0000_0001.\n");
447 ret = v3_hook_cpuid(vm, CPUID_8000_0001, cpuid_hook_handler, state);
450 PrintError(MSG_PRE "Failed to hook CPUID function 8000_0001.\n");
451 v3_unhook_cpuid(vm, CPUID_0000_0001);
459 static int mcheck_free(struct mcheck_state * state) {
464 static struct v3_device_ops dev_ops = {
465 .free = (int (*)(void *))mcheck_free,
471 static int mcheck_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
472 struct mcheck_state * state = NULL;
473 // char * dev_id = v3_cfg_val(cfg, "ID");
474 char * dev_id = "MCHECK"; // we hardcode the device ID for now so we can always find it for #MC insertion
478 state = (struct mcheck_state *)V3_Malloc(sizeof(struct mcheck_state));
481 PrintError(MSG_PRE "Failed to allocate machine-check architecture state.\n");
485 struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, state);
488 PrintError("Could not attach device %s\n", dev_id);
495 state->mcg_cap.count = MC_REG_BANKS;
497 ret |= hook_cpuids(vm, state);
500 ret |= v3_dev_hook_msr(dev, MCG_CAP, mcg_read_handler, mcg_write_handler);
501 ret |= v3_dev_hook_msr(dev, MCG_STAT, mcg_read_handler, mcg_write_handler);
502 ret |= v3_dev_hook_msr(dev, MCG_CTRL, mcg_read_handler, mcg_write_handler);
504 for (i = 0; i < MC_REG_BANKS; i++) {
505 ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base, mci_read_handler, mci_write_handler);
506 ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base + 1, mci_read_handler, mci_write_handler);
507 ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base + 2, mci_read_handler, mci_write_handler);
508 ret |= v3_dev_hook_msr(dev, state->mci_regs[i].base + 3, mci_read_handler, mci_write_handler);
512 PrintError(MSG_PRE "Error hooking Device resources\n");
513 v3_remove_device(dev);
520 int v3_mcheck_inject_nb_mce(struct v3_vm_info * const vm, const uint32_t cpu,
521 const struct mc4_stat_msr stat,
522 const struct mc4_addr_msr addr) {
523 struct vm_device * dev = v3_find_dev(vm, "MCHECK");
524 struct mcheck_state * state = dev->private_data;
527 // For now only MCE injection on cpu 0 is supported.
529 PrintError(MSG_PRE "Injecting MCE on cpu %u not supported.\n", cpu);
534 // Is the Northbridge bank enabled?
535 if (state->mcg_ctl.nbe != 1) {
536 PrintDebug(MSG_PRE "Northbridge register bank disabled. Ignoring Northbridge MCE.\n");
540 state->mci_regs[4].stat.value = stat.value;
541 state->mci_regs[4].addr.value = addr.value;
543 state->mcg_stat.value = 0;
544 state->mcg_stat.ripv = 1;
545 state->mcg_stat.mcip = 1;
547 PrintDebug(MSG_PRE "Injecting NB MCE on core %u.\n", 0);
550 ret = v3_raise_exception(&(vm->cores[0]), MCE_INTERRUPT);
553 PrintError(MSG_PRE "Failed to raise MCE.\n");
560 device_register("MCHECK", mcheck_init);