Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Merge branch 'devel' of ssh://palacios@newskysaw.cs.northwestern.edu//home/palacios...
Peter Dinda [Tue, 5 Oct 2010 22:30:53 +0000 (17:30 -0500)]
palacios/include/devices/apic.h
palacios/include/devices/icc_bus.h
palacios/include/palacios/vm_guest.h
palacios/include/palacios/vmm_types.h
palacios/src/devices/apic.c
palacios/src/devices/icc_bus.c
palacios/src/devices/io_apic.c
palacios/src/palacios/svm.c
palacios/src/palacios/vmm_config.c
palacios/src/palacios/vmm_mem.c

index f78e2a6..e4452e7 100644 (file)
@@ -25,6 +25,7 @@
 #include <palacios/vmm_dev_mgr.h>
 
 
+
 int v3_apic_raise_intr(struct guest_info * info, struct vm_device * apic_dev, int intr_num);
 
 
index da5f39f..4084340 100644 (file)
@@ -23,6 +23,8 @@
 
 struct v3_icc_ops {
     int (*raise_intr)(struct guest_info * core, int intr_num, void * private_data);
+    int (*should_deliver_flat)(struct guest_info * core, uint8_t mda, void * private_data);  // flat-model logical destination test; the ICC bus delivers to this APIC when it returns nonzero
+    int (*should_deliver_cluster)(struct guest_info * core, uint8_t mda, void * private_data);  // cluster-model logical destination test; the ICC bus delivers to this APIC when it returns nonzero
 };
 
 
@@ -39,9 +41,10 @@ int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, ui
  * @param apic_src - The source APIC id.
  * @param apic_num - The remote APIC number.
  * @param icr      - A copy of the APIC's ICR.  (LAPIC-style ICR, clone from redir table for ioapics)
+ * @param dfr      - A copy of the APIC's DFR   (LAPIC-style DFR)
  & @param extirq   - irq for external interrupts (e.g., from 8259)
  */
-int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t apic_src, uint64_t icr, uint32_t ext_irq);
+int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t apic_src, uint64_t icr, uint32_t dfr, uint32_t ext_irq);
 
 
 #if 0
index 3635f8c..322f390 100644 (file)
@@ -130,6 +130,8 @@ struct v3_vm_info {
     uint32_t mem_align;
     struct v3_mem_map mem_map;
 
+    v3_paging_size_t paging_size; // for nested paging
+
     struct v3_mem_hooks mem_hooks;
 
     struct v3_shdw_impl_state shdw_impl;
index 0c95d0d..fc4fd5f 100644 (file)
@@ -29,6 +29,7 @@
 typedef enum {SHADOW_PAGING, NESTED_PAGING} v3_paging_mode_t;
 typedef enum {VM_RUNNING, VM_STOPPED, VM_SUSPENDED, VM_ERROR, VM_EMULATING} v3_vm_operating_mode_t;
 
+typedef enum {PAGING_4KB, PAGING_2MB} v3_paging_size_t;  // page size selection stored in v3_vm_info.paging_size (used for nested paging)
 
 typedef enum {INIT, SIPI, REAL, /*UNREAL,*/ PROTECTED, PROTECTED_PAE, LONG, LONG_32_COMPAT, LONG_16_COMPAT} v3_cpu_mode_t;
 typedef enum {PHYSICAL_MEM, VIRTUAL_MEM} v3_mem_mode_t;
index 2ec7a41..763b538 100644 (file)
@@ -118,6 +118,8 @@ typedef enum { APIC_TMR_INT, APIC_THERM_INT, APIC_PERF_INT,
 
 
 
+
+
 struct apic_msr {
     union {
        uint64_t value;
@@ -133,8 +135,6 @@ struct apic_msr {
 } __attribute__((packed));
 
 
-
-
 struct apic_state {
     addr_t base_addr;
 
@@ -188,6 +188,10 @@ struct apic_state {
     v3_lock_t  lock;
 };
 
+
+
+
+
 static int apic_read(struct guest_info * core, addr_t guest_addr, void * dst, uint_t length, void * priv_data);
 static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, uint_t length, void * priv_data);
 
@@ -898,10 +902,11 @@ static int apic_write(struct guest_info * core, addr_t guest_addr, void * src, u
 
            // ICC???
            PrintDebug("apic %u: core %u: sending cmd 0x%llx to apic %u\n", 
-                      apic->clapic_id.val, core->cpu_id,
+                      apic->lapic_id.val, core->cpu_id,
                       apic->int_cmd.val, apic->int_cmd.dst);
-
-           v3_icc_send_ipi(apic->icc_bus, apic->lapic_id.val, apic->int_cmd.val, 0);
+           if (v3_icc_send_ipi(apic->icc_bus, apic->lapic_id.val, apic->int_cmd.val,apic->dst_fmt.val,0)==-1) { 
+               return -1;
+           }
            break;
 
        case INT_CMD_HI_OFFSET:
@@ -1120,8 +1125,35 @@ static struct v3_device_ops dev_ops = {
 
 
 
+static int apic_should_deliver_flat(struct guest_info * core, uint8_t mda, void * private_data)  // Flat-model match: returns 1 if mda is 0xff or shares a set bit with this APIC's log_dst.dst_log_id, otherwise 0.
+{  // core is not referenced in this function
+  struct apic_state * apic = (struct apic_state *)private_data;  // private_data is interpreted as this APIC's struct apic_state
+
+  if (mda==0xff ||                         // broadcast or
+      (apic->log_dst.dst_log_id & mda)) {  // I am in the set 
+      return 1;  // this APIC is a destination
+  } else {
+      return 0;  // this APIC is not a destination
+  }
+}
+
+static int apic_should_deliver_cluster(struct guest_info * core, uint8_t mda, void * private_data)  // Cluster-model match: returns 1 if mda is 0xff, or if the high nibbles of mda and log_dst.dst_log_id are equal and their low nibbles share a set bit; otherwise 0.
+{  // core is not referenced in this function
+  struct apic_state * apic = (struct apic_state *)private_data;  // private_data is interpreted as this APIC's struct apic_state
+
+  if (mda==0xff ||                                                 // broadcast or
+      ( ((mda & 0xf0) == (apic->log_dst.dst_log_id & 0xf0)) &&     // (I am in the cluster and
+        ((mda & 0x0f)  & (apic->log_dst.dst_log_id & 0x0f)) ) ) {  //  I am in the set)
+      return 1;  // this APIC is a destination
+  } else {
+      return 0;  // this APIC is not a destination
+  }
+}
+
 static struct v3_icc_ops icc_ops = {
     .raise_intr = apic_raise_intr,
+    .should_deliver_flat = apic_should_deliver_flat,  // flat-model logical destination test (mda bitmask match)
+    .should_deliver_cluster = apic_should_deliver_cluster,  // cluster-model logical destination test (cluster nibble + member bitmask match)
 };
 
 
index 6c5e9e7..c78190b 100644 (file)
@@ -23,7 +23,6 @@
 #include <devices/icc_bus.h>
 #include <devices/apic_regs.h>
 
-
 #define MAX_APICS 256
 
 #ifndef CONFIG_DEBUG_ICC_BUS
@@ -202,11 +201,13 @@ static int deliver(uint32_t src_apic, struct apic_data *dest_apic, struct int_cm
 // icr_data contains interrupt vector *except* for ext_int
 // in which case it is given via irq
 //
-int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data, uint32_t extirq) {
+int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_data, 
+                   uint32_t dfr_data, uint32_t extirq) {
 
     PrintDebug("icc_bus: icc_bus=%p, src_apic=%u, icr_data=%llx, extirq=%u\n",icc_bus,src_apic,icr_data,extirq);
 
     struct int_cmd_reg *icr = (struct int_cmd_reg *)&icr_data;
+    struct dst_fmt_reg *dfr = (struct dst_fmt_reg*)&dfr_data;
     struct icc_bus_state * state = (struct icc_bus_state *)icc_bus->private_data;
 
     // initial sanity checks
@@ -218,47 +219,139 @@ int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_
        PrintError("icc_bus: Attempted send to unregistered apic id=%u\n",icr->dst);
        return -1;
     }
-    
-    struct apic_data * dest_apic =  &(state->apics[icr->dst]);
 
-    PrintDebug("icc_bus: IPI %s %u from %s %u to %s %u (icr=0x%llx) (extirq=%u)\n",
-              deliverymode_str[icr->del_mode], icr->vec, src_apic==state->ioapic_id ? "ioapic" : "apic",
-              src_apic, shorthand_str[icr->dst_shorthand], icr->dst,icr->val,
+    PrintDebug("icc_bus: IPI %s %u from %s %u to %s %s %u (icr=0x%llx, dfr=0x%x) (extirq=%u)\n",
+              deliverymode_str[icr->del_mode], icr->vec, 
+              src_apic==state->ioapic_id ? "ioapic" : "apic",
+              src_apic,               
+              icr->dst_mode==0 ? "(physical)" : "(logical)", 
+              shorthand_str[icr->dst_shorthand], icr->dst,icr->val, dfr->val,
               extirq);
 
+    /*
+
+    if (icr->dst==state->ioapic_id) { 
+       PrintError("icc_bus: Attempted send to ioapic ignored\n");
+       return -1;
+    }
+    */
 
 
 
     switch (icr->dst_shorthand) {
 
        case 0:  // no shorthand
-           if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-               return -1;
+           if (icr->dst_mode==0) { 
+               // physical delivery
+               struct apic_data * dest_apic =  &(state->apics[icr->dst]);
+               if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+                   return -1;
+               }
+           } else {
+               // logical delivery
+               uint8_t mda = icr->dst; // message destination address, not physical address
+               
+               if (dfr->model==0xf) { 
+                   // flat model
+                   // this means we deliver the IPI to each destination APIC where
+                   // mda of sender & ldr of receiver is nonzero
+                   // mda=0xff means broadcast to all
+                   //
+                   int i;
+                   for (i=0;i<MAX_APICS;i++) { 
+                       struct apic_data *dest_apic=&(state->apics[i]);
+                       if (dest_apic->present &&
+                           dest_apic->ops->should_deliver_flat(dest_apic->core,
+                                                               mda,
+                                                               dest_apic->priv_data)) {
+                           if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+                               return -1;
+                           }
+                       }
+                   }
+               } else if (dfr->model==0x0) {
+                   // cluster model
+                   //
+                   // there are two variants of this
+                   //
+                   // 1. (ancient P5/P6) All apics are on one bus
+                   //    mda[31:28] is the target cluster, 
+                   //    mda[27:24] has one bit for each apic in the cluster
+                   //    mda[31:28] of sending apic == ldr[31:28] of dest apic means
+                   //      the dest apic is part of the cluster
+                   //      then mda[27:24] & ldr[27:24] nonzero means to deliver
+                   //    also, mda=0xff still means broadcast 
+                   //    So, basically, you have 15 clusters of 4 apics each + broadcast
+                   //
+                   // 2. (current) hierarchical cluster model
+                   //    This is somewhat unclearly documented in volume 3, 9-32
+                   //    basically, you have a hierarchy of clusters where
+                   //    each cluster has 4 agents (APICs?) and a cluster manager.
+                   //    The cluster manager is not an apic, though, and outside of
+                   //    scope of documents.  Again, you have 15 clusters of 4 apics
+                   //    each + broadcast.   My impression is that this is identical 
+                   //    to variant 1 for our purposes. 
+                   //
+                   //
+                   // if we are in lowest priority mode, we should just pick one
+                   // according to the arbitration priority register
+                   int i;
+                   for (i=0;i<MAX_APICS;i++) { 
+                       struct apic_data *dest_apic=&(state->apics[i]);
+                       if (dest_apic->present &&
+                           dest_apic->ops->should_deliver_cluster(dest_apic->core,
+                                                                  mda,
+                                                                  dest_apic->priv_data)) {
+                           if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+                               return -1;
+                           }
+                       }
+                   }
+               } else {
+                   PrintError("icc_bus: unknown logical delivery model 0x%x\n", dfr->model);
+                   return -1;
+               }
            }
+           
            break;
-
+           
        case 1:  // self
-           if (icr->dst==state->ioapic_id) { 
-               PrintError("icc_bus: ioapic attempting to send to itself\n");
-               return -1;
-           }
-           if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+           if (icr->dst_mode==0) { 
+               // physical delivery
+               if (icr->dst==state->ioapic_id) { 
+                   PrintError("icc_bus: ioapic attempting to send to itself\n");
+                   return -1;
+               }
+               struct apic_data *dest_apic=&(state->apics[src_apic]);
+               if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+                   return -1;
+               }
+           } else {
+               // logical delivery
+               PrintError("icc_bus: use of logical delivery in self is not yet supported.\n");
                return -1;
            }
            break;
-
+           
        case 2: 
-       case 3: { // all and all-but-me
-           int i;
-           for (i=0;i<MAX_APICS;i++) { 
-               dest_apic=&(state->apics[i]);
-               if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) { 
-                   if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-                       return -1;
+       case 3:  // all and all-but-me
+           if (icr->dst_mode==0) { 
+               // physical
+               int i;
+               for (i=0;i<MAX_APICS;i++) { 
+                   struct apic_data *dest_apic=&(state->apics[i]);
+                   if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) { 
+                       if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+                           return -1;
+                       }
                    }
                }
+           } else {
+               // logical delivery
+               PrintError("icc_bus: use of logical delivery in %s is not yet supported\n",
+                          icr->dst_shorthand==2 ? "all" : "all-but-me" );
+               return -1;
            }
-       }
            break;
     }
 
@@ -308,7 +401,6 @@ int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, ui
 }
 
 
-
 static int icc_bus_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     PrintDebug("icc_bus: Creating ICC_BUS\n");
 
index e6a9f3a..00587ef 100644 (file)
@@ -291,9 +291,12 @@ static int ioapic_raise_irq(struct v3_vm_info * vm, void * private_data, int irq
        icr.rem_rd_status=0;
        icr.dst_shorthand=0; // no shorthand
        icr.rsvd2=0;
+
+       // Note: 0 here is "cluster model", but it should be irrelevant
+       // since we are sending this as a physical destination
        PrintDebug("io apic %u: raising irq %u on ICC bus.\n",
                   ioapic->ioapic_id.id, irq);
-       v3_icc_send_ipi(ioapic->icc_bus, ioapic->ioapic_id.id,icr.val, irq);
+       v3_icc_send_ipi(ioapic->icc_bus, ioapic->ioapic_id.id,icr.val, 0, irq);
     }
 
     return 0;
index 34fed45..7b33c8c 100644 (file)
 #include <palacios/vmm_sprintf.h>
 
 
+#ifndef CONFIG_DEBUG_SVM
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
 uint32_t v3_last_exit;
 
 // This is a global pointer to the host's VMCB
index e65563c..c2c8c57 100644 (file)
@@ -239,7 +239,6 @@ static int pre_config_vm(struct v3_vm_info * vm, v3_cfg_tree_t * vm_cfg) {
        return -1;
     }
 
-
 #ifdef CONFIG_TELEMETRY
     {
        char * telemetry = v3_cfg_val(vm_cfg, "telemetry");
@@ -278,7 +277,8 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
 
     v3_cfg_tree_t *vm_tree = info->vm_info->cfg_data->cfg;
     v3_cfg_tree_t *pg_tree = v3_cfg_subtree(vm_tree, "paging");
-    char *pg_mode = v3_cfg_val(pg_tree, "mode");
+    char *pg_mode          = v3_cfg_val(pg_tree, "mode");
+    char *page_size        = v3_cfg_val(pg_tree, "page_size");
     
     PrintDebug("Paging mode specified as %s\n", pg_mode);
 
@@ -297,13 +297,22 @@ static int determine_paging_mode(struct guest_info *info, v3_cfg_tree_t * core_c
            info->shdw_pg_mode = SHADOW_PAGING;
        }
     } else {
-       PrintDebug("No paging mode specified in configuration.\n");
+       PrintDebug("No paging type specified in configuration. Defaulting to shadow paging\n");
        info->shdw_pg_mode = SHADOW_PAGING;
     }
 
 
     if (info->shdw_pg_mode == NESTED_PAGING) {
        PrintDebug("Guest Paging Mode: NESTED_PAGING\n");
+       if (strcasecmp(page_size, "4kb") == 0) { /* TODO: this may not be an ideal place for this */
+           info->vm_info->paging_size = PAGING_4KB;
+       } else if (strcasecmp(page_size, "2mb") == 0) {
+           info->vm_info->paging_size = PAGING_2MB;
+       } else {
+           PrintError("Invalid VM paging size: '%s'\n", page_size);
+           return -1;
+       }
+       PrintDebug("VM page size=%s\n", page_size);
     } else if (info->shdw_pg_mode == SHADOW_PAGING) {
         PrintDebug("Guest Paging Mode: SHADOW_PAGING\n");
     } else {
index a795f58..7994cc4 100644 (file)
@@ -61,6 +61,7 @@ int v3_init_mem_map(struct v3_vm_info * vm) {
     // There is an underlying region that contains all of the guest memory
     // PrintDebug("Mapping %d pages of memory (%u bytes)\n", (int)mem_pages, (uint_t)info->mem_size);
 
+    // 2MB page alignment needed for 2MB hardware nested paging
     map->base_region.guest_start = 0;
     map->base_region.guest_end = mem_pages * PAGE_SIZE_4KB;