From: Peter Dinda <pdinda@northwestern.edu>
Date: Sun, 26 Sep 2010 20:51:48 +0000 (-0500)
Subject: Partially functional support for APIC/ICC clustered delivery,
X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=941622f53f723161738e9f9889f5360af1bc9c40;p=palacios.git

Partially functional support for APIC/ICC clustered delivery,
both physical and logical, needed for Linux with >2 cores.

Note that there is still a bug here somewhere - Linux with >2 cores
will eventually die with an attempted physical delivery outside of the
available cores.
---

diff --git a/palacios/include/devices/icc_bus.h b/palacios/include/devices/icc_bus.h
index c3ec43a..4084340 100644
--- a/palacios/include/devices/icc_bus.h
+++ b/palacios/include/devices/icc_bus.h
@@ -24,6 +24,7 @@
 struct v3_icc_ops {
     int (*raise_intr)(struct guest_info * core, int intr_num, void * private_data);
     int (*should_deliver_flat)(struct guest_info * core, uint8_t mda, void * private_data);
+    int (*should_deliver_cluster)(struct guest_info * core, uint8_t mda, void * private_data);
 };
 
 
diff --git a/palacios/src/devices/apic.c b/palacios/src/devices/apic.c
index 3064490..a0d17df 100644
--- a/palacios/src/devices/apic.c
+++ b/palacios/src/devices/apic.c
@@ -1108,7 +1108,21 @@ static int apic_should_deliver_flat(struct guest_info * core, uint8_t mda, void
 {
   struct apic_state * apic = (struct apic_state *)private_data;
 
-  if (mda==0xff || (apic->log_dst.dst_log_id & mda)) { 
+  if (mda==0xff ||                         // broadcast or
+      (apic->log_dst.dst_log_id & mda)) {  // I am in the set 
+      return 1;
+  } else {
+      return 0;
+  }
+}
+
+static int apic_should_deliver_cluster(struct guest_info * core, uint8_t mda, void * private_data)
+{
+  struct apic_state * apic = (struct apic_state *)private_data;
+
+  if (mda==0xff ||                                                 // broadcast or
+      ( ((mda & 0xf0) == (apic->log_dst.dst_log_id & 0xf0)) &&     // (I am in the cluster and
+        ((mda & 0x0f)  & (apic->log_dst.dst_log_id & 0x0f)) ) ) {  //  I am in the set)
       return 1;
   } else {
       return 0;
@@ -1118,6 +1132,7 @@ static int apic_should_deliver_flat(struct guest_info * core, uint8_t mda, void
 static struct v3_icc_ops icc_ops = {
     .raise_intr = apic_raise_intr,
     .should_deliver_flat = apic_should_deliver_flat,
+    .should_deliver_cluster = apic_should_deliver_cluster,
 };
 
 
diff --git a/palacios/src/devices/icc_bus.c b/palacios/src/devices/icc_bus.c
index 99eed8f..3606be1 100644
--- a/palacios/src/devices/icc_bus.c
+++ b/palacios/src/devices/icc_bus.c
@@ -253,56 +253,107 @@ int v3_icc_send_ipi(struct vm_device * icc_bus, uint32_t src_apic, uint64_t icr_
 		
 		if (dfr->model==0xf) { 
 		    // flat model
-		    // deliver irq if
+		    // this means we deliver the IPI to each destination APIC where
 		    // mda of sender & ldr of receiver is nonzero
-		    // mda=0xff means broadcaset to all
-		    
+		    // mda=0xff means broadcast to all
+		    //
 		    int i;
 		    for (i=0;i<MAX_APICS;i++) { 
 			struct apic_data *dest_apic=&(state->apics[i]);
 			if (dest_apic->present &&
 			    dest_apic->ops->should_deliver_flat(dest_apic->core,
 								mda,
-								dest_apic->priv_data)) { 
+								dest_apic->priv_data)) {
 			    if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
 				return -1;
 			    }
 			}
 		    }
-		} else {
+		} else if (dfr->model==0x0) {
 		    // cluster model
-		    PrintError("icc_bus: use of cluster model not yet supported\n");
+		    //
+		    // there are two variants of this
+		    //
+		    // 1. (ancient P5/P6) All apics are on one bus
+		    //    mda[31:28] is the target cluster, 
+		    //    mda[27:24] has one bit for each apic in the cluster
+		    //    mda[31:28] of sending apic == ldr[31:28] of dest apic means
+		    //      the dest apic is part of the cluster
+		    //      then mda[27:24] & ldr[27:24] nonzero means to deliver
+		    //    also, mda=0xff still means broadcast 
+		    //    So, basically, you have 15 clusters of 4 apics each + broadcast
+		    //
+		    // 2. (current) hierarchical cluster model
+		    //    This is somewhat unclearly documented in volume 3, 9-32
+		    //    basically, you have a hierarchy of clusters where
+		    //    each cluster has 4 agents (APICs?) and a cluster manager.
+		    //    The cluster manager is not an apic, though, and outside of
+		    //    scope of documents.  Again, you have 15 clusters of 4 apics
+		    //    each + broadcast.   My impression is that this is identical 
+		    //    to variant 1 for our purposes. 
+		    //
+		    //
+		    // if we are in lowest priority mode, we should just pick one
+		    // according to the arbitration priority register
+		    int i;
+		    for (i=0;i<MAX_APICS;i++) { 
+			struct apic_data *dest_apic=&(state->apics[i]);
+			if (dest_apic->present &&
+			    dest_apic->ops->should_deliver_cluster(dest_apic->core,
+								   mda,
+								   dest_apic->priv_data)) {
+			    if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+				return -1;
+			    }
+			}
+		    }
+		} else {
+		    PrintError("icc_bus: unknown logical delivery model 0x%x\n", dfr->model);
 		    return -1;
 		}
 	    }
-		
+	    
 	    break;
-
+	    
 	case 1:  // self
-	    if (icr->dst==state->ioapic_id) { 
-		PrintError("icc_bus: ioapic attempting to send to itself\n");
-		return -1;
-	    }
-	    struct apic_data *dest_apic=&(state->apics[src_apic]);
-	    if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+	    if (icr->dst_mode==0) { 
+		// physical delivery
+		if (icr->dst==state->ioapic_id) { 
+		    PrintError("icc_bus: ioapic attempting to send to itself\n");
+		    return -1;
+		}
+		struct apic_data *dest_apic=&(state->apics[src_apic]);
+		if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+		    return -1;
+		}
+	    } else {
+		// logical delivery
+		PrintError("icc_bus: use of logical delivery in self is not yet supported.\n");
 		return -1;
 	    }
 	    break;
-
+	    
 	case 2: 
-	case 3: { // all and all-but-me
-	    int i;
-	    for (i=0;i<MAX_APICS;i++) { 
-		struct apic_data *dest_apic=&(state->apics[i]);
-		if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) { 
-		    if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
-			return -1;
+	case 3:  // all and all-but-me
+	    if (icr->dst_mode==0) { 
+		// physical
+		int i;
+		for (i=0;i<MAX_APICS;i++) { 
+		    struct apic_data *dest_apic=&(state->apics[i]);
+		    if (dest_apic->present && (i!=src_apic || icr->dst_shorthand==2)) { 
+			if (deliver(src_apic,dest_apic,icr,state,extirq)) { 
+			    return -1;
+			}
 		    }
 		}
+	    } else {
+		// logical delivery
+		PrintError("icc_bus: use of logical delivery in %s is not yet supported\n",
+			   icr->dst_shorthand==2 ? "all" : "all-but-me" );
+		return -1;
 	    }
-	}
 	    break;
-	    }
+    }
 
     return 0;
 }
@@ -350,7 +401,6 @@ int v3_icc_register_ioapic(struct v3_vm_info *vm, struct vm_device * icc_bus, ui
 }
 
 
-
 static int icc_bus_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
     PrintDebug("icc_bus: Creating ICC_BUS\n");
 
diff --git a/palacios/src/palacios/vmm_mem.c b/palacios/src/palacios/vmm_mem.c
index 2b97989..89fd64f 100644
--- a/palacios/src/palacios/vmm_mem.c
+++ b/palacios/src/palacios/vmm_mem.c
@@ -325,43 +325,6 @@ struct v3_mem_region * v3_get_next_mem_region( struct v3_vm_info * vm, uint16_t
 }
 
 
-/* Search the "hooked" memory regions for a region that ends after the given address.  If the
- * address is invalid, return NULL. Else, return the first region found or the base region if no
- * region ends after the given address.
- */
-struct v3_mem_region * v3_get_next_mem_region( struct v3_vm_info * vm, uint16_t core_id, addr_t guest_addr) {
-    struct rb_node * n = vm->mem_map.mem_regions.rb_node;
-    struct v3_mem_region * reg = NULL;
-
-    // Keep going to the right in the tree while the address is greater than the current region's
-    // end address.
-    while (n) {
-        reg = rb_entry(n, struct v3_mem_region, tree_node);
-        if (guest_addr >= reg->guest_end) { // reg is [start,end)
-            n = n->rb_right;
-        } else {
-	    // PAD this may be buggy since there is no guarantees that 
-	    // the cores are in order
-	    if ((core_id == reg->core_id) || (reg->core_id == V3_MEM_CORE_ANY)) {
-		return reg;
-	    } else {
-		n = n->rb_right;
-	    }
-        }
-    }
-    
-    // There is no registered region, so we check if it's a valid address in the base region
-    
-    if (guest_addr >= vm->mem_map.base_region.guest_end) {
-	PrintError("%s: Guest Address Exceeds Base Memory Size (ga=%p), (limit=%p)\n",
-		   __FUNCTION__, (void *)guest_addr, (void *)vm->mem_map.base_region.guest_end);
-        v3_print_mem_map(vm);
-        return NULL;
-    }
-    
-    return &(vm->mem_map.base_region);
-}
-
 
 
 void v3_delete_mem_region(struct v3_vm_info * vm, struct v3_mem_region * reg) {