9 #include <linux/module.h>
10 #include <linux/kernel.h>
11 #include <linux/slab.h>
13 #include <linux/sched.h>
15 #include <palacios/vmm.h>
16 #include <palacios/vm_guest.h>
17 #include <interfaces/vmm_host_hypercall.h>
// Debug switch, defined as 0 here.  Where it is tested is not visible in
// this excerpt.
22 #define SHALLOW_DEBUG 0
// DEEP_DEBUG_PRINT and SHALLOW_DEBUG_PRINT each appear in two forms: one
// forwards its arguments to printk, the other expands to nothing.
// NOTE(review): presumably preprocessor conditionals on lines not shown
// select exactly one form of each -- confirm against the full file.
25 #define DEEP_DEBUG_PRINT(fmt, args...) printk((fmt), ##args)
27 #define DEEP_DEBUG_PRINT(fmt, args...)
31 #define SHALLOW_DEBUG_PRINT(fmt, args...) printk((fmt), ##args)
33 #define SHALLOW_DEBUG_PRINT(fmt, args...)
// ERROR and INFO both forward unconditionally to printk.
37 #define ERROR(fmt, args...) printk((fmt), ##args)
38 #define INFO(fmt, args...) printk((fmt), ##args)
// Number of rows allocated for rtab in init_module and the bound of every
// table scan in this file.
40 #define RENDEZVOUS_TABLE_MAX 32
// Size in bytes of the per-row exec buffer, and the number of bytes read
// from the guest into it by mpi_init_hcall.
41 #define EXEC_NAME_MAX 128
// One row of the rendezvous table.  Only some fields are visible in this
// excerpt; the code below also references state, cr3, rank, send_* and
// recv_* members whose declarations are on lines not shown.
43 struct rendezvous_table_row {
// Name copied from guest memory by mpi_init_hcall (forced NUL-terminated
// there); compared with strncmp in send/recv to pair up rows.
50 char exec[EXEC_NAME_MAX];
// Compared against the calling core when a hypercall looks up its row.
52 struct guest_info *core;
// Accessor table associated with the row; it appears in the error messages
// of the copy paths.  Where it is assigned is not visible here.
53 struct guest_accessors *acc;
// A sender sleeps here until its send_pending flag is cleared; woken by
// mpi_recv_hcall.
55 wait_queue_head_t send_wait_queue;
// A receiver sleeps here until its recv_pending flag is cleared; woken by
// mpi_send_hcall.
62 wait_queue_head_t recv_wait_queue;
// Not referenced by any line visible in this excerpt.
// NOTE(review): presumably the guest address of the receive status -- confirm.
68 uint64_t recv_stat_vaddr;
// The table itself: allocated and zeroed in init_module, indexed
// 0..RENDEZVOUS_TABLE_MAX-1 by the handlers.
72 static struct rendezvous_table_row *rtab;
// mpi_init_hcall - handle the MPI init hypercall.
//
// Claims a row of rtab whose state is FREE, records the caller's CR3 in it,
// copies the guest's executable name into the row by following argv through
// guest memory, and initialises the row's two wait queues.
//
// core, acc: calling guest core and its accessor table.
// argc, argv: guest values (see the debug print; their declarations and the
//             function's return statements are on lines not shown).
75 static int mpi_init_hcall(struct guest_info *core,
76 struct guest_accessors *acc,
81 struct rendezvous_table_row *r;
84 SHALLOW_DEBUG_PRINT("mpi: mpi_init_hcall(%p,%p)\n",(void*)argc,(void*)argv);
// Reported when the table is unavailable (the guarding condition is not shown).
87 ERROR("mpi: no rtab!\n");
// Look for a row whose state is FREE.
91 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
92 if (rtab[i].state==FREE) {
// Index reached the table size: no row was available.
97 if (i==RENDEZVOUS_TABLE_MAX) {
98 ERROR("mpi: no room in rtab\n");
// Remember the CR3 value reported by the accessor for this core.
106 r->cr3=acc->get_cr3(core);
110 // The following hideously assumes that FIX FIX FIX
111 // the guest app is 32 bit! FIX FIX FIX
112 // THIS IS COMMON ASSUMPTION THROUGHOUT FIX FIX FIX
// First dereference: read 4 bytes at guest address argv into va.
// NOTE(review): va's declaration is not visible; if it is wider than
// 4 bytes, confirm it is zero-initialised before these reads.
113 if (acc->read_gva(core,(uint64_t)argv,4,&va)<0) {
114 ERROR("mpi: init cannot copy argv (first deref)\n");
// Second dereference: read 4 bytes at the address just fetched, again into va.
119 if (acc->read_gva(core,(uint64_t)va,4,&va)<0) {
120 ERROR("mpi: init cannot copy argv (second deref)\n");
123 // now we have **argv, and we want the array it points to
// Third read: copy EXEC_NAME_MAX bytes from that address into the row.
124 if (acc->read_gva(core,(uint64_t)va,EXEC_NAME_MAX,r->exec)<0) {
125 ERROR("mpi: init cannot copy exec name (third deref)\n");
// Force termination: the bytes copied from the guest need not contain a NUL.
129 r->exec[EXEC_NAME_MAX-1]=0;
133 init_waitqueue_head(&(r->send_wait_queue));
134 init_waitqueue_head(&(r->recv_wait_queue));
138 DEEP_DEBUG_PRINT("mpi: inited entry %d to '%s' core=%p cr3=%p\n",
139 i,r->exec,r->core,(void*)(r->cr3));
// mpi_deinit_hcall - handle the MPI deinit hypercall.
//
// Finds the caller's row (state not FREE and matching core; any further
// match clause is on a line not shown), warns if a send or receive is still
// pending on it, and zeroes the whole row.
//
// core, acc: calling guest core and its accessor table.
// Return statements are not visible in this excerpt.
144 static int mpi_deinit_hcall(struct guest_info *core,
145 struct guest_accessors *acc)
150 SHALLOW_DEBUG_PRINT("mpi: mpi_deinit_hcall()\n");
152 cr3=acc->get_cr3(core);
154 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
155 if (rtab[i].state!=FREE &&
156 rtab[i].core==core &&
// Index reached the table size: the caller has no row.
162 if (i==RENDEZVOUS_TABLE_MAX) {
163 ERROR("mpi: could not find matching row in rtab to delete\n");
// Pending operations only produce warnings; the row is still cleared below.
167 if (rtab[i].send_pending) {
168 ERROR("mpi: warning: deleting matching row with send pending\n");
171 if (rtab[i].recv_pending) {
172 ERROR("mpi: warning: deleting matching row with recv pending\n");
175 DEEP_DEBUG_PRINT("mpi: removing row for core %p, cr3 %p, exec '%s'\n",
176 core, (void*)cr3, rtab[i].exec);
// Clear every field of the row.
// NOTE(review): presumably this leaves state == FREE (i.e. FREE is 0) -- confirm.
179 memset(&(rtab[i]),0,sizeof(struct rendezvous_table_row));
// mpi_comm_rank_hcall - handle the MPI comm-rank hypercall.
//
// Finds the caller's row in state INITED (matching core; any further match
// clause is on a line not shown), reads 4 bytes from guest address rank_va
// into the row's rank field, and moves the row to state RANKED.
//
// core, acc: calling guest core and its accessor table.
// comm_va, rank_va: guest addresses (see the debug print; their declarations
//                   are not visible).  comm_va is only used in that print.
184 static int mpi_comm_rank_hcall(struct guest_info *core,
185 struct guest_accessors *acc,
192 SHALLOW_DEBUG_PRINT("mpi_comm_rank_hcall(%p,%p)\n",(void*)comm_va,(void*)rank_va);
194 cr3=acc->get_cr3(core);
196 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
197 if (rtab[i].state==INITED &&
198 rtab[i].core==core &&
204 if (i==RENDEZVOUS_TABLE_MAX) {
205 ERROR("mpi: no matching row found\n");
210 // The following completely ignores the communicator
211 // Throughout we assume everyone is in MPI_COMM_WORLD
// The rank is taken from guest memory rather than assigned here.
// NOTE(review): only 4 bytes are read, yet rank is printed with %llu below;
// confirm the upper bytes of rank are zero before this read.
215 if (acc->read_gva(core,(uint64_t)rank_va,4,&(rtab[i].rank))<0) {
216 ERROR("mpi: rank cannot copy rank\n");
220 rtab[i].state=RANKED;
222 SHALLOW_DEBUG_PRINT("mpi: ranking rcore %p, cr3 %p, exec '%s' as %llu\n",
223 core, (void*)cr3, rtab[i].exec, rtab[i].rank);
// Clears the low 12 bits of x, giving the start of the 0x1000-byte page that
// contains it.
228 #define PAGE_ADDR(x) ((x)&~((uint64_t)0xfff))
// Start of the page after the one containing x.  Note that x is expanded
// once here, inside PAGE_ADDR.
229 #define PAGE_NEXT_ADDR(x) (PAGE_ADDR(x)+0x1000)
// fast_inter_vm_copy - copy bytes between the address spaces of two guests.
//
// For each chunk, both guest virtual addresses are translated to host
// virtual addresses through the respective accessor tables and the bytes are
// moved with memcpy.  A chunk never extends past the end of the current
// source page or destination page, nor past the bytes still to be copied.
//
// dest_core/dest_acc, src_core/src_acc: destination and source guest cores
//   with their accessor tables.
// dest_va, src_va, and the byte count: declared on lines not shown.
// The surrounding loop, the address advance and the return statements are
// not visible in this excerpt.
233 static uint64_t fast_inter_vm_copy(struct guest_info *dest_core,
234 struct guest_accessors *dest_acc,
236 struct guest_info *src_core,
237 struct guest_accessors *src_acc,
242 uint64_t left, chunk;
243 uint64_t src_page_left, dest_page_left;
244 uint64_t src_host_va, dest_host_va;
// Bytes from each address up to the start of its next page.
249 src_page_left = PAGE_NEXT_ADDR(src_va) - src_va;
250 dest_page_left = PAGE_NEXT_ADDR(dest_va) - dest_va;
// chunk = min(src_page_left, dest_page_left, left)
252 chunk = src_page_left < dest_page_left ? src_page_left : dest_page_left;
253 chunk = chunk < left ? chunk : left;
// NOTE(review): chunk is uint64_t but printed with %d, and src_va/dest_va
// are used as integers above but printed with %p -- confirm the format
// specifiers against the hidden declarations.
255 DEEP_DEBUG_PRINT("mpi: copy chunk=%d, src_va=%p, dest_va=%p\n",
256 chunk, src_va, dest_va);
// Translate each guest virtual address to a host virtual address.
258 if (src_acc->gva_to_hva(src_core,src_va,&src_host_va)<0) {
259 ERROR("mpi: cannot translate src address %p in VM core %p\n",src_va,src_core);
262 if (dest_acc->gva_to_hva(dest_core,dest_va,&dest_host_va)<0) {
263 ERROR("mpi: cannot translate dest address %p in VM core %p\n",dest_va,dest_core);
267 DEEP_DEBUG_PRINT("mpi: copy chunk=%d, src_host_va=%p, dest_host_va=%p\n",
268 chunk, src_host_va, dest_host_va);
// Both ranges lie within a single page each, so one memcpy per chunk suffices.
270 memcpy((void*)dest_host_va,(void*)src_host_va,chunk);
// mpi_send_hcall - handle the MPI send hypercall.
//
// Locates the caller's row (state RANKED, matching core) and then another
// RANKED row with the same exec name.  If that row has a pending receive
// whose tag and source match, the data is copied straight into the
// receiver's buffer with fast_inter_vm_copy and the receiver is woken.
// Otherwise the send parameters are stored in the caller's row and the
// caller sleeps on send_wait_queue until send_pending is cleared.
//
// core, acc: calling guest core and its accessor table.
// buf, n, dtype, dest, tag, comm: see the debug print; their declarations
//   are not visible.  dtype and comm are used only in that print in the
//   lines shown.
// Returns sender->send_rc on the waiting path; other return statements are
// on lines not shown.
283 static int mpi_send_hcall(struct guest_info *core,
284 struct guest_accessors *acc,
294 struct rendezvous_table_row *sender, *receiver;
296 SHALLOW_DEBUG_PRINT("mpi: mpi_send_hcall(%p,%p,%p,%p,%p,%p)\n",(void*)buf,(void*)n,(void*)dtype,(void*)dest,(void*)tag,(void*)comm);
298 cr3=acc->get_cr3(core);
// Find the caller's own row; it is used as `sender` below (the assignment
// is on a line not shown).
301 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
302 if (rtab[i].state==RANKED &&
303 rtab[i].core==core &&
309 if (i==RENDEZVOUS_TABLE_MAX) {
310 ERROR("mpi: existential panic in send\n");
316 // Next try to find a matching receive
// NOTE(review): the search condition compares only row identity, state and
// exec name; it does not compare the candidate's rank with dest.  Confirm
// this is intended when more than two ranks share an exec name.
318 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
319 if (&(rtab[i])!=sender &&
320 rtab[i].state==RANKED &&
321 strncmp(rtab[i].exec,sender->exec,EXEC_NAME_MAX)==0) {
326 if (i==RENDEZVOUS_TABLE_MAX) {
327 DEEP_DEBUG_PRINT("mpi: receiver does not exist yet - pending ourselves\n");
331 if (!(receiver->recv_pending)) {
332 DEEP_DEBUG_PRINT("mpi: receiver has no pending receive - pending ourselves\n");
335 // totally ignores communicator!!! FIX FIX FIX
336 // simplistic fully qualified matching FIX FIX FIX
337 if (receiver->recv_tag==tag &&
338 receiver->recv_src==sender->rank) {
340 // totally ignores types and assumes byte xfer FIX FIX FIX
// Copy no more than the smaller of the send length and the posted receive size.
341 uint64_t size = n < receiver->recv_size ? n : receiver->recv_size;
343 SHALLOW_DEBUG_PRINT("mpi: mpi_send: copying %llu bytes\n", size);
345 if (fast_inter_vm_copy(receiver->core,
347 receiver->recv_vaddr,
352 ERROR("mpi: fast_inter_vm_copy failed in mpi_send: destvm=%p, destacc=%p, dest_va=%p, srcvm=%p, srcacc=%p, src_va=%p, size=%llu\n",receiver->core,receiver->acc,receiver->recv_vaddr,core,acc,buf,size);
357 SHALLOW_DEBUG_PRINT("mpi: mpi_send: finished copying\n");
360 // Now we release the receiver
// The result code is stored and the pending flag cleared before the wake-up,
// so the woken receiver sees its wait condition satisfied.
361 receiver->recv_rc = 0;
362 receiver->recv_pending = 0;
364 wake_up_interruptible(&(receiver->recv_wait_queue));
366 // And we are also done
371 DEEP_DEBUG_PRINT("mpi: receiver's pending receive does not match - pending ourselves\n");
380 // we store our state
381 sender->send_vaddr=buf;
383 sender->send_dest=dest;
384 sender->send_tag=tag;
387 // And now we wait for the receive to do the job
388 sender->send_pending=1;
// A nonzero result from wait_event_interruptible re-enters the wait; the
// loop body is on lines not shown.
389 while (wait_event_interruptible(sender->send_wait_queue,
390 !(sender->send_pending)) !=0) {
396 return sender->send_rc;
// mpi_recv_hcall - handle the MPI receive hypercall.
//
// Mirror image of the send handler: locates the caller's row (state RANKED,
// matching core) and then another RANKED row with the same exec name.  If
// that row has a pending send whose tag and destination match, the data is
// copied into the caller's buffer with fast_inter_vm_copy and the sender is
// woken.  Otherwise the receive parameters are stored in the caller's row
// and the caller sleeps on recv_wait_queue until recv_pending is cleared.
//
// core, acc: calling guest core and its accessor table.
// buf, n, dtype, src, tag, comm, stat: see the debug print; their
//   declarations are not visible.  dtype, comm and stat are used only in
//   that print in the lines shown.
// Returns receiver->recv_rc on the waiting path; other return statements are
// on lines not shown.
399 static int mpi_recv_hcall(struct guest_info *core,
400 struct guest_accessors *acc,
411 struct rendezvous_table_row *sender, *receiver;
413 SHALLOW_DEBUG_PRINT("mpi_recv_hcall(%p,%p,%p,%p,%p,%p,%p)\n",(void*)buf,(void*)n,(void*)dtype,(void*)src,(void*)tag,(void*)comm,(void*)stat);
415 cr3=acc->get_cr3(core);
// Find the caller's own row; it is used as `receiver` below (the assignment
// is on a line not shown).
418 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
419 if (rtab[i].state==RANKED &&
420 rtab[i].core==core &&
426 if (i==RENDEZVOUS_TABLE_MAX) {
427 ERROR("mpi: existential panic in receive\n");
433 // Next try to find a matching send
// NOTE(review): the search condition compares only row identity, state and
// exec name; it does not compare the candidate's rank with src.  Confirm
// this is intended when more than two ranks share an exec name.
435 for (i=0;i<RENDEZVOUS_TABLE_MAX;i++) {
436 if (&(rtab[i])!=receiver &&
437 rtab[i].state==RANKED &&
438 strncmp(rtab[i].exec,receiver->exec,EXEC_NAME_MAX)==0) {
443 if (i==RENDEZVOUS_TABLE_MAX) {
444 DEEP_DEBUG_PRINT("mpi: sender does not exist yet - pending ourselves\n");
// The message below says "receive" although the flag tested is send_pending.
448 if (!(sender->send_pending)) {
449 DEEP_DEBUG_PRINT("mpi: sender has no pending receive - pending ourselves\n");
452 // totally ignores communicator!!! FIX FIX FIX
453 // simplistic fully qualified matching FIX FIX FIX
454 if (sender->send_tag==tag &&
455 sender->send_dest==receiver->rank) {
// Copy no more than the smaller of the receive length and the posted send size.
457 uint64_t size = n < sender->send_size ? n : sender->send_size;
459 SHALLOW_DEBUG_PRINT("mpi: mpi_recv: copying %llu bytes\n", size);
461 if (fast_inter_vm_copy(core,
468 ERROR("mpi: fast_inter_vm_copy failed in mpi_recv: destvm=%p, destacc=%p, dest_va=%p, srcvm=%p, srcacc=%p, src_va=%p, size=%llu\n",core,acc,buf,sender->core,sender->acc,sender->send_vaddr,size);
472 SHALLOW_DEBUG_PRINT("mpi: mpi_recv: finished copying\n");
474 // Now we release the sender
// The pending flag is cleared before the wake-up so the woken sender sees
// its wait condition satisfied.
476 sender->send_pending = 0;
478 wake_up_interruptible(&(sender->send_wait_queue));
480 // And we are also done
485 DEEP_DEBUG_PRINT("mpi: sender's pending send does not match - pending ourselves\n");
494 // we store our state
495 receiver->recv_vaddr=buf;
496 receiver->recv_size=n;
497 receiver->recv_src=src;
498 receiver->recv_tag=tag;
// Default to failure; mpi_send_hcall overwrites this with 0 after a
// successful copy.
499 receiver->recv_rc=-1;
501 // And now we wait for the send to do the job
502 receiver->recv_pending=1;
// A nonzero result from wait_event_interruptible re-enters the wait; the
// loop body is on lines not shown.
503 while (wait_event_interruptible(receiver->recv_wait_queue,
504 !(receiver->recv_pending)) !=0) {
510 return receiver->recv_rc;
// get_args_64 - fetch eight hypercall arguments from guest registers.
//
// Stores, in order, the guest's rcx, rdx, rsi, rdi, r8, r9, r10 and r11
// (as returned by the accessor table) into *a1 .. *a8.  The declarations of
// a1..a8 are on lines not shown.
514 static void get_args_64(palacios_core_t core,
515 struct guest_accessors *acc,
525 *a1 = acc->get_rcx(core);
526 *a2 = acc->get_rdx(core);
527 *a3 = acc->get_rsi(core);
528 *a4 = acc->get_rdi(core);
529 *a5 = acc->get_r8(core);
530 *a6 = acc->get_r9(core);
531 *a7 = acc->get_r10(core);
532 *a8 = acc->get_r11(core);
// get_args_32 - fetch eight hypercall arguments from the guest stack.
//
// Reads eight consecutive 4-byte values starting at the guest's rsp
// (offsets 0, 4, ..., 28) through read_gva and stores them into *a1 .. *a8.
// The declarations of a1..a8, rsp and temp are on lines not shown.
535 static void get_args_32(palacios_core_t core,
536 struct guest_accessors *acc,
550 rsp = acc->get_rsp(core);
// NOTE(review): the result of each read_gva call is ignored, so a failed
// read would store whatever temp held beforehand -- confirm this is
// acceptable.
552 acc->read_gva(core,rsp,4,&temp); *a1=temp;
553 acc->read_gva(core,rsp+4,4,&temp); *a2=temp;
554 acc->read_gva(core,rsp+8,4,&temp); *a3=temp;
555 acc->read_gva(core,rsp+12,4,&temp); *a4=temp;
556 acc->read_gva(core,rsp+16,4,&temp); *a5=temp;
557 acc->read_gva(core,rsp+20,4,&temp); *a6=temp;
558 acc->read_gva(core,rsp+24,4,&temp); *a7=temp;
559 acc->read_gva(core,rsp+28,4,&temp); *a8=temp;
// get_args - fetch the eight hypercall arguments using the guest's calling
// convention.
//
// Reads the guest's rbx and then delegates to get_args_64 or get_args_32, or
// reports an unknown convention.  The tests that choose between the three
// outcomes are on lines not shown.
// NOTE(review): presumably the value read from rbx selects the convention --
// confirm against the full file.
563 static void get_args(palacios_core_t core,
564 struct guest_accessors *acc,
577 rbx=acc->get_rbx(core);
582 DEEP_DEBUG_PRINT("64 bit hcall\n");
583 return get_args_64(core,acc,a1,a2,a3,a4,a5,a6,a7,a8);
586 DEEP_DEBUG_PRINT("32 bit hcall\n");
587 return get_args_32(core,acc,a1,a2,a3,a4,a5,a6,a7,a8);
590 ERROR("UNKNOWN hcall calling convention\n");
// put_return - hand a hypercall result back to the guest by writing rc into
// the guest's rax through the accessor table.  The declaration of rc is on a
// line not shown.
595 static void put_return(palacios_core_t core,
596 struct guest_accessors *acc,
599 acc->set_rax(core,rc);
// mpi_hypercall - exported entry point for MPI hypercalls.
//
// Fetches up to eight arguments with get_args, invokes one of the handlers
// below (or reports an unknown hypercall number), and writes the handler's
// result to the guest with put_return.  The selection logic and the
// function's own return statement are on lines not shown.
//
// core: calling guest core.
// acc:  accessor table for that core.
// NOTE(review): core is declared palacios_core_t * here, while get_args and
// put_return take palacios_core_t and the handlers take struct guest_info *
// -- confirm the types agree.
603 int mpi_hypercall(palacios_core_t *core,
605 struct guest_accessors *acc,
608 uint64_t a1,a2,a3,a4,a5,a6,a7,a8;
611 DEEP_DEBUG_PRINT("palacios: mpi_hypercall(%p,0x%x,%p,%p)\n",
614 get_args(core,acc,&a1,&a2,&a3,&a4,&a5,&a6,&a7,&a8);
616 DEEP_DEBUG_PRINT("palacios: arguments: %p, %p, %p, %p, %p, %p, %p, %p\n",
617 a1,a2,a3,a4,a5,a6,a7,a8);
// Each handler receives the raw 64-bit arguments cast to the types it expects.
621 rc = mpi_init_hcall(core,acc,(int*)a1,(char ***)a2);
624 rc = mpi_deinit_hcall(core,acc);
627 rc = mpi_comm_rank_hcall(core,acc,(void*)a1,(int*)a2);
630 rc = mpi_send_hcall(core,acc,(void*)a1,(int)a2,(int)a3,(int)a4,(int)a5,(int)a6);
633 rc = mpi_recv_hcall(core,acc,(void*)a1,(int)a2,(int)a3,(int)a4,(int)a5,(int)a6,(void*)a7);
636 ERROR("palacios: mpi: unknown hcall number\n");
640 put_return(core,acc,rc);
// Make the entry point available to other kernel modules.
648 EXPORT_SYMBOL(mpi_hypercall);
// init_module - module load hook.
//
// Allocates the rendezvous table (RENDEZVOUS_TABLE_MAX rows) with kmalloc,
// zeroes it, and logs that the module is initialised.  The allocation check
// and the return statements are on lines not shown.
651 int init_module(void)
654 rtab = kmalloc(sizeof(struct rendezvous_table_row)*RENDEZVOUS_TABLE_MAX,GFP_KERNEL);
656 ERROR("mpi: could not allocate memory\n");
// Zero every row of the freshly allocated table.
659 memset(rtab,0,sizeof(struct rendezvous_table_row)*RENDEZVOUS_TABLE_MAX);
660 INFO("mpi: inited\n");
// cleanup_module - module unload hook.
//
// Only the final log line is visible in this excerpt; any teardown performed
// before it is on lines not shown.
667 void cleanup_module(void)
674 INFO("mpi: deinited\n");