linux_module/palacios-stubs.c

   1 #include <linux/kernel.h>
   2 #include <linux/kthread.h>
   3 #include <linux/spinlock.h>
   4 #include <linux/gfp.h>
   5 #include <linux/interrupt.h>
   6 #include <linux/linkage.h>
   7 #include <linux/sched.h>
   8 #include <linux/uaccess.h>
   9 #include <asm/irq_vectors.h>
  10 #include <asm/io.h>
  11
  12 #include <linux/init.h>
  13 #include <linux/module.h>
  14 #include <linux/kthread.h>
  15 #include <asm/uaccess.h>
  16 #include <linux/smp.h>
  17
  18 #include <palacios/vmm.h>
  19 #include <palacios/vmm_host_events.h>
  20 #include "palacios.h"
  21
  22
  23
  24
  25 #include "mm.h"
  26
// The following can be used to track heap bugs
// in palacios_alloc_extended() / palacios_free().
// zero memory after allocation
// (when nonzero, the whole padded block is memset to 0 after kmalloc)
#define ALLOC_ZERO_MEM 0
// pad allocations by this many bytes on both ends of block
// (the allocator requests size + 2*ALLOC_PAD and hands out addr + ALLOC_PAD;
//  palacios_free() subtracts ALLOC_PAD again before calling kfree)
#define ALLOC_PAD      0


// Allocation statistics, updated without any locking in this file:
//   pg_allocs - pages handed out by palacios_allocate_pages()
//   pg_frees  - pages returned through palacios_free_pages()
//   mallocs   - successful palacios_alloc_extended() calls
//   frees     - palacios_free() calls
// They have external linkage; presumably other parts of the module report
// them -- TODO confirm against the other users.
u32 pg_allocs = 0;
u32 pg_frees = 0;
u32 mallocs = 0;
u32 frees = 0;
  38
  39
// Maps an interrupt vector to the VM that hooked it.  Entries are written by
// palacios_hook_interrupt() and read by palacios_dispatch_interrupt();
// a NULL entry means the vector is not hooked.
static struct v3_vm_info * irq_to_guest_map[256];


// Defined outside this file; read by palacios_get_cpu_khz().
extern unsigned int cpu_khz;

// Defined outside this file; palacios_vmm_init() turns the first
// cpu_list_len entries of cpu_list into a CPU bitmask for Init_V3().
extern int cpu_list[NR_CPUS];
extern int cpu_list_len;


// One formatting buffer per CPU index, each V3_PRINTK_BUF_SIZE bytes.
// Allocated by init_print_buffers(), released by deinit_print_buffers(),
// and used by palacios_print_scoped() indexed by palacios_get_cpu().
static char *print_buffer[NR_CPUS];
  50
  51 static void deinit_print_buffers(void)
  52 {
  53     int i;
  54
  55     for (i=0;i<NR_CPUS;i++) {
  56         if (print_buffer[i]) {
  57             palacios_free(print_buffer[i]);
  58             print_buffer[i]=0;
  59         }
  60     }
  61 }
  62
  63 static int init_print_buffers(void)
  64 {
  65     int i;
  66
  67     memset(print_buffer,0,sizeof(char*)*NR_CPUS);
  68
  69 #if !V3_PRINTK_OLD_STYLE_OUTPUT
  70
  71     for (i=0;i<NR_CPUS;i++) {
  72         print_buffer[i] = palacios_alloc(V3_PRINTK_BUF_SIZE);
  73         if (!print_buffer[i]) {
  74             ERROR("Cannot allocate print buffer for cpu %d\n",i);
  75             deinit_print_buffers();
  76             return -1;
  77         }
  78         memset(print_buffer[i],0,V3_PRINTK_BUF_SIZE);
  79     }
  80
  81 #endif
  82
  83     return 0;
  84
  85 }
  86
  87 /**
  88  * Prints a message to the console.
  89  */
  90 void palacios_print_scoped(void * vm, int vcore, const char *fmt, ...) {
  91
  92 #if V3_PRINTK_OLD_STYLE_OUTPUT
  93
  94   va_list ap;
  95
  96   va_start(ap, fmt);
  97   vprintk(fmt, ap);
  98   va_end(ap);
  99
 100   return
 101
 102 #else
 103
 104   va_list ap;
 105   char *buf;
 106   unsigned int cpu = palacios_get_cpu();
 107   struct v3_guest *guest = (struct v3_guest *)vm;
 108
 109   buf = print_buffer[cpu];
 110
 111   if (!buf) {
 112       printk(KERN_INFO "palacios (pcore %u): output skipped - no allocated buffer\n",cpu);
 113       return;
 114   }
 115
 116   va_start(ap, fmt);
 117   vsnprintf(buf,V3_PRINTK_BUF_SIZE, fmt, ap);
 118   va_end(ap);
 119
 120 #if V3_PRINTK_CHECK_7BIT
 121   {
 122       char c=0;
 123       int i;
 124       for (i=0;i<strlen(buf);i++) {
 125           if (buf[i] < 0) {
 126               c=buf[i];
 127               break;
 128           }
 129       }
 130       if (c!=0) {
 131           printk(KERN_INFO "palacios (pcore %u): ALERT ALERT 8 BIT CHAR (c=%d) DETECTED\n", cpu,c);
 132       }
 133   }
 134 #endif
 135
 136   if (guest) {
 137     if (vcore>=0) {
 138       printk(KERN_INFO "palacios (pcore %u vm %s vcore %u): %s",
 139              cpu,
 140              guest->name,
 141              vcore,
 142              buf);
 143     } else {
 144        printk(KERN_INFO "palacios (pcore %u vm %s): %s",
 145              cpu,
 146              guest->name,
 147              buf);
 148     }
 149   } else {
 150     printk(KERN_INFO "palacios (pcore %u): %s",
 151            cpu,
 152            buf);
 153   }
 154
 155   return;
 156
 157 #endif
 158
 159 }
 160
 161
 162 /*
 163  * Allocates a contiguous region of pages of the requested size.
 164  * Returns the physical address of the first page in the region.
 165  */
 166 void *palacios_allocate_pages(int num_pages, unsigned int alignment) {
 167     void * pg_addr = NULL;
 168
 169     pg_addr = (void *)alloc_palacios_pgs(num_pages, alignment);
 170
 171     if (!pg_addr) {
 172         ERROR("ALERT ALERT  Page allocation has FAILED Warning\n");
 173         return NULL;
 174     }
 175
 176     pg_allocs += num_pages;
 177
 178     return pg_addr;
 179 }
 180
 181
 182 /**
 183  * Frees a page previously allocated via palacios_allocate_page().
 184  * Note that palacios_allocate_page() can allocate multiple pages with
 185  * a single call while palacios_free_page() only frees a single page.
 186  */
 187
 188 void palacios_free_pages(void * page_paddr, int num_pages) {
 189     pg_frees += num_pages;
 190     free_palacios_pgs((uintptr_t)page_paddr, num_pages);
 191 }
 192
 193
 194 void *
 195 palacios_alloc_extended(unsigned int size, unsigned int flags) {
 196     void * addr = NULL;
 197
 198     addr = kmalloc(size+2*ALLOC_PAD, flags);
 199
 200     if (!addr) {
 201        ERROR("ALERT ALERT  kmalloc has FAILED FAILED FAILED\n");
 202        return NULL;
 203     }
 204
 205     mallocs++;
 206
 207 #if ALLOC_ZERO_MEM
 208     memset(addr,0,size+2*ALLOC_PAD);
 209 #endif
 210
 211     return addr+ALLOC_PAD;
 212 }
 213
 214
 215 /**
 216  * Allocates 'size' bytes of kernel memory.
 217  * Returns the kernel virtual address of the memory allocated.
 218  */
 219 void *
 220 palacios_alloc(unsigned int size) {
 221
 222     // It is very important that this test remains since
 223     // this function is used extensively throughout palacios and the linux
 224     // module, both in places where interrupts are off and where they are on
 225     // a GFP_KERNEL call, when done with interrupts off can lead to DEADLOCK
 226     if (irqs_disabled()) {
 227         return palacios_alloc_extended(size,GFP_ATOMIC);
 228     } else {
 229         return palacios_alloc_extended(size,GFP_KERNEL);
 230     }
 231
 232 }
 233
 234 /**
 235  * Frees memory that was previously allocated by palacios_alloc().
 236  */
 237 void
 238 palacios_free(
 239         void *                  addr
 240 )
 241 {
 242     frees++;
 243     kfree(addr-ALLOC_PAD);
 244     return;
 245 }
 246
 247 /**
 248  * Converts a kernel virtual address to the corresponding physical address.
 249  */
 250 void *
 251 palacios_vaddr_to_paddr(
 252         void *                  vaddr
 253 )
 254 {
 255     return (void*) __pa(vaddr);
 256
 257 }
 258
 259 /**
 260  * Converts a physical address to the corresponding kernel virtual address.
 261  */
 262 void *
 263 palacios_paddr_to_vaddr(
 264         void *                  paddr
 265 )
 266 {
 267   return __va(paddr);
 268 }
 269
 270 /**
 271  * Runs a function on the specified CPU.
 272  */
 273 static void
 274 palacios_xcall(
 275         int                     cpu_id,
 276         void                    (*fn)(void *arg),
 277         void *                  arg
 278 )
 279 {
 280
 281
 282     // We set wait to 1, but I'm not sure this is necessary
 283     smp_call_function_single(cpu_id, fn, arg, 1);
 284
 285     return;
 286 }
 287
 288
// Size of the thread name buffer, including the terminating NUL.
#define MAX_THREAD_NAME 32

// Start-up record handed to lnx_thread_target().  It is allocated by the
// thread-creation helpers and freed by lnx_thread_target() after the
// thread function returns.
struct lnx_thread_arg {
    int (*fn)(void * arg);          // function the new thread runs
    void * arg;                     // argument passed to fn
    char name[MAX_THREAD_NAME];     // NUL-terminated thread name
};
 296
 297 static int lnx_thread_target(void * arg) {
 298     struct lnx_thread_arg * thread_info = (struct lnx_thread_arg *)arg;
 299     int ret = 0;
 300     /*
 301       INFO("Daemonizing new Palacios thread (name=%s)\n", thread_info->name);
 302
 303       daemonize(thread_info->name);
 304       allow_signal(SIGKILL);
 305     */
 306
 307
 308     ret = thread_info->fn(thread_info->arg);
 309
 310
 311     INFO("Palacios Thread (%s) EXITING\n", thread_info->name);
 312
 313     palacios_free(thread_info);
 314     // handle cleanup
 315
 316     do_exit(ret);
 317
 318     return 0; // should not get here.
 319 }
 320
 321 /**
 322  * Creates a kernel thread.
 323  */
 324 void *
 325 palacios_start_kernel_thread(
 326         int (*fn)               (void * arg),
 327         void *                  arg,
 328         char *                  thread_name) {
 329
 330     struct lnx_thread_arg * thread_info = palacios_alloc(sizeof(struct lnx_thread_arg));
 331
 332     if (!thread_info) {
 333         ERROR("ALERT ALERT Unable to allocate thread\n");
 334         return NULL;
 335     }
 336
 337     thread_info->fn = fn;
 338     thread_info->arg = arg;
 339     strncpy(thread_info->name,thread_name,MAX_THREAD_NAME);
 340     thread_info->name[MAX_THREAD_NAME-1] =0;
 341
 342     return kthread_run( lnx_thread_target, thread_info, thread_info->name );
 343 }
 344
 345
 346 /**
 347  * Starts a kernel thread on the specified CPU.
 348  */
 349 void *
 350 palacios_start_thread_on_cpu(int cpu_id,
 351                              int (*fn)(void * arg),
 352                              void * arg,
 353                              char * thread_name ) {
 354     struct task_struct * thread = NULL;
 355     struct lnx_thread_arg * thread_info = palacios_alloc(sizeof(struct lnx_thread_arg));
 356
 357     if (!thread_info) {
 358         ERROR("ALERT ALERT Unable to allocate thread to start on cpu\n");
 359         return NULL;
 360     }
 361
 362     thread_info->fn = fn;
 363     thread_info->arg = arg;
 364     strncpy(thread_info->name,thread_name,MAX_THREAD_NAME);
 365     thread_info->name[MAX_THREAD_NAME-1] =0;
 366
 367     thread = kthread_create( lnx_thread_target, thread_info, thread_info->name );
 368
 369     if (IS_ERR(thread)) {
 370         WARNING("Palacios error creating thread: %s\n", thread_info->name);
 371         palacios_free(thread_info);
 372         return NULL;
 373     }
 374
 375     if (set_cpus_allowed_ptr(thread, cpumask_of(cpu_id)) != 0) {
 376         WARNING("Attempt to start thread on disallowed CPU\n");
 377         kthread_stop(thread);
 378         palacios_free(thread_info);
 379         return NULL;
 380     }
 381
 382     wake_up_process(thread);
 383
 384     return thread;
 385 }
 386
 387
 388 /**
 389  * Rebind a kernel thread to the specified CPU
 390  * The thread will be running on target CPU on return
 391  * non-zero return means failure
 392  */
 393 int
 394 palacios_move_thread_to_cpu(int new_cpu_id,
 395                             void * thread_ptr) {
 396     struct task_struct * thread = (struct task_struct *)thread_ptr;
 397
 398     INFO("Moving thread (%p) to cpu %d\n", thread, new_cpu_id);
 399
 400     if (thread == NULL) {
 401         thread = current;
 402     }
 403
 404     /*
 405      * Bind to the specified CPU.  When this call returns,
 406      * the thread should be running on the target CPU.
 407      */
 408     return set_cpus_allowed_ptr(thread, cpumask_of(new_cpu_id));
 409 }
 410
 411
 412 /**
 413  * Returns the CPU ID that the caller is running on.
 414  */
 415 unsigned int
 416 palacios_get_cpu(void)
 417 {
 418
 419     /* We want to call smp_processor_id()
 420      * But this is not safe if kernel preemption is possible
 421      * We need to ensure that the palacios threads are bound to a give cpu
 422      */
 423
 424     unsigned int cpu_id = get_cpu();
 425     put_cpu();
 426     return cpu_id;
 427 }
 428
 429 /**
 430  * Interrupts the physical CPU corresponding to the specified logical guest cpu.
 431  *
 432  * NOTE:
 433  * This is dependent on the implementation of xcall_reschedule().  Currently
 434  * xcall_reschedule does not explicitly call schedule() on the destination CPU,
 435  * but instead relies on the return to user space to handle it. Because
 436  * palacios is a kernel thread schedule will not be called, which is correct.
 437  * If it ever changes to induce side effects, we'll need to figure something
 438  * else out...
 439  */
 440
 441 #include <asm/apic.h>
 442
 443 static void
 444 palacios_interrupt_cpu(
 445         struct v3_vm_info *     vm,
 446         int                     cpu_id,
 447         int                     vector
 448 )
 449 {
 450     if (vector == 0) {
 451         smp_send_reschedule(cpu_id);
 452     } else {
 453         apic->send_IPI_mask(cpumask_of(cpu_id), vector);
 454     }
 455     return;
 456 }
 457
 458 /**
 459  * Dispatches an interrupt to Palacios for handling.
 460  */
 461 static void
 462 palacios_dispatch_interrupt( int vector, void * dev, struct pt_regs * regs ) {
 463     struct v3_interrupt intr = {
 464         .irq            = vector,
 465         .error          = regs->orig_ax,
 466         .should_ack     = 1,
 467     };
 468
 469     if (irq_to_guest_map[vector]) {
 470         v3_deliver_irq(irq_to_guest_map[vector], &intr);
 471     }
 472
 473 }
 474
 475 /**
 476  * Instructs the kernel to forward the specified IRQ to Palacios.
 477  */
 478 static int
 479 palacios_hook_interrupt(struct v3_vm_info *     vm,
 480                         unsigned int            vector ) {
 481     INFO("hooking vector %d\n", vector);
 482
 483     if (irq_to_guest_map[vector]) {
 484         WARNING(
 485                "%s: Interrupt vector %u is already hooked.\n",
 486                __func__, vector);
 487         return -1;
 488     }
 489
 490     DEBUG(
 491            "%s: Hooking interrupt vector %u to vm %p.\n",
 492            __func__, vector, vm);
 493
 494     irq_to_guest_map[vector] = vm;
 495
 496     /*
 497      * NOTE: Normally PCI devices are supposed to be level sensitive,
 498      *       but we need them to be edge sensitive so that they are
 499      *       properly latched by Palacios.  Leaving them as level
 500      *       sensitive would lead to an interrupt storm.
 501      */
 502     //ioapic_set_trigger_for_vector(vector, ioapic_edge_sensitive);
 503
 504     //set_idtvec_handler(vector, palacios_dispatch_interrupt);
 505     if (vector < 32) {
 506         ERROR("unexpected vector for hooking\n");
 507         return -1;
 508     } else {
 509         int device_id = 0;
 510
 511         int flag = 0;
 512         int error;
 513
 514         DEBUG("hooking vector: %d\n", vector);
 515
 516         if (vector == 32) {
 517             flag = IRQF_TIMER;
 518         } else {
 519             flag = IRQF_SHARED;
 520         }
 521
 522         error = request_irq((vector - 32),
 523                             (void *)palacios_dispatch_interrupt,
 524                             flag,
 525                             "interrupt_for_palacios",
 526                             &device_id);
 527
 528         if (error) {
 529             ERROR("error code for request_irq is %d\n", error);
 530             ERROR("request vector %d failed", vector);
 531             return -1;
 532         }
 533     }
 534
 535     return 0;
 536 }
 537
 538
 539
 540 /**
 541  * Acknowledges an interrupt.
 542  */
 543 static int
 544 palacios_ack_interrupt(
 545         int                     vector
 546 )
 547 {
 548   ack_APIC_irq();
 549   DEBUG("Pretending to ack interrupt, vector=%d\n", vector);
 550   return 0;
 551 }
 552
 553 /**
 554  * Returns the CPU frequency in kilohertz.
 555  */
 556 unsigned int
 557 palacios_get_cpu_khz(void)
 558 {
 559     INFO("cpu_khz is %u\n", cpu_khz);
 560
 561     if (cpu_khz == 0) {
 562         INFO("faking cpu_khz to 1000000\n");
 563         return 1000000;
 564     } else {
 565         return cpu_khz;
 566     }
 567   //return 1000000;
 568 }
 569
 570 /**
 571  * Yield the CPU so other host OS tasks can run.
 572  * This will return immediately if there is no other thread that is runnable
 573  * And there is no real bound on how long it will yield
 574  */
 575 void
 576 palacios_yield_cpu(void)
 577 {
 578     schedule();
 579     return;
 580 }
 581
 582 /**
 583  * Yield the CPU so other host OS tasks can run.
 584  * Given now immediately if there is no other thread that is runnable
 585  * And there is no real bound on how long it will yield
 586  */
 587 void palacios_sleep_cpu(unsigned int us)
 588 {
 589
 590     set_current_state(TASK_INTERRUPTIBLE);
 591     if (us) {
 592         unsigned int uspj = 1000000U/HZ;
 593         unsigned int jiffies = us/uspj + ((us%uspj) !=0);  // ceiling
 594         schedule_timeout(jiffies);
 595     } else {
 596         schedule();
 597     }
 598     return;
 599 }
 600
 601 void palacios_wakeup_cpu(void *thread)
 602 {
 603     wake_up_process(thread);
 604     return;
 605 }
 606
 607 /**
 608  * Allocates a mutex.
 609  * Returns NULL on failure.
 610  */
 611 void *
 612 palacios_mutex_alloc(void)
 613 {
 614     spinlock_t *lock = palacios_alloc(sizeof(spinlock_t));
 615
 616     if (lock) {
 617         spin_lock_init(lock);
 618     } else {
 619         ERROR("ALERT ALERT Unable to allocate lock\n");
 620         return NULL;
 621     }
 622
 623     return lock;
 624 }
 625
 626 /**
 627  * Frees a mutex.
 628  */
 629 void
 630 palacios_mutex_free(void * mutex) {
 631     palacios_free(mutex);
 632 }
 633
 634 /**
 635  * Locks a mutex.
 636  */
 637 void
 638 palacios_mutex_lock(void * mutex, int must_spin) {
 639     spin_lock((spinlock_t *)mutex);
 640 }
 641
 642
 643 /**
 644  * Locks a mutex, disabling interrupts on this core
 645  */
 646 void *
 647 palacios_mutex_lock_irqsave(void * mutex, int must_spin) {
 648
 649     unsigned long flags;
 650
 651     spin_lock_irqsave((spinlock_t *)mutex,flags);
 652
 653     return (void *)flags;
 654 }
 655
 656
 657 /**
 658  * Unlocks a mutex.
 659  */
 660 void
 661 palacios_mutex_unlock(
 662         void *                  mutex
 663 )
 664 {
 665     spin_unlock((spinlock_t *)mutex);
 666 }
 667
 668
 669 /**
 670  * Unlocks a mutex and restores previous interrupt state on this core
 671  */
 672 void
 673 palacios_mutex_unlock_irqrestore(void *mutex, void *flags)
 674 {
 675     // This is correct, flags is opaque
 676     spin_unlock_irqrestore((spinlock_t *)mutex,(unsigned long)flags);
 677 }
 678
 679 /**
 680  * Structure used by the Palacios hypervisor to interface with the host kernel.
 681  */
 682 static struct v3_os_hooks palacios_os_hooks = {
 683         .print                  = palacios_print_scoped,
 684         .allocate_pages         = palacios_allocate_pages,
 685         .free_pages             = palacios_free_pages,
 686         .malloc                 = palacios_alloc,
 687         .free                   = palacios_free,
 688         .vaddr_to_paddr         = palacios_vaddr_to_paddr,
 689         .paddr_to_vaddr         = palacios_paddr_to_vaddr,
 690         .hook_interrupt         = palacios_hook_interrupt,
 691         .ack_irq                = palacios_ack_interrupt,
 692         .get_cpu_khz            = palacios_get_cpu_khz,
 693         .start_kernel_thread    = palacios_start_kernel_thread,
 694         .yield_cpu              = palacios_yield_cpu,
 695         .sleep_cpu              = palacios_sleep_cpu,
 696         .wakeup_cpu             = palacios_wakeup_cpu,
 697         .mutex_alloc            = palacios_mutex_alloc,
 698         .mutex_free             = palacios_mutex_free,
 699         .mutex_lock             = palacios_mutex_lock,
 700         .mutex_unlock           = palacios_mutex_unlock,
 701         .mutex_lock_irqsave     = palacios_mutex_lock_irqsave,
 702         .mutex_unlock_irqrestore= palacios_mutex_unlock_irqrestore,
 703         .get_cpu                = palacios_get_cpu,
 704         .interrupt_cpu          = palacios_interrupt_cpu,
 705         .call_on_cpu            = palacios_xcall,
 706         .start_thread_on_cpu    = palacios_start_thread_on_cpu,
 707         .move_thread_to_cpu     = palacios_move_thread_to_cpu,
 708 };
 709
 710
 711
 712
 713 int palacios_vmm_init( char *options )
 714 {
 715     int num_cpus = num_online_cpus();
 716     char * cpu_mask = NULL;
 717
 718     if (cpu_list_len > 0) {
 719         int major = 0;
 720         int minor = 0;
 721         int i = 0;
 722
 723         cpu_mask = palacios_alloc((num_cpus / 8) + 1);
 724
 725         if (!cpu_mask) {
 726             ERROR("Cannot allocate cpu mask\n");
 727             return -1;
 728         }
 729
 730         memset(cpu_mask, 0, (num_cpus / 8) + 1);
 731
 732         for (i = 0; i < cpu_list_len; i++) {
 733             if (cpu_list[i] >= num_cpus) {
 734                 WARNING("CPU (%d) exceeds number of available CPUs. Ignoring...\n", cpu_list[i]);
 735                 continue;
 736             }
 737
 738             major = cpu_list[i] / 8;
 739             minor = cpu_list[i] % 8;
 740
 741             *(cpu_mask + major) |= (0x1 << minor);
 742         }
 743     }
 744
 745     memset(irq_to_guest_map, 0, sizeof(struct v3_vm_info *) * 256);
 746
 747     if (init_print_buffers()) {
 748         ERROR("Cannot initialize print buffers\n");
 749         palacios_free(cpu_mask);
 750         return -1;
 751     }
 752
 753     INFO("palacios_init starting - calling init_v3\n");
 754
 755     Init_V3(&palacios_os_hooks, cpu_mask, num_cpus, options);
 756
 757     return 0;
 758
 759 }
 760
 761
 762 int palacios_vmm_exit( void ) {
 763
 764     Shutdown_V3();
 765
 766     INFO("palacios shutdown complete\n");
 767
 768     deinit_print_buffers();
 769
 770     return 0;
 771 }