2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
11 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
15 * Kyle C. Hale <kh@u.northwestern.edu>
16 * Marcel Flores <marcel-flores@u.northwestern.edu>
17 * Zachary Bischof <zbischof@u.northwestern.edu>
20 * This is free software. You are permitted to use,
21 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
24 #include <palacios/vmm_mem.h>
25 #include <palacios/vmm.h>
26 #include <palacios/vmcb.h>
27 #include <palacios/vmm_decoder.h>
28 #include <palacios/vm_guest_mem.h>
29 #include <palacios/vmm_ctrl_regs.h>
30 #include <palacios/vmm_paging.h>
31 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/svm.h>
33 #include <palacios/svm_handler.h>
34 #include <palacios/vmm_excp.h>
35 #include <palacios/vmm_extensions.h>
36 #include <palacios/vmm_sprintf.h>
37 #include <palacios/vmm_hashtable.h>
39 #include <extensions/trans_mem.h>
40 #include <extensions/tm_util.h>
42 #if !V3_CONFIG_DEBUG_TM_FUNC
44 #define PrintDebug(fmt, args...)
48 * - save/restore register state on XBEGIN/XABORT
49 * - put status codes in RAX
50 * - Implement proper exceptions for failed XBEGINS etc.
53 /* this includes a mov to rax */
/* Guest-visible byte sequence injected over the next instruction:
 *   48 c7 c0 37 13 00 00  = mov rax, 0x1337   (presumably the TM hypercall id)
 *   0f 01 d9              = vmmcall (AMD SVM)
 * Its length should equal INSTR_INJECT_LEN (10 bytes) -- TODO confirm. */
54 static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
/* VM-wide TM bookkeeping shared by all cores (lock, per-core last_trans
 * array, ...); initialization is not visible in this excerpt. */
55 static struct v3_tm_state * tm_global_state = NULL;
59 tm_translate_rip (struct guest_info * core, addr_t * target)
/* Translate the guest's current RIP into a host virtual address (*target).
 * RIP is first linearized through CS, then translated via the guest-physical
 * path when guest paging is off, or the guest-virtual path when it is on.
 * (Return type, the enclosing translation calls, and error handling are
 * elided in this excerpt.) */
62     if (core->mem_mode == PHYSICAL_MEM) {
        /* guest paging off: the linear address is a guest-physical address */
64                 get_addr_linear(core, core->rip, &(core->segments.cs)),
66     } else if (core->mem_mode == VIRTUAL_MEM) {
        /* guest paging on: the linear address must be walked through the
         * guest page tables */
68                 get_addr_linear(core, core->rip, &(core->segments.cs)),
76 tm_read_instr (struct guest_info * core,
/* Copy instruction bytes from guest memory at linear(CS:addr) into a
 * caller-supplied buffer. Chooses the GPA-based or GVA-based copy routine
 * based on the guest's memory mode. (Remaining parameters -- the address,
 * destination buffer, and length -- plus error handling are elided in this
 * excerpt.) */
82     if (core->mem_mode == PHYSICAL_MEM) {
83         v3_read_gpa_memory(core,
84                            get_addr_linear(core, addr , &(core->segments.cs)),
        /* VIRTUAL_MEM path: read through the guest page tables */
89         v3_read_gva_memory(core,
90                            get_addr_linear(core, addr, &(core->segments.cs)),
99 tm_handle_decode_fail (struct guest_info * core)
/* Called when the decoder cannot decode the instruction at RIP: we cannot
 * single-step it, so treat this as catastrophic and flag TM_ABORT on every
 * other core currently inside a transaction. */
104     tm_translate_rip(core, &cur_rip);
106 #ifdef V3_CONFIG_DEBUG_TM_FUNC
    /* dump the undecodable bytes for post-mortem debugging */
107     v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
110     /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
111     for (core_num = 0; core_num < core->vm_info->num_cores; core_num++ ) {
112         struct v3_trans_mem * remote_tm;
114         /* skip local core */
115         if (core_num == core->vcpu_id) {
119         remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");
        /* NULL-check of remote_tm is elided in this excerpt */
121             TM_ERR(core,DECODE,"couldnt get remote_tm\n");
125         /* skip cores that aren't in transactional context */
126         if (remote_tm->TM_MODE == TM_OFF) {
        /* remote core is mid-transaction: mark it for abort */
130         TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
131         remote_tm->TM_ABORT = 1;
138 /* special casing for control-flow instructions
139  * returns 1 if we need to jump
140  * returns -1 on error
 *
 * Determines where the *next* instruction will execute so that the vmmcall
 * can later be injected there. For conditional jumps, the branch condition
 * is evaluated against the guest's RFLAGS *now*; *instr_location becomes
 * either the fall-through address or the branch target, and tm->to_branch
 * records the decision. (The `to_jmp`/`offset` declarations, the case
 * labels, break statements, and the store of tm->offset are elided in this
 * excerpt -- the offset is evidently saved since v3_overwrite_next_instr
 * reads tm->offset later.)
 */
143 tm_handle_ctrl_flow (struct guest_info * core,
144                      struct v3_trans_mem * tm,
145                      addr_t * instr_location,
146                      struct x86_instr * struct_instr)
149     /* special casing for control flow instructions */
150     struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
154     switch (struct_instr->op_type) {
        /* JLE: taken when ZF=1 or SF != OF */
157             TM_DBG(core,DECODE, "!!++ JLE\n");
158             to_jmp = (flags->zf || flags->sf != flags->of);
159             offset = struct_instr->dst_operand.operand;
161             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
163             tm->to_branch = to_jmp;
        /* JAE: taken when CF=0 */
166             TM_DBG(core,DECODE,"!!++ JAE\n");
167             to_jmp = (flags->cf == 0);
168             offset = struct_instr->dst_operand.operand;
170             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
172             tm->to_branch = to_jmp;
        /* unconditional JMP: always taken (to_jmp set in elided line) */
175             TM_DBG(core,DECODE,"!!++ JMP\n");
177             offset = struct_instr->dst_operand.operand;
179             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
181             tm->to_branch = to_jmp;
        /* JNZ: taken when ZF=0 */
184             TM_DBG(core,DECODE,"!!++ JNZ\n");
185             to_jmp = (flags->zf == 0);
186             offset = struct_instr->dst_operand.operand;
188             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
190             tm->to_branch = to_jmp;
        /* JL: taken when SF != OF */
193             TM_DBG(core,DECODE,"!!++ JL\n");
194             to_jmp = (flags->sf != flags->of);
195             offset = struct_instr->dst_operand.operand;
197             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
199             tm->to_branch = to_jmp;
        /* JNS: taken when SF=0 */
202             TM_DBG(core,DECODE,"!!++ JNS\n");
203             to_jmp = (flags->sf == 0);
204             offset = struct_instr->dst_operand.operand;
206             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
208             tm->to_branch = to_jmp;
        /* default: not a handled branch -- next instruction is sequential */
211             *instr_location = core->rip + tm->cur_instr_len;
220  * called inside #UD and VMMCALL handlers
221  * only affects global state in case of quix86 fall over
222  * -> set other cores TM_ABORT to 1, return -2
 *
 * Fetch and decode the instruction at RIP, compute where execution continues
 * (accounting for branches), and save the INSTR_INJECT_LEN bytes that live
 * there into tm->dirty_instr so they can be restored after the injected
 * vmmcall fires. Returns ERR_STORE_FAIL / ERR_STORE_MUST_ABORT on the error
 * paths visible below; the success return is elided in this excerpt.
225 v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
227     struct x86_instr struct_instr;
228     uchar_t cur_instr[INSTR_BUF_SZ];
229     addr_t instr_location;
231     // Fetch the current instruction
232     tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);
234     TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);
236     /* Attempt to decode current instruction to determine its length */
237     if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {
239         TM_ERR(core,Error,"Could not decode currrent instruction (at %llx)\n", (uint64_t)core->rip);
241         /* this will attempt to abort all the remote cores */
242         if (tm_handle_decode_fail(core) == -1) {
243             TM_ERR(core,Error,"Could not handle failed decode\n");
244             return ERR_STORE_FAIL;
247         /* we need to trigger a local abort */
248         return ERR_STORE_MUST_ABORT;
252     /* we can't currently handle REP prefixes, abort */
253     if (struct_instr.op_type != V3_INVALID_OP &&
254             (struct_instr.prefixes.repne ||
255              struct_instr.prefixes.repnz ||
256              struct_instr.prefixes.rep   ||
257              struct_instr.prefixes.repe  ||
258              struct_instr.prefixes.repz)) {
260         TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
261         return ERR_STORE_MUST_ABORT;
    /* remember the length so we know where the next instruction starts */
264     tm->cur_instr_len = struct_instr.instr_length;
266     /* handle jump instructions */
267     tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);
269     /* save next 10 bytes after current instruction, we'll put vmmcall here */
270     tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);
272     /* store the next instruction and its length in info */
273     memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);
280 v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
/* Overwrite the *next* guest instruction with the vmmcall byte sequence so
 * the hypervisor regains control after exactly one instruction executes.
 * Saves the guest's RAX (clobbered by the injected `mov rax`), refuses
 * instructions/injections that straddle a 4KB page boundary, translates the
 * target to an HVA, and records what was dirtied for later restore.
 * NOTE(review): `ptr` is used in the first straddle check before the visible
 * assignment at the line below it -- its initial value must be set in an
 * elided line; confirm against the full source. */
285     tm->clobbered_rax = (core->vm_regs).rax;
289     /* we can't currently handle instructions that span page boundaries */
    /* wrap-around of the offset-within-page indicates a page crossing */
290     if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
291         TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
    /* address of the next instruction: fall-through, or branch target if taken */
295     ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
297     if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
298         TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
    /* translate the guest-virtual target into a host-virtual pointer we can
     * write through (result stored in an elided output argument) */
302     if (v3_gva_to_hva(core,
303                 get_addr_linear(core, ptr, &(core->segments.cs)),
306         TM_ERR(core,Error,"Calculating next rip hva failed\n");
310     TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
311             core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len );
313     /* Copy VMM call into the memory address of beginning of next instruction (ptr) */
314     memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);
316     /* KCH: flag that we've dirtied an instruction, and store its host address */
317     tm->dirty_instr_flag = 1;
318     tm->dirty_gva        = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
328  * this should only be called if TM_STATE == TM_NULL, additionally we check if our dirtied flag if set
 *
 * Undo a previous v3_overwrite_next_instr: copy the saved original bytes
 * back over the injected vmmcall, restore the clobbered RAX, and rewind RIP
 * to the start of the (now restored) instruction.
331 v3_restore_dirty_instr (struct guest_info * core)
333     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
335     /* Restore next instruction, transition to IFETCH state */
336     TM_DBG(core,RESTORE,"Restoring next instruction.\n");
338     /* check if we've actually done an instruction overwrite */
339     if (!(tm->dirty_instr_flag)) {
340         TM_DBG(core,RESTORE,"nothing to restore here...\n");
344     // Actually restore instruction
345     memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);
    /* undo the rax clobber from the injected `mov rax, imm` */
348     (core->vm_regs).rax = tm->clobbered_rax;
351     TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    /* rewind RIP to the restored instruction */
352     core->rip = tm->dirty_gva;
355     tm->dirty_instr_flag = 0;
    /* NOTE(review): clears 15 bytes although only INSTR_INJECT_LEN (10) were
     * stored -- confirm dirty_instr is at least 15 bytes wide */
358     memset(tm->dirty_instr, 0, 15);
360     TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);
367 tm_handle_fault_ifetch (struct guest_info * core,
368                         struct v3_trans_mem * tm)
/* Page fault at RIP while this core is in TM mode: we are at an ifetch.
 * Save the following instruction's bytes, inject the vmmcall after the
 * current instruction, and move to TM_EXEC (single-step) state.
 * Returns TRANS_FAULT_OK, or ERR_TRANS_FAULT_FAIL on error. */
372     TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as faulting address, we must be at an ifetch.\n");
374     sto = v3_store_next_instr(core, tm);
376     if (sto == ERR_STORE_FAIL) {
377         TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
378         return ERR_TRANS_FAULT_FAIL;
379     } else if (sto == ERR_STORE_MUST_ABORT) {
        /* decode problem: locally abort the transaction but report OK so the
         * fault path proceeds normally */
380         TM_DBG(core,EXIT,"aborting for some reason\n");
381         v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
382         return TRANS_FAULT_OK;
385     if (v3_overwrite_next_instr(core, tm) == -1) {
386         TM_ERR(core,PF,"problem overwriting instruction\n");
387         return ERR_TRANS_FAULT_FAIL;
    /* instruction armed: next exit will be the injected vmmcall */
390     tm->TM_STATE = TM_EXEC;
392     return TRANS_FAULT_OK;
397 tm_handle_fault_read (struct guest_info * core,
398                       struct v3_trans_mem * tm,
/* Data-read fault during single-stepped TM execution: record the read in the
 * transaction's read list and the conflict hash. If this address was
 * previously written inside the transaction, serve the read from the staging
 * page (returned as an addr_t) so the core sees its own speculative write;
 * otherwise the fresh address is mapped read-only (mapping code elided).
 * (fault_addr / error parameters are elided from the visible signature.) */
403     // This page fault was caused by a read to memory in the current instruction for a core in TM mode
404     TM_DBG(core,DATA,"Page fault caused by read.\n");
405     TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);
407     if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
408         TM_ERR(core,PF,"problem adding to list\n");
409         return ERR_TRANS_FAULT_FAIL;
412     if (tm_record_access(tm, error.write, fault_addr) == -1) {
413         TM_ERR(core,PF,"problem recording access\n");
414         return ERR_TRANS_FAULT_FAIL;
417     /* if we have previously written to this address, we need to update our
418      * staging page and map it in */
419     if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {
421         TM_DBG(core,PF,"Saw a read from something in the write list\n");
423         /* write the value from linked list to staging page */
424         if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
425             TM_ERR(core,PF, "could not stage entry!\n");
426             return ERR_TRANS_FAULT_FAIL;
429         /* Hand it the staging page */
430         return (addr_t)(tm->staging_page);
434     //Add it to the read set
435     addr_t shadow_addr = 0;
437     TM_DBG(core,PF,"Saw a read from a fresh address\n");
    /* translate for read-set bookkeeping; use of shadow_addr is elided here */
439     if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
440         TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
441         return ERR_TRANS_FAULT_FAIL;
446     return TRANS_FAULT_OK;
451 tm_handle_fault_write (struct guest_info * core,
452                        struct v3_trans_mem * tm,
/* Data-write fault during single-stepped TM execution: record the write in
 * the transaction's write list and conflict hash, seed the staging page with
 * the current memory value (so read-modify-write instructions like ++ see
 * the old value), and return the staging page for the write to land on.
 * (fault_addr / error parameters are elided from the visible signature.) */
457     addr_t virt_data_loc;
458     addr_t shadow_addr    = 0;
460     TM_DBG(core,DATA,"Page fault cause by write\n");
461     TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);
463     if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
464         TM_ERR(core,WRITE,"could not add to list!\n");
465         return ERR_TRANS_FAULT_FAIL;
468     if (tm_record_access(tm, error.write, fault_addr) == -1) {
469         TM_ERR(core,WRITE,"could not record access!\n");
470         return ERR_TRANS_FAULT_FAIL;
473     if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
474         TM_ERR(core,WRITE,"could not translate gva to hva for transaction read\n");
475         return ERR_TRANS_FAULT_FAIL;
478     // Copy existing values to the staging page, populating that field
479     // This avoids errors in optimized code such as ++, where the original
480     // value is not read, but simply incremented
    /* same page offset on the staging page as in the faulting page */
481     data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));
483     if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
484         TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
485         return ERR_TRANS_FAULT_FAIL;
488     TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
489     //memcpy((void*)virt_data_loc, (void*)shadow_addr, sizeof(uint64_t));
    /* copy one 64-bit word of the original value onto the staging page */
490     *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;
492     return (addr_t)(tm->staging_page);
497 tm_handle_fault_extern_ifetch (struct guest_info * core,
498                                struct v3_trans_mem * tm,
/* Ifetch fault on a core that is NOT itself in TM mode, while some other
 * core in the system is: we still single-step (MIME) this core so its
 * accesses can be recorded for conflict detection. Mirrors
 * tm_handle_fault_ifetch but also records the access.
 * (fault_addr / error parameters are elided from the visible signature.) */
504     // system is in tm state, record the access
505     TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");
507     sto = v3_store_next_instr(core,tm);
509     if (sto == ERR_STORE_FAIL) {
510         TM_ERR(core,Error,"Could not store next instruction in transaction\n");
511         return ERR_TRANS_FAULT_FAIL;
513     } else if (sto == ERR_STORE_MUST_ABORT) {
514         TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
515         v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
516         return TRANS_FAULT_OK;
519     if (v3_overwrite_next_instr(core, tm) == -1) {
520         TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
521         return ERR_TRANS_FAULT_FAIL;
524     tm->TM_STATE = TM_EXEC;
    /* log this access so in-transaction cores can detect conflicts with us */
526     if (tm_record_access(tm, error.write, fault_addr) == -1) {
527         TM_ERR(core,IFETCH,"could not record access!\n");
528         return ERR_TRANS_FAULT_FAIL;
531     return TRANS_FAULT_OK;
536 tm_handle_fault_extern_access (struct guest_info * core,
537                                struct v3_trans_mem * tm,
/* Data access fault on a non-TM core while the system is in TM mode:
 * just record the access for conflict detection, nothing to stage.
 * (fault_addr / error parameters are elided from the visible signature.) */
541     TM_DBG(core,PF_HANDLE,"recording access\n");
542     if (tm_record_access(tm, error.write, fault_addr) == -1) {
543         TM_ERR(core,PF_HANDLE,"could not record access!\n");
544         return ERR_TRANS_FAULT_FAIL;
547     return TRANS_FAULT_OK;
552 tm_handle_fault_tmoff (struct guest_info * core)
/* Fault taken after all transactions have ended: clean up any instruction we
 * dirtied (remove the leftover injected vmmcall) and carry on. */
554     TM_DBG(core,PF_HANDLE, "in pf handler but noone is in tm mode anymore (core %d), i should try to eliminate hypercalls\n", core->vcpu_id);
556     if (v3_restore_dirty_instr(core) == -1) {
557         TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
558         return ERR_TRANS_FAULT_FAIL;
561     return TRANS_FAULT_OK;
567  * called from MMU - should mean at least tms->TM_MODE is on
569  * tm->on :  ifetch -> store instr, overwrite instr
570  *           r/w    -> record hash, write log, store instr, overwrite instr
571  * tm->off:  ifetch -> store instr, overwrite instr
572  *           r/w    -> record hash, store instr, overwrite instr
574  * returns ERR_TRANS_FAULT_FAIL on error
575  * TRANS_FAULT_OK when things are fine
576  * addr when we're passing back a staging page
 *
 * Top-level shadow-page-fault dispatcher for TM: routes the fault to one of
 * the tm_handle_fault_* helpers based on (a) whether *this* core is in a
 * transaction, (b) whether it is single-stepping (TM_EXEC), (c) whether the
 * fault is an ifetch (fault address == RIP) or a data read/write, and (d)
 * whether any core system-wide is still transactional.
580 v3_handle_trans_mem_fault (struct guest_info * core,
/* (fault_addr / error parameters elided from the visible signature) */
584     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
585     struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");
588         TM_ERR(core,ERROR,": coudln't get core state\n");
589         return ERR_TRANS_FAULT_FAIL;
593         TM_ERR(core,ERROR,": couldn't get vm trans_mem state\n");
594         return ERR_TRANS_FAULT_FAIL;
597     TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);
    /* this core is transactional and faulted on its own RIP: ifetch */
599     if ((tm->TM_MODE == TM_ON) &&
600             ((void *)fault_addr == (void *)(core->rip))) {
602         return tm_handle_fault_ifetch(core, tm);
604     } else if ((tm->TM_MODE == TM_ON) &&
605                (tm->TM_STATE == TM_EXEC) &&
606                (error.write == 0)) {
608         return tm_handle_fault_read(core, tm, fault_addr, error);
610     } else if ((tm->TM_MODE == TM_ON) &&
611                (tm->TM_STATE == TM_EXEC) &&
612                (error.write == 1)) {
614         return tm_handle_fault_write(core, tm, fault_addr, error);
    /* this core is NOT transactional, but some other core is: MIME paths */
617     } else if ((tms->TM_MODE == TM_ON) &&
618                ((void *)fault_addr == (void *)(core->rip))) {
620         return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);
622     } else if ((tms->TM_MODE == TM_ON) &&
623                (tm->TM_STATE == TM_EXEC)) {
625         return tm_handle_fault_extern_access(core, tm, fault_addr, error);
    /* nobody is transactional any more: clean up stale injections */
628         return tm_handle_fault_tmoff(core);
632     return TRANS_FAULT_OK;
637 tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
/* Injected-vmmcall exit taken when the system-wide TM mode is already off:
 * sanity-check our own mode, restore the overwritten instruction, and leave
 * single-step state. */
639     if (tm->TM_MODE == TM_ON) {
        /* inconsistent: core thinks it's transactional but the system doesn't */
640         TM_ERR(core,EXIT,"we are in tm mode but system is not!\n");
641         return TRANS_HCALL_FAIL;
644     // we got to an exit when things were off!
645     TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");
647     if (v3_restore_dirty_instr(core) == -1) {
648         TM_ERR(core,HCALL,"could not restore dirty instr!\n");
649         return TRANS_HCALL_FAIL;
652     tm->TM_STATE = TM_NULL;
654     return TRANS_HCALL_OK;
659 tm_handle_hcall_dec_abort (struct guest_info * core,
660                            struct v3_trans_mem * tm)
/* Hypercall exit taken while TM_ABORT is set (only reachable from the decode
 * path): run the full abort handler for this core. */
662     // only ever get here from TM DECODE
663     TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");
666     v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
668     TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));
670     return TRANS_HCALL_OK;
675 tm_handle_hcall_ifetch_start (struct guest_info * core,
676                               struct v3_trans_mem * tm)
/* One single-stepped instruction has completed (TM_EXEC): go back to
 * TM_IFETCH and flush the shadow page tables so the next ifetch faults and
 * we regain control at the following instruction. */
678     tm->TM_STATE = TM_IFETCH;
680     TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away VTLB and go into TM_IFETCH\n");
682     // Finally, invalidate the shadow page table
683     v3_invalidate_shadow_pts(core);
685     return TRANS_HCALL_OK;
690 tm_check_list_conflict (struct guest_info * core,
691                         struct v3_trans_mem * tm,
692                         struct list_head * access_list,
/* Walk one of this transaction's access lists (read or write) and check
 * every recorded address against all other cores' access logs; abort the
 * local transaction on the first conflict found.
 * Returns TRANS_HCALL_OK / TRANS_HCALL_FAIL / CHECK_MUST_ABORT.
 * (op_type parameter is elided from the visible signature.) */
695     struct mem_op * curr = NULL;
696     struct mem_op * tmp  = NULL;
699     list_for_each_entry_safe(curr, tmp, access_list, op_node) {
701         conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);
703         if (conflict == ERR_CHECK_FAIL) {
705             TM_ERR(core,EXIT,"error checking for conflicts\n");
706             return TRANS_HCALL_FAIL;
708         } else if (conflict == CHECK_IS_CONFLICT) {
710             TM_DBG(core,EXIT,"we have a conflict, aborting\n");
711             v3_handle_trans_abort(core, TM_ABORT_CONFLICT, 0);
712             return CHECK_MUST_ABORT;
718     return TRANS_HCALL_OK;
723 tm_handle_hcall_check_conflicts (struct guest_info * core,
724                                  struct v3_trans_mem * tm)
/* After each single-stepped instruction while transactional: validate both
 * the write set and the read set against other cores. If a conflict aborted
 * us, return OK (the abort handler already ran); otherwise resume IFETCH. */
728     TM_DBG(core,EXIT,"still TM_ON\n");
729     TM_DBG(core,EXIT,"checking for conflicts\n");
731     if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
732         return TRANS_HCALL_FAIL;
733     } else if (ret == CHECK_MUST_ABORT) {
734         return TRANS_HCALL_OK;
737     if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
738         return TRANS_HCALL_FAIL;
739     } else if (ret == CHECK_MUST_ABORT) {
740         return TRANS_HCALL_OK;
    /* no conflicts: continue single-stepping from the next ifetch */
743     tm->TM_STATE = TM_IFETCH;
745     return TRANS_HCALL_OK;
749 /* trans mem hypercall handler
752  * running mime (tm or tms on)
756  *   check for conflicts
758  *   abort (due to quix86)
 *
 * Registered handler for the injected vmmcall. Fires after each
 * single-stepped instruction: commits the staging page back to the write
 * log, restores the overwritten instruction, then dispatches on abort /
 * exec / conflict-check sub-cases. (hcall argument plumbing and the
 * registration site are elided in this excerpt.)
763 tm_handle_hcall (struct guest_info * core,
764                  unsigned int hcall_id,
767     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
768     struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");
770     if (tms->TM_MODE == TM_OFF) {
771         return tm_handle_hcall_tmoff(core, tm);
774     // Previous instruction has finished, copy staging page back into linked list!
775     if (update_list(tm, &(tm->trans_w_list)) == -1) {
776         TM_ERR(core,HCALL,"could not update_list!\n");
777         return TRANS_HCALL_FAIL;
780     // Done handling previous instruction, must put back the next instruction, reset %rip and go back to IFETCH state
781     TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");
783     if (v3_restore_dirty_instr(core) == -1) {
784         TM_ERR(core,HCALL,"could not restore dirty instr!\n");
785         return TRANS_HCALL_FAIL;
    /* a remote core (e.g. on decode failure) asked us to abort */
789     if (tm->TM_ABORT == 1 &&
790             tms->TM_MODE == TM_ON) {
792         return tm_handle_hcall_dec_abort(core, tm);
794     } else if (tm->TM_STATE == TM_EXEC) {
795         return tm_handle_hcall_ifetch_start(core, tm);
799     if (tm->TM_MODE == TM_ON &&
800             tms->TM_MODE == TM_ON) {
802         return tm_handle_hcall_check_conflicts(core, tm);
804     } else if (tm->TM_MODE == TM_OFF) {
805         TM_DBG(core,EXIT,"we are in TM_OFF\n");
808     return TRANS_HCALL_OK;
813 v3_tm_inc_tnum (struct v3_trans_mem * tm)
/* Bump this core's entry in the global last-transaction-number array (under
 * the global TM lock) and sanity-check it against the core-local t_num.
 * (Declarations of lt/new_ctxt/irqstate and the update of tm->t_num are
 * elided in this excerpt.) */
    /* pointer to the global per-core last_trans array; stable, so reading the
     * pointer outside the lock is fine */
819     lt = tm_global_state->last_trans;
821     // grab global last_trans
822     irqstate = v3_lock_irqsave(tm_global_state->lock);
823     new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
824     v3_unlock_irqrestore(tm_global_state->lock, irqstate);
    /* NOTE(review): this debug print hard-codes lt[0] and lt[1], i.e. it
     * assumes exactly two cores -- confirm for larger configs */
828     TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
829             (int)lt[1], (int)tm->t_num);
831     if (new_ctxt != tm->t_num) {
832         TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
841 tm_set_abort_status (struct guest_info * core,
842                      tm_abrt_cause_t cause,
843                      uint8_t xabort_reason)
/* Build the RTM-style abort status word in guest RAX, mirroring what
 * hardware TSX reports in EAX on abort: XABORT puts its immediate in bits
 * 31:24; conflicts set the conflict and may-succeed-on-retry bits.
 * (break statements and the XABORT-valid bit, if any, are elided.) */
845     core->vm_regs.rax = 0;
848         case TM_ABORT_XABORT:
849             // we put the xabort immediate in eax 31:24
851             core->vm_regs.rax |= (xabort_reason << 24);
853         case TM_ABORT_CONFLICT:
854             // if this was a conflict from another core, it may work
            /* ... on retry, so advertise both CONFLICT and RETRY */
856             core->vm_regs.rax |= (1 << ABORT_CONFLICT) | (1 << ABORT_RETRY);
858         case TM_ABORT_INTERNAL:
            /* internal causes map directly onto a status bit position */
860             core->vm_regs.rax |= (1 << cause);
863             TM_ERR(core, ABORT, "invalid abort cause\n");
869 // xabort_reason is only used for XABORT instruction
 //
 // Abort the current transaction on this core: discard the staging page,
 // flush the VTLB that may still map it, clear the read/write logs, redirect
 // RIP to the registered fallback path (tm->fail_call), publish the abort
 // status in RAX, and garbage-collect stale conflict-log state.
871 v3_handle_trans_abort (struct guest_info * core,
872                        tm_abrt_cause_t cause,
873                        uint8_t xabort_reason)
875     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
877     // Free the staging page
878     if (v3_free_staging_page(tm) == -1) {
879         TM_ERR(core,ABORT,"problem freeing staging page\n");
883     // Clear the VTLB which still has our staging page in it
884     if (v3_clr_vtlb(core) == -1) {
885         TM_ERR(core,ABORT,"problem clearing vtlb\n");
    /* drop the speculative read/write sets */
890     v3_clear_tm_lists(tm);
892     TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);
894     if (tm->TM_MODE == TM_ON) {
        /* resume the guest at the abort/fallback handler it registered */
895         TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
896         core->rip = tm->fail_call;
905     tm_set_abort_status(core, cause, xabort_reason);
907     // time to garbage collect
908     if (tm_hash_gc(tm) == -1) {
909         TM_ERR(core,GC,"could not gc!\n");
918 tm_hash_fn (addr_t key)
/* Hash function for address-keyed tables: hashes the raw address value. */
920     return v3_hash_long(key, sizeof(void *));
925 tm_eq_fn (addr_t key1, addr_t key2)
/* Key equality for address-keyed tables: plain value comparison. */
927     return (key1 == key2);
932 tm_hash_buf_fn (addr_t key)
/* Hash function for pre-hashed buffer keys. NOTE(review): uses
 * sizeof(addr_t) where tm_hash_fn uses sizeof(void *) -- identical on
 * 64-bit, but confirm the asymmetry is intentional. */
934     return v3_hash_long(key, sizeof(addr_t));
939 tm_eq_buf_fn(addr_t key1, addr_t key2)
/* Key equality for pre-hashed buffer keys: plain value comparison. */
941     return (key1 == key2);
945 /* this checks if the remote access was done on the same
946  * local transaction number as the current one
 *
 * For each *other* core, build the (gva, core, last-trans) tuple key and
 * look it up in that core's access_type table. The Haswell-style conflict
 * matrix applies: our write conflicts with any remote access; our read
 * conflicts only with a remote write.
 * Returns CHECK_IS_CONFLICT / CHECK_NO_CONFLICT / ERR_CHECK_FAIL.
 * (Several parameters and the key/buf declarations are elided.) */
948 tm_check_context (struct v3_vm_info * vm,
955     uint64_t core_id_sub;
956     struct v3_tm_access_type * type = NULL;
958     for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {
959         struct v3_trans_mem * remote_tm;
963         /* skip the core that's doing the checking */
964         if (core_id_sub == core_num) {
968         remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");
        /* NULL-check of remote_tm is elided in this excerpt */
970             PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
971             return ERR_CHECK_FAIL;
        /* key = hash of the (gva, remote core id, remote last-trans) tuple */
974         buf[0] = (void *)gva;
975         buf[1] = (void *)core_id_sub;
976         buf[2] = (void *)curr_lt[core_id_sub];
978         key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);
980         type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);
        /* our write vs any remote access, or our read vs a remote write */
984         if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) ||
985                 (op_type != OP_TYPE_WRITE && type->w)) {
986             return CHECK_IS_CONFLICT;
991     return CHECK_NO_CONFLICT;
995 /* check all the contexts in the list for a conflict
 *
 * Walk the chained-hash context list for one gva; for each chain entry whose
 * recorded transaction number for this core matches our current context,
 * run the per-core conflict check. Short-circuits on failure or conflict.
 * (Several parameters are elided from the visible signature.) */
997 tm_check_all_contexts (struct v3_vm_info * vm,
998                        struct list_head * hash_list,
1004     struct hash_chain * curr = NULL;
1005     struct hash_chain * tmp  = NULL;
1006     uint64_t * curr_lt       = NULL;
1009     list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {
1011         curr_lt = curr->curr_lt;
        /* only contexts from our current transaction number are relevant */
1013         if (curr_lt[core_num] == curr_ctxt) {
1015             ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);
1017             if (ret == ERR_CHECK_FAIL) {
1018                 return ERR_CHECK_FAIL;
1019             } else if (ret == CHECK_IS_CONFLICT) {
1020                 return CHECK_IS_CONFLICT;
1027     return CHECK_NO_CONFLICT;
1031 /* The following access patterns trigger an abort:
1032  * We: Read     | Anyone Else: Write
1033  * We: Write    | Anyone Else: Read, Write
1035  * (pg 8-2 of haswell manual)
1037  * returns ERR_CHECK_FAIL on error
1038  * returns CHECK_IS_CONFLICT if there is a conflict
1039  * returns CHECK_NO_CONFLICT if there isn't
 *
 * Entry point for conflict detection on a single address: for every other
 * core, look up its per-address context chain (addr_ctxt) and delegate to
 * tm_check_all_contexts. (Several parameters are elided.)
1042 tm_check_conflict (struct v3_vm_info * vm,
1050     /* loop over other cores -> core_id */
1051     for (core_id = 0; core_id < vm->num_cores; core_id++) {
1053         struct guest_info * core = NULL;
1054         struct v3_trans_mem * tm = NULL;
1055         struct list_head * hash_list;
1057         /* only check other cores */
1058         if (core_id == core_num) {
1062         core = &(vm->cores[core_id]);
1063         tm = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");
        /* NULL-check of tm is elided in this excerpt */
1066             PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
1067             return ERR_CHECK_FAIL;
1070         /* this core didn't access the address, move on */
1071         if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
1076         /* loop over chained hash for gva, find fields with curr_ctxt -> curr_lt*/
1077         int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);
1079         if (ret == ERR_CHECK_FAIL) {
1080             return ERR_CHECK_FAIL;
1081         } else if (ret == CHECK_IS_CONFLICT) {
1082             return CHECK_IS_CONFLICT;
1088     return CHECK_NO_CONFLICT;
1093 tm_need_to_gc (struct v3_trans_mem * tm,
1094                struct hash_chain * curr,
/* Decide whether one context chain entry is stale enough to collect:
 * collectable only if *no* core could still need it, i.e. every recorded
 * transaction number in the entry is strictly older than that core's
 * current last-trans snapshot. (Remaining parameters, the tmoff fast path,
 * and the return statements are elided in this excerpt.) */
1101     /* if none of the cores are in transactional context,
1102      * we know we can collect this context
1106     for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
1107         /* if *any* of the cores are active in a transaction
1108          * number that is current (listed in this context),
1109          * we know we can't collect this context, as it
1110          * will be needed when that core's transaction ends
1112         if (curr->curr_lt[i] >= lt_copy[i]) {
1124 tm_del_stale_ctxt (struct hash_chain * curr)
/* Unlink a stale context entry from its chain and free its last-trans
 * snapshot. NOTE(review): the free of `curr` itself is not visible in this
 * excerpt -- confirm it happens in an elided line. */
1126         list_del(&(curr->lt_node));
1127         V3_Free(curr->curr_lt);
1133 tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
/* Remove one access-type record from the hash (without freeing the value --
 * the 0 flag) and keep the entry counter in sync. */
1135     v3_htable_remove(tm->access_type, key, 0);
1136     (tm->access_type_entries)--;
1141 tm_collect_context (struct v3_trans_mem * tm,
1142                     struct hashtable_iter * ctxt_iter,
1143                     struct hash_chain * curr,
1144                     uint64_t * begin_time,
1145                     uint64_t * end_time,
/* Collect one stale context: for every core, delete the matching
 * access-type record keyed by (gva, core, recorded last-trans), then delete
 * the context chain entry itself. Bails out if GC has already run too long.
 * (gva parameter, rdtsc of *end_time, and buf[1] assignment are elided.) */
1150     for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
1152         struct v3_tm_access_type * type;
        /* 100M-cycle budget so GC can't stall the VM indefinitely */
1156         if ((*end_time - *begin_time) > 100000000) {
1157             TM_ERR(tm->ginfo,GC,"time threshhold exceeded, exiting!!!\n");
1161         buf[0] = (void *)gva;
1163         buf[2] = (void *)curr->curr_lt[i];
1165         key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);
1167         type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);
1169         if (!type) { // something has gone terribly wrong
1170             TM_ERR(tm->ginfo,GC,"could not find accesstype entry to gc, THIS! IS! WRONG!\n");
1174         /* delete the access type entry */
1175         tm_del_acc_entry(tm, key);
1178     /* delete the stale context */
1179     tm_del_stale_ctxt(curr);
1186 tm_collect_all_contexts (struct v3_trans_mem * tm,
1187                          struct hashtable_iter * ctxt_iter,
/* Examine every context chained under the iterator's current address key,
 * collecting those no longer referenced by any core's active transaction.
 * Advances the iterator (either via iter_remove when the chain empties, or
 * iter_advance otherwise). (tmoff/lt_copy parameters visible in the
 * signature tail below.) */
1190                          uint64_t * begin_time,
1191                          uint64_t * end_time)
1193     struct hash_chain * tmp;
1194     struct hash_chain * curr;
1195     struct list_head * chain_list;
1198     gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
1200     chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);
1202     /* this is a chained hash, so for each address, we will have
1203      * a list of contexts. We now check each context to see
1204      * whether or not it can be collected
1206     list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
1208         uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);
1210         /* not garbage, go on to the next context in the list */
1212             TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
1216         TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);
1218         /* found one, delete corresponding entries in access_type */
1219         if (tm_collect_context(tm, ctxt_iter, curr, begin_time, end_time, gva) == -1) {
1220             TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
1226     /* if context list (hash chain) is now empty, remove the hash entry */
1227     if (list_empty(chain_list)) {
1228         v3_htable_iter_remove(ctxt_iter, 0);
1229         (tm->addr_ctxt_entries)--;
1231         v3_htable_iter_advance(ctxt_iter);
1234     /* give the CPU away NONONO NEVER YIELD WHILE HOLDING A LOCK */
1242 tm_hash_gc (struct v3_trans_mem * tm)
/* Garbage-collect stale conflict-log state: snapshot the global last-trans
 * array under the global lock, then walk the whole addr_ctxt table (with
 * both hash locks held) collecting contexts no active transaction can still
 * reference. Timed with rdtsc for debug reporting. */
1244     addr_t irqstate, irqstate2;
1246     uint64_t begin_time, end_time, tmoff;
1248     struct v3_tm_state * tms = NULL;
1249     struct hashtable_iter * ctxt_iter = NULL;
1251     tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");
    /* (lookup failure, not allocation, despite the message below) */
1253         TM_ERR(tm->ginfo,GC,"could not alloc tms\n");
1257     TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
1258     TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
1259     TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));
    /* tmoff: nonzero when no core is currently transactional */
1261     tmoff = (tms->cores_active == 0);
1263     lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
1265         TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
1269     memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
1271     rdtscll(begin_time);
1273     /* lt_copy holds the last transaction number for each core */
1274     irqstate = v3_lock_irqsave(tm_global_state->lock);
1275     memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
1276     v3_unlock_irqrestore(tm_global_state->lock, irqstate);
1278     /* lock both hashes */
1279     irqstate = v3_lock_irqsave(tm->addr_ctxt_lock);
1280     irqstate2 = v3_lock_irqsave(tm->access_type_lock);
1282     /* loop over hash entries in addr_ctxt */
1283     ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
1285         TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
1290     /* we check each address stored in the hash */
1291     while (ctxt_iter->entry) {
1292         /* NOTE: this call advances the hash iterator */
1293         if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy, &begin_time, &end_time) == -1) {
1300     v3_destroy_htable_iter(ctxt_iter);
    /* NOTE(review): the saved-flag variables appear SWAPPED here --
     * addr_ctxt_lock was saved into irqstate and access_type_lock into
     * irqstate2 above, but they are restored with the opposite variables.
     * Harmless only if both saves captured the same IRQ state; verify. */
1303     v3_unlock_irqrestore(tm->access_type_lock, irqstate);
1304     v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate2);
1309         TM_ERR(tm->ginfo,GC,"garbage collection failed, time spent: %d cycles\n", (int)(end_time - begin_time));
1311         TM_DBG(tm->ginfo,GC,"ended garbage collection succesfuly, time spent: %d cycles\n", (int)(end_time - begin_time));
1314     TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
1315     TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));
1321 /* TODO: break out the for loops in these functions
 *
 * Record an access against an address that already has an addr_ctxt entry:
 * reuse an existing context chain entry whose last-trans snapshot matches
 * lt_copy exactly, or append a new one (which TAKES OWNERSHIP of lt_copy),
 * then create/update the per-core access_type records keyed by
 * (gva, core_id, core_lt). (gva and other parameters, plus the r/w flag
 * assignments on `type`, are elided in this excerpt.) */
1323 tm_update_ctxt_list (struct v3_trans_mem * tm,
1327                      struct list_head * hash_list)
1329     struct hash_chain * curr = NULL;
1330     struct hash_chain * tmp  = NULL;
1331     uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    /* look for a chain entry whose snapshot matches lt_copy in full */
1336     list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {
1340         for (i = 0; i < num_cores; i++) {
1341             if (curr->curr_lt[i] != lt_copy[i]) {
    /* no matching context: append a fresh chain entry owning lt_copy */
1355         struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));
1358             TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
1362         memset(new_l, 0, sizeof(struct hash_chain));
1364         new_l->curr_lt = lt_copy;
1366         list_add_tail(&(new_l->lt_node), hash_list);
1369     for (core_id = 0; core_id < num_cores; core_id++) {
1370         struct v3_tm_access_type * type;
1371         struct v3_ctxt_tuple tup;
1372         tup.gva     = (void*)gva;
1373         tup.core_id = (void*)core_id;
1374         tup.core_lt = (void*)lt_copy[core_id];
1377         key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));
1381         type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);
        /* no record yet for this (gva, core, lt): create one */
1386             type = V3_Malloc(sizeof(struct v3_tm_access_type));
1389                 TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
1401             if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
1402                 TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
1405             (tm->access_type_entries)++;
1413 /* no entry in addr-ctxt yet, create one */
/*
 * First access to gva: allocate a new hash chain list, seed it with the
 * current logical-time snapshot, insert it into tm->addr_ctxt keyed by gva,
 * then create one access_type record per core.
 *
 * Ownership: lt_copy is adopted by the new hash_chain; hash_list and new_l
 * are owned by tm->addr_ctxt on success. Error paths (mostly elided in this
 * listing) appear to unwind via list_del at the bottom.
 */
1415 tm_create_ctxt_key (struct v3_trans_mem * tm,
1420 struct list_head * hash_list = NULL;
1421 struct hash_chain * new_l = NULL;
1422 uint64_t num_cores = tm->ginfo->vm_info->num_cores;
1424 hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
1427 TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
1431 INIT_LIST_HEAD(hash_list);
1433 new_l = V3_Malloc(sizeof(struct hash_chain));
1436 TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
1440 memset(new_l, 0, sizeof(struct hash_chain));
/* the chain link takes ownership of the caller's logical-time snapshot */
1442 new_l->curr_lt = lt_copy;
1444 /* add the context to the hash chain */
1445 list_add_tail(&(new_l->lt_node), hash_list);
1447 if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
1448 TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
/* counter mirrors v3_htable_count(tm->addr_ctxt) */
1452 (tm->addr_ctxt_entries)++;
1455 /* TODO: we need a way to unwind and deallocate for all cores on failure here */
1456 for (core_id = 0; core_id < num_cores; core_id++) {
1457 struct v3_tm_access_type * type = NULL;
1458 struct v3_ctxt_tuple tup;
1459 tup.gva = (void*)gva;
1460 tup.core_id = (void*)core_id;
1461 tup.core_lt = (void*)lt_copy[core_id];
1464 type = V3_Malloc(sizeof(struct v3_tm_access_type));
1467 TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
/* key the access record on the (gva, core, logical-time) tuple */
1477 key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));
1479 if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
1480 TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
1483 (tm->access_type_entries)++;
/* unwind: remove the chain link added above (error path) */
1489 list_del(&(new_l->lt_node));
1499 * called during MIME execution
1500 * record memory access in conflict logs
1501 * this locks the table during insertion
/*
 * Record a guest memory access (gva, read/write) in the conflict-detection
 * tables. Snapshots the global per-core last-transaction vector under
 * tm_global_state->lock, then either creates a fresh context chain for the
 * address (first touch) or updates the existing one.
 *
 * Ownership: lt_copy is handed off to tm_create_ctxt_key /
 * tm_update_ctxt_list on both paths.
 */
1504 tm_record_access (struct v3_trans_mem * tm,
1509 struct list_head * hash_list;
1513 num_cores = tm->ginfo->vm_info->num_cores;
1515 TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
1516 (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));
1517 //PrintDebug(tm->ginfo->vm_info, tm->ginfo,"\tWe think that addr-ctxt.cnt = %d, access-type.cnt = %d\n",(int)tm->addr_ctxt_entries,(int)tm->access_type_entries);
/* one slot per vcore for the logical-time snapshot */
1519 lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);
1521 TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
1525 memset(lt_copy, 0, sizeof(uint64_t)*num_cores);
/* copy the global last_trans vector atomically w.r.t. other cores */
1527 irqstate = v3_lock_irqsave(tm_global_state->lock);
1528 memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
1529 v3_unlock_irqrestore(tm_global_state->lock, irqstate);
1531 if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
1532 /* we haven't created a context list for this address yet, go do it */
1533 return tm_create_ctxt_key(tm, lt_copy, gva, write);
1536 /* we have a context list for this addres already, do we need to create a new context? */
1537 return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
/*
 * Advertise RTM support to the guest by editing the virtual CPUID:
 * raises the max standard/extended leaf so leaf 7 is reachable, then sets
 * the RTM bit (CPUID.07H:EBX bit 11) on both the Intel and AMD leaves.
 */
1545 tm_prepare_cpuid (struct v3_vm_info * vm)
1548 V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");
1550 /* increase max CPUID function to 7 (extended feature flags enumeration) */
1551 v3_cpuid_add_fields(vm,0x0,
1558 /* do the same for AMD */
1559 v3_cpuid_add_fields(vm,0x80000000,
1560 0xffffffff, 0x80000007,
1566 /* enable RTM (CPUID.07H.EBX.RTM = 1) */
1567 v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
1568 v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
/*
 * VM-wide init for the trans_mem extension: allocates the shared
 * v3_tm_state, registers the TM kickback hypercall, initializes the global
 * lock and per-core last-transaction array, publishes the state in
 * tm_global_state, and enables RTM in the guest CPUID.
 *
 * Error paths (partially elided here) unwind the lock and hypercall at the
 * bottom labels.
 */
1573 init_trans_mem (struct v3_vm_info * vm,
1574 v3_cfg_tree_t * cfg,
1577 struct v3_tm_state * tms;
1579 PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");
1581 tms = V3_Malloc(sizeof(struct v3_tm_state));
1583 PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
1587 memset(tms, 0, sizeof(struct v3_tm_state));
1589 if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
1590 PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
1594 v3_lock_init(&(tms->lock));
/* VM starts with transactional mode disabled and no active cores */
1596 tms->TM_MODE = TM_OFF;
1597 tms->cores_active = 0;
/* per-core last-transaction logical-time vector, zeroed */
1599 uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);
1601 PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
1605 memset(lt, 0, sizeof(uint64_t) * vm->num_cores);
1608 for (i = 0; i < vm->num_cores; i++) {
1612 tms->last_trans = lt;
/* publish for access from fault/record paths (file-scope global) */
1615 tm_global_state = tms;
1617 tm_prepare_cpuid(vm);
/* error unwind */
1622 v3_lock_deinit(&(tms->lock));
1623 v3_remove_hypercall(vm, TM_KICKBACK_CALL);
/*
 * Per-core init: allocates and zeroes the core's v3_trans_mem state,
 * initializes the read/write conflict lists, creates the addr_ctxt and
 * access_type hash tables with their locks, and sets the core to
 * TM_NULL / TM_OFF. Error path frees the addr_ctxt table.
 */
1631 init_trans_mem_core (struct guest_info * core,
1635 struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));
1637 TM_DBG(core,INIT, "Trans Mem. Core Init\n");
1640 TM_ERR(core,INIT, "Problem allocating TM state\n");
1644 memset(tm, 0, sizeof(struct v3_trans_mem));
/* transactional read/write sets for this core */
1646 INIT_LIST_HEAD(&tm->trans_r_list);
1647 INIT_LIST_HEAD(&tm->trans_w_list);
/* gva -> context chain table (0 = default initial size) */
1649 tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
1650 if (!(tm->addr_ctxt)) {
1651 TM_ERR(core,INIT,"problem creating addr_ctxt\n");
/* (gva, core, logical-time) -> access-type table */
1655 tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
1656 if (!(tm->access_type)) {
1657 TM_ERR(core,INIT,"problem creating access_type\n");
1661 v3_lock_init(&(tm->addr_ctxt_lock));
1662 v3_lock_init(&(tm->access_type_lock));
/* core starts outside any transaction */
1664 tm->TM_STATE = TM_NULL;
1665 tm->TM_MODE = TM_OFF;
1671 tm->access_type_entries = 0;
1672 tm->addr_ctxt_entries = 0;
1673 tm->dirty_instr_flag = 0;
1675 /* TODO: Cache Model */
1676 //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
1677 //tm->box->init = init_cache;
1678 //tm->box->init(sample_spec, tm->box);
/* error unwind: free the first table if the second failed */
1685 v3_free_htable(tm->addr_ctxt, 0, 0);
/*
 * VM-wide teardown: removes the TM kickback hypercall and destroys the
 * global state lock. (Freeing of tms itself is not visible in this listing.)
 */
1693 deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
1695 struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;
1697 if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
1698 PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
1702 v3_lock_deinit(&(tms->lock));
/*
 * Per-core teardown: clears the read/write sets, warns if the staging page
 * was never freed, walks addr_ctxt deleting every context chain entry, then
 * frees both hash tables and their locks.
 */
1713 deinit_trans_mem_core (struct guest_info * core,
1717 struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
1718 struct hashtable_iter * ctxt_iter = NULL;
1720 v3_clear_tm_lists(tm);
1722 if (tm->staging_page) {
1723 TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
1726 ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
1728 TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
1732 /* delete all context entries for each hashed address */
1733 while (ctxt_iter->entry) {
1734 struct hash_chain * tmp;
1735 struct hash_chain * curr;
1736 struct list_head * chain_list;
1739 gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
1740 chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);
1742 /* delete the context */
1743 list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
1744 tm_del_stale_ctxt(curr);
1747 v3_htable_iter_advance(ctxt_iter);
1750 v3_destroy_htable_iter(ctxt_iter);
1752 /* we've already deleted the values in this one */
1753 v3_free_htable(tm->addr_ctxt, 0, 0);
1755 /* KCH WARNING: we may not want to free access type values here */
1756 v3_free_htable(tm->access_type, 1, 0);
1758 v3_lock_deinit(&(tm->addr_ctxt_lock));
1759 v3_lock_deinit(&(tm->access_type_lock));
/*
 * Extension descriptor wiring the trans_mem lifecycle callbacks into the
 * Palacios extension framework; registered below (the registering init
 * function's header is elided in this listing).
 */
1769 static struct v3_extension_impl trans_mem_impl = {
1770 .name = "trans_mem",
1772 .vm_init = init_trans_mem,
1773 .vm_deinit = deinit_trans_mem,
1774 .core_init = init_trans_mem_core,
1775 .core_deinit = deinit_trans_mem_core,
1780 register_extension(&trans_mem_impl);
1784 * tms->on => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
1785 * tms->off => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
/*
 * Emulate XEND: validates we are inside a transaction (else inject #UD),
 * commits the buffered write set from the staging page, frees the staging
 * page, flushes the vtlb (which may still map the staging page), clears the
 * per-core lists, advances RIP past the instruction, and garbage collects
 * the conflict tables.
 */
1788 tm_handle_xend (struct guest_info * core,
1789 struct v3_trans_mem * tm)
/* timestamp transaction exit for stats/debug */
1791 rdtscll(tm->exit_time);
1793 // Error checking! make sure that we have gotten here in a legitimate manner
1794 if (tm->TM_MODE != TM_ON) {
1795 TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
1796 v3_free_staging_page(tm);
1798 v3_clear_tm_lists(tm);
/* XEND outside a transaction is #UD per the RTM spec */
1799 v3_raise_exception(core, UD_EXCEPTION);
1803 /* Our transaction finished! */
1804 /* Copy over data from the staging page */
1805 TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");
1807 if (commit_list(core, tm) == -1) {
1808 TM_ERR(core,UD,"error commiting tm list to memory\n");
1812 TM_DBG(core,UD,"Freeing staging page and internal data structures\n");
1814 // Free the staging page
1815 if (v3_free_staging_page(tm) == -1) {
1816 TM_ERR(core,XEND,"couldnt free staging page\n");
1820 // clear vtlb, as it may still contain our staging page
1821 if (v3_clr_vtlb(core) == -1) {
1822 TM_ERR(core,XEND,"couldnt clear vtlb\n");
1827 v3_clear_tm_lists(tm);
1829 /* Set the state and advance the RIP */
1830 TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
1831 core->rip += XEND_INSTR_LEN;
1835 // time to garbage collect
1837 if (tm_hash_gc(tm) == -1) {
1838 TM_ERR(core,XEND,"could not gc!\n");
1847 * tms->on => handle our abort code, handle_trans_abort will clear necessary state
1848 * tms->off => handle our abort code, handle_trans_abort will clear necessary state
/*
 * Emulate XABORT imm8: pulls the 8-bit abort reason from the third
 * instruction byte, injects #UD if we are not in a transaction, restores a
 * dirtied instruction if single-stepping state is active, then delegates
 * the actual abort (register restore, EAX status) to v3_handle_trans_abort.
 */
1851 tm_handle_xabort (struct guest_info * core,
1852 struct v3_trans_mem * tm,
1857 // we must reflect the immediate back into EAX 31:24
/* instr layout: 0xc6 0xf8 imm8 — the immediate is at offset 2 */
1858 reason = *(uint8_t*)(instr+2);
1860 /* TODO: this probably needs to move somewhere else */
1861 rdtscll(tm->exit_time);
1863 // Error checking! make sure that we have gotten here in a legitimate manner
1864 if (tm->TM_MODE != TM_ON) {
1865 TM_DBG(core, UD, "We got here while not in a transactional core!\n");
1866 v3_raise_exception(core, UD_EXCEPTION);
1869 TM_DBG(core,UD,"aborting\n");
/* if we were mid single-step, undo the instruction patch first */
1871 if (tm->TM_STATE != TM_NULL) {
1872 v3_restore_dirty_instr(core);
1876 v3_handle_trans_abort(core, TM_ABORT_XABORT, reason);
1883 * tms->on => we set up our running env, set_tm will clear other vtlb's to start single stepping
1884 * tms->off => we set up our running env, set_tm will not clear anyone elses vtlb
/*
 * Emulate XBEGIN rel32: injects #UD on nested XBEGIN, records entry time
 * and exit count, enables TM mode for the core, computes the fallback
 * address from the rel32 displacement at instr+2, flushes the shadow page
 * tables, and advances RIP past the instruction.
 */
1887 tm_handle_xbegin (struct guest_info * core,
1888 struct v3_trans_mem * tm,
1891 sint32_t rel_addr = 0;
1893 if (tm->TM_MODE == TM_ON) {
/* NOTE(review): real RTM nests transactions; this model treats it as #UD */
1894 TM_ERR(core,UD,"We got here while already in a transactional region!");
1895 v3_raise_exception(core, UD_EXCEPTION);
1898 rdtscll(tm->entry_time);
1899 tm->entry_exits = core->num_exits;
1901 /* set the tm_mode for this core */
1904 TM_DBG(core,UD,"Set the system in TM Mode, save fallback address");
1906 // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
1907 rel_addr = *(sint32_t*)(instr+2);
/* fallback target is relative to the end of the XBEGIN instruction */
1908 tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;
1910 TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);
1912 /* flush the shadow page tables */
1913 TM_DBG(core,UD,"Throwing out the shadow table");
1916 // Increase RIP, ready to go to next instruction
1917 core->rip += XBEGIN_INSTR_LEN;
1924 * tms->on => we set up our running env, set_tm will clear other vtlb's to start single stepping
1925 * tms->off => we set up our running env, set_tm will not clear anyone elses vtlb
/*
 * Emulate XTEST: per the RTM spec, ZF (rflags bit 6) is cleared when
 * executing inside a transaction and set otherwise; then skip the
 * instruction.
 */
1928 tm_handle_xtest (struct guest_info * core,
1929 struct v3_trans_mem * tm)
1931 // if we are in tm mode, set zf to 0, otherwise 1
1932 if (tm->TM_MODE == TM_ON) {
1933 core->ctrl_regs.rflags &= ~(1ULL << 6);
1935 core->ctrl_regs.rflags |= (1ULL << 6);
1938 core->rip += XTEST_INSTR_LEN;
1945 * XBEGIN c7 f8 rel32
/*
 * #UD exit handler: the RTM instructions fault with #UD on hardware without
 * TSX, so we fetch the bytes at RIP and dispatch on the opcode:
 *   c7 f8 rel32    -> XBEGIN
 *   c6 f8 imm8     -> XABORT
 *   0f 01 d5       -> XEND
 *   0f 01 d6       -> XTEST
 * Anything else is a genuine #UD and is reflected back to the guest.
 */
1950 tm_handle_ud (struct guest_info * core)
1952 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
1953 uchar_t instr[INSTR_BUF_SZ];
1954 uint8_t byte1, byte2, byte3;
/* read the faulting instruction bytes from guest memory at RIP */
1956 tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);
1958 byte1 = *(uint8_t *)((addr_t)instr);
1959 byte2 = *(uint8_t *)((addr_t)instr + 1);
1960 byte3 = *(uint8_t *)((addr_t)instr + 2);
1963 if (byte1 == 0xc7 && byte2 == 0xf8) { /* third byte is an immediate */
1965 TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx", byte1, byte2, byte3, (uint64_t)core->rip);
1967 if (tm_handle_xbegin(core, tm, instr) == -1) {
1968 TM_ERR(core, UD, "Problem handling XBEGIN\n");
1972 } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an immediate */
1974 TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);
1976 if (tm_handle_xabort(core, tm, instr) == -1) {
1977 TM_ERR(core, UD, "Problem handling XABORT\n");
1981 } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {
1983 TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);
1985 if (tm_handle_xend(core, tm) == -1) {
1986 TM_ERR(core, UD, "Problem handling XEND\n");
1991 } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) { /* third byte is an immediate */
1993 TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);
1995 if (tm_handle_xtest(core, tm) == -1) {
1996 TM_ERR(core, UD, "Problem handling XTEST\n");
2002 /* oh no, this is still unknown, pass the error back to the guest! */
2003 TM_DBG(core,UD,"Encountered:%x %x %x\n", byte1, byte2, byte3);
2004 v3_raise_exception(core, UD_EXCEPTION);
/*
 * SVM exception-intercept dispatcher for TM. #UD (EXCP6) goes to the RTM
 * decoder (tm_handle_ud). Every other intercepted exception follows one
 * pattern: outside a transaction the exception is simply re-injected into
 * the guest; inside a transaction it aborts the transaction with
 * TM_ABORT_UNSPECIFIED, matching RTM's "exceptions abort the transaction"
 * rule. The exit-code-to-vector mapping relies on SVM_EXIT_EXCP0 + vector.
 */
2012 v3_tm_handle_exception (struct guest_info * info,
2015 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2018 TM_ERR(info,ERR,"TM extension state not found\n");
2022 switch (exit_code) {
2023 /* any of these exceptions should abort current transactions */
2024 case SVM_EXIT_EXCP6:
2025 if (tm_handle_ud(info) == -1) {
2029 case SVM_EXIT_EXCP0:
2030 if (tm->TM_MODE != TM_ON) {
2031 v3_raise_exception(info, DE_EXCEPTION);
2034 TM_DBG(info,EXCP,"aborting due to DE exception\n");
2035 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2038 case SVM_EXIT_EXCP1:
2039 if (tm->TM_MODE != TM_ON) {
2040 v3_raise_exception(info, DB_EXCEPTION);
2043 TM_DBG(info,EXCP,"aborting due to DB exception\n");
2044 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2047 case SVM_EXIT_EXCP3:
2048 if (tm->TM_MODE != TM_ON) {
2049 v3_raise_exception(info, BP_EXCEPTION);
2052 TM_DBG(info,EXCP,"aborting due to BP exception\n");
2053 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2056 case SVM_EXIT_EXCP4:
2057 if (tm->TM_MODE != TM_ON) {
2058 v3_raise_exception(info, OF_EXCEPTION);
2061 TM_DBG(info,EXCP,"aborting due to OF exception\n");
2062 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2065 case SVM_EXIT_EXCP5:
2066 if (tm->TM_MODE != TM_ON) {
2067 v3_raise_exception(info, BR_EXCEPTION);
2070 TM_DBG(info,EXCP,"aborting due to BR exception\n");
2071 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2074 case SVM_EXIT_EXCP7:
2075 if (tm->TM_MODE != TM_ON) {
2076 v3_raise_exception(info, NM_EXCEPTION);
2079 TM_DBG(info,EXCP,"aborting due to NM exception\n");
2080 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2083 case SVM_EXIT_EXCP10:
2084 if (tm->TM_MODE != TM_ON) {
2085 v3_raise_exception(info, TS_EXCEPTION);
2088 TM_DBG(info,EXCP,"aborting due to TS exception\n");
2089 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2092 case SVM_EXIT_EXCP11:
2093 if (tm->TM_MODE != TM_ON) {
2094 v3_raise_exception(info, NP_EXCEPTION);
2097 TM_DBG(info,EXCP,"aborting due to NP exception\n");
2098 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2101 case SVM_EXIT_EXCP12:
2102 if (tm->TM_MODE != TM_ON) {
2103 v3_raise_exception(info, SS_EXCEPTION);
2106 TM_DBG(info,EXCP,"aborting due to SS exception\n");
2107 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2110 case SVM_EXIT_EXCP13:
2111 if (tm->TM_MODE != TM_ON) {
2112 v3_raise_exception(info, GPF_EXCEPTION);
2115 TM_DBG(info,EXCP,"aborting due to GPF exception\n");
2116 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2119 case SVM_EXIT_EXCP16:
2120 if (tm->TM_MODE != TM_ON) {
2121 v3_raise_exception(info, MF_EXCEPTION);
2124 TM_DBG(info,EXCP,"aborting due to MF exception\n");
2125 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2128 case SVM_EXIT_EXCP17:
2129 if (tm->TM_MODE != TM_ON) {
2130 v3_raise_exception(info, AC_EXCEPTION);
2133 TM_DBG(info,EXCP,"aborting due to AC exception\n");
2134 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2137 case SVM_EXIT_EXCP19:
2138 if (tm->TM_MODE != TM_ON) {
2139 v3_raise_exception(info, XF_EXCEPTION);
2142 TM_DBG(info,EXCP,"aborting due to XF exception\n");
2143 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
/* unhandled intercepted vector: just log it (exit_code - 0x40 = vector) */
2147 TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
/*
 * Enable VMCB exception intercepts for every vector that
 * v3_tm_handle_exception handles, so the exceptions trap to the VMM and can
 * abort in-flight transactions. (#PF is handled via the paging path, not
 * here.)
 */
2155 v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
2157 ctrl_area->exceptions.de = 1; // 0 : divide by zero
2158 ctrl_area->exceptions.db = 1; // 1 : debug
2159 ctrl_area->exceptions.bp = 1; // 3 : breakpoint
2160 ctrl_area->exceptions.of = 1; // 4 : overflow
2161 ctrl_area->exceptions.br = 1; // 5 : bound range
2162 ctrl_area->exceptions.ud = 1; // 6 : undefined opcode
2163 ctrl_area->exceptions.nm = 1; // 7 : device not available
2164 ctrl_area->exceptions.ts = 1; // 10 : invalid tss
2165 ctrl_area->exceptions.np = 1; // 11 : segment not present
2166 ctrl_area->exceptions.ss = 1; // 12 : stack
2167 ctrl_area->exceptions.gp = 1; // 13 : general protection
2168 ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
2169 ctrl_area->exceptions.ac = 1; // 17 : alignment check
2170 ctrl_area->exceptions.xf = 1; // 19 : simd floating point
2174 extern void v3_stgi();
2175 extern void v3_clgi();
2177 /* 441-tm: if we are in TM mode, we need to check for any interrupts here,
2178 * and if there are any, need to do some aborting! Make sure not to die here
2179 * if we are already 'aborting', this results in infiloop
/*
 * Pre-entry interrupt check: if a virtual IRQ or event injection is pending
 * while a transaction is active (and we are not already aborting), abort the
 * transaction and sync the new RIP into the VMCB saved state.
 *
 * NOTE(review): the whole check is gated by a hard-coded `0 &&` below, so
 * this path is currently compiled-in but disabled (see the TODO).
 */
2182 v3_tm_check_intr_state (struct guest_info * info,
2183 vmcb_ctrl_t * guest_ctrl,
2184 vmcb_saved_state_t * guest_state)
2187 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2190 TM_ERR(info,INTR,"TM extension state not found\n");
2195 /* TODO: work this in */
2196 if (0 && (tm->TM_MODE == TM_ON) &&
2197 (tm->TM_ABORT != 1)) {
2199 if (guest_ctrl->guest_ctrl.V_IRQ ||
2200 guest_ctrl->EVENTINJ.valid) {
2202 rdtscll(tm->exit_time);
2203 TM_DBG(info,INTR,"%lld exits happened, time delta is %lld",(info->num_exits - tm->entry_exits),(tm->entry_time - tm->exit_time));
2205 // We do indeed have pending interrupts
2207 TM_DBG(info,INTR,"we have a pending interrupt!\n");
2209 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2210 // Copy new RIP state into arch dependent structure
2211 guest_state->rip = info->rip;
2212 TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);
/*
 * 64-bit #PF hook: when the VM is in TM mode and the fault came from user
 * mode, route the fault through v3_handle_trans_mem_fault, which may return
 * an alternate (staging) page for the shadow paging code to map via
 * *page_to_use. Also lazily allocates this core's staging page the first
 * time it is needed.
 */
2222 v3_tm_handle_pf_64 (struct guest_info * info,
2223 pf_error_t error_code,
2225 addr_t * page_to_use)
2227 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2228 struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");
2231 TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
2236 TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
/* only user-mode faults participate in transactional buffering */
2240 if ((tms->TM_MODE == TM_ON) &&
2241 (error_code.user == 1)) {
2243 TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);
2245 *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);
2247 if (*page_to_use == ERR_TRANS_FAULT_FAIL){
2248 TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
/* first transactional fault on this core: allocate the staging page */
2252 if ((tm->TM_MODE == TM_ON) &&
2253 (tm->staging_page == NULL)) {
2255 tm->staging_page = V3_AllocPages(1);
2257 if (!(tm->staging_page)) {
2258 TM_ERR(info,MMU,"Problem allocating staging page\n");
2262 TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
/*
 * User-mode TLB-miss hook: if this core is in a transaction and the #PF
 * handler returned an alternate page (> TRANS_FAULT_OK sentinel), redirect
 * the shadow mapping to that page (the staging copy) instead of the real
 * guest frame.
 */
2271 v3_tm_handle_usr_tlb_miss (struct guest_info * info,
2272 pf_error_t error_code,
2276 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2278 /* TLB miss from user */
2279 if ((tm->TM_MODE == TM_ON) &&
2280 (error_code.user == 1)) {
2282 if (page_to_use > TRANS_FAULT_OK) {
2283 TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
2284 *shadow_pa = page_to_use;
/*
 * Read-fault hook: while the VM is transactional and this core is
 * single-stepping (TM_EXEC), map user-mode read accesses read-only in the
 * shadow PTE so a subsequent write re-faults and can be buffered.
 */
2293 v3_tm_handle_read_fault (struct guest_info * info,
2294 pf_error_t error_code,
2295 pte64_t * shadow_pte)
2297 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2298 struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");
2300 // If we are about to read, make it read only
2301 if ((tms->TM_MODE == TM_ON) &&
2302 (tm->TM_STATE == TM_EXEC) &&
2303 (error_code.write == 0) &&
2304 (error_code.user == 1)) {
2306 TM_DBG(info,MMU, "Flagging the page read only\n");
2307 shadow_pte->writable = 0;
2313 v3_tm_decode_rtm_instrs (struct guest_info * info,
2315 struct x86_instr * instr)
2317 uint8_t byte1, byte2, byte3;
2318 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2320 if (tm->TM_MODE == TM_ON) {
2322 byte1 = *(uint8_t *)(instr_ptr);
2323 byte2 = *(uint8_t *)(instr_ptr + 1);
2324 byte3 = *(uint8_t *)(instr_ptr + 2);
2326 if (byte1 == 0xc7 &&
2327 byte2 == 0xf8) { /* third byte is an immediate */
2329 TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
2330 instr->instr_length = 6;
2333 } else if (byte1 == 0xc6 &&
2334 byte2 == 0xf8) { /* third byte is an immediate */
2336 TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
2337 instr->instr_length = 3;
2340 } else if (byte1 == 0x0f &&
2344 TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
2345 instr->instr_length = 3;