/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
 *         Kyle C. Hale <kh@u.northwestern.edu>
 *         Marcel Flores <marcel-flores@u.northwestern.edu>
 *         Zachary Bischof <zbischof@u.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/svm.h>
#include <palacios/svm_handler.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vmm_hashtable.h>

#include <extensions/trans_mem.h>
#include <extensions/tm_util.h>
#if !V3_CONFIG_DEBUG_TM_FUNC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* TODO:
 * - save/restore register state on XBEGIN/XABORT
 * - put status codes in RAX
 * - implement proper exceptions for failed XBEGINs etc.
 */
/* these bytes encode a mov to %rax followed by a VMMCALL */
static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
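/* For reference, these ten bytes disassemble as follows (0x1337 is
 * assumed here to be the TM hypercall id, i.e. TM_KICKBACK_CALL):
 *
 *   48 c7 c0 37 13 00 00    mov  $0x1337, %rax    ; hypercall number
 *   0f 01 d9                vmmcall               ; trap into Palacios
 *
 * hence the 10 bytes (INSTR_INJECT_LEN) saved and restored around each
 * instruction injection below.
 */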
static struct v3_tm_state * tm_global_state = NULL;
static void
tm_translate_rip (struct guest_info * core, addr_t * target)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_gpa_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    } else if (core->mem_mode == VIRTUAL_MEM) {
        v3_gva_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    }
}
static void
tm_read_instr (struct guest_info * core,
               addr_t addr,
               uchar_t * dst,
               uint_t size)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_read_gpa_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    } else {
        v3_read_gva_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    }
}
static int
tm_handle_decode_fail (struct guest_info * core)
{
    addr_t cur_rip;
    uint_t core_num;

    tm_translate_rip(core, &cur_rip);

#ifdef V3_CONFIG_DEBUG_TM_FUNC
    v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
#endif

    /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
    for (core_num = 0; core_num < core->vm_info->num_cores; core_num++) {
        struct v3_trans_mem * remote_tm;

        /* skip the local core */
        if (core_num == core->vcpu_id) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");

        if (!remote_tm) {
            TM_ERR(core,DECODE,"couldn't get remote_tm\n");
            return -1;
        }

        /* skip cores that aren't in a transactional context */
        if (remote_tm->TM_MODE == TM_OFF) {
            continue;
        }

        TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
        remote_tm->TM_ABORT = 1;
    }

    return 0;
}
/* special casing for control-flow instructions
 * returns 1 if we need to jump
 * returns -1 on error
 */
static int
tm_handle_ctrl_flow (struct guest_info * core,
                     struct v3_trans_mem * tm,
                     addr_t * instr_location,
                     struct x86_instr * struct_instr)
{
    struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
    addr_t offset = 0;
    int to_jmp = 0;
    switch (struct_instr->op_type) {

        case V3_OP_JLE:
            TM_DBG(core,DECODE, "!!++ JLE\n");
            to_jmp = (flags->zf || flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JAE:
            TM_DBG(core,DECODE,"!!++ JAE\n");
            to_jmp = (flags->cf == 0);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JMP:
            TM_DBG(core,DECODE,"!!++ JMP\n");
            to_jmp = 1;
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNZ:
            TM_DBG(core,DECODE,"!!++ JNZ\n");
            to_jmp = (flags->zf == 0);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JL:
            TM_DBG(core,DECODE,"!!++ JL\n");
            to_jmp = (flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNS:
            TM_DBG(core,DECODE,"!!++ JNS\n");
            to_jmp = (flags->sf == 0);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        default:
            *instr_location = core->rip + tm->cur_instr_len;
            break;
    }

    return to_jmp;
}
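/* Worked example (hypothetical values): a 2-byte "jnz +0x10" at
 * rip = 0x401000 gives cur_instr_len = 2 and offset = 0x10, so
 *
 *   not taken: *instr_location = 0x401000 + 2        = 0x401002
 *   taken:     *instr_location = 0x401000 + 2 + 0x10 = 0x401012
 *
 * i.e. the hypercall is always injected at whichever instruction will
 * actually execute next.
 */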
/* entry points:
 *
 * called inside the #UD and VMMCALL handlers
 * only affects global state in the case of a quix86 fall-over
 *  -> set other cores' TM_ABORT to 1, return -2
 */
static int
v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    struct x86_instr struct_instr;
    uchar_t cur_instr[INSTR_BUF_SZ];
    addr_t instr_location;

    // Fetch the current instruction
    tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);

    TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);

    /* Attempt to decode the current instruction to determine its length */
    if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {

        TM_ERR(core,Error,"Could not decode current instruction (at %llx)\n", (uint64_t)core->rip);

        /* this will attempt to abort all the remote cores */
        if (tm_handle_decode_fail(core) == -1) {
            TM_ERR(core,Error,"Could not handle failed decode\n");
            return ERR_STORE_FAIL;
        }

        /* we need to trigger a local abort */
        return ERR_STORE_MUST_ABORT;
    }

    /* we can't currently handle REP prefixes, abort */
    if (struct_instr.op_type != V3_INVALID_OP &&
            (struct_instr.prefixes.repne ||
             struct_instr.prefixes.repnz ||
             struct_instr.prefixes.rep   ||
             struct_instr.prefixes.repe  ||
             struct_instr.prefixes.repz)) {

        TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
        return ERR_STORE_MUST_ABORT;
    }

    tm->cur_instr_len = struct_instr.instr_length;

    /* handle jump instructions */
    tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);

    /* save the next 10 bytes after the current instruction; we'll put the vmmcall there */
    tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);

    /* store the next instruction and its length in info */
    memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);

    return 0;
}
static int
v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    addr_t ptr;

    // save rax
    tm->clobbered_rax = (core->vm_regs).rax;

    ptr = core->rip;

    /* we can't currently handle instructions that span page boundaries */
    if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
        return -1;
    }

    ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);

    if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
        return -1;
    }

    if (v3_gva_to_hva(core,
                get_addr_linear(core, ptr, &(core->segments.cs)),
                &ptr) == -1) {

        TM_ERR(core,Error,"Calculating next rip hva failed\n");
        return -1;
    }

    TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
            core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len );

    /* Copy the VMM call over the beginning of the next instruction (ptr) */
    memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);

    /* KCH: flag that we've dirtied an instruction, and store its host address */
    tm->dirty_instr_flag = 1;
    tm->dirty_gva        = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
    tm->dirty_hva        = ptr;
    tm->to_branch        = 0;

    return 0;
}
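/* A sketch of what this sets up (illustrative addresses): with a 3-byte
 * instruction at rip = 0x400000,
 *
 *   before:  0x400000: <instr>   0x400003: <original guest bytes>
 *   after:   0x400000: <instr>   0x400003: mov $0x1337,%rax; vmmcall
 *
 * the guest executes exactly one instruction, traps into the hypercall
 * handler, and v3_restore_dirty_instr() later copies the clobbered
 * bytes back out of tm->dirty_instr.
 */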
/* restore the next instruction (lazy restore):
 * this should only be called when TM_STATE == TM_NULL; additionally, we
 * check whether our dirty-instruction flag is set
 */
int
v3_restore_dirty_instr (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    /* Restore next instruction, transition to IFETCH state */
    TM_DBG(core,RESTORE,"Restoring next instruction.\n");

    /* check if we've actually done an instruction overwrite */
    if (!(tm->dirty_instr_flag)) {
        TM_DBG(core,RESTORE,"nothing to restore here...\n");
        return 0;
    }

    // Actually restore the instruction
    memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);

    // Put rax back
    (core->vm_regs).rax = tm->clobbered_rax;

    // Scoot rip back up
    TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    core->rip = tm->dirty_gva;

    // Clean up
    tm->dirty_instr_flag = 0;
    tm->dirty_gva = 0;
    tm->dirty_hva = 0;
    memset(tm->dirty_instr, 0, 15);

    TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);

    return 0;
}
static addr_t
tm_handle_fault_ifetch (struct guest_info * core,
                        struct v3_trans_mem * tm)
{
    int sto;

    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as the faulting address, we must be at an ifetch.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;
    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_DBG(core,EXIT,"aborting (decode failed or unsupported instruction)\n");
        v3_handle_trans_abort(core);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,PF,"problem overwriting instruction\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_read (struct guest_info * core,
                      struct v3_trans_mem * tm,
                      addr_t fault_addr,
                      pf_error_t error)
{
    // This page fault was caused by a read in the current instruction on a core in TM mode
    TM_DBG(core,DATA,"Page fault caused by read.\n");
    TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
        TM_ERR(core,PF,"problem adding to list\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF,"problem recording access\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    /* if we have previously written to this address, we need to update our
     * staging page and map it in */
    if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {

        TM_DBG(core,PF,"Saw a read from something in the write list\n");

        /* write the value from the linked list to the staging page */
        if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
            TM_ERR(core,PF, "could not stage entry!\n");
            return ERR_TRANS_FAULT_FAIL;
        }

        /* Hand it the staging page */
        return (addr_t)(tm->staging_page);

    } else {

        // Add it to the read set
        addr_t shadow_addr = 0;

        TM_DBG(core,PF,"Saw a read from a fresh address\n");

        if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
            TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
            return ERR_TRANS_FAULT_FAIL;
        }
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_write (struct guest_info * core,
                       struct v3_trans_mem * tm,
                       addr_t fault_addr,
                       pf_error_t error)
{
    void * data_loc;
    addr_t virt_data_loc;
    addr_t shadow_addr = 0;

    TM_DBG(core,DATA,"Page fault caused by write\n");
    TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not add to list!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
        TM_ERR(core,WRITE,"could not translate gva to hva for transaction write\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    // Copy the existing value to the staging page, populating that field.
    // This avoids errors in optimized code such as ++, where the original
    // value is not read separately, but simply incremented
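    // (e.g., an optimized "incl (%rax)" never issues a separate load, so
    // the staging page must already hold the current value before the
    // increment is replayed against it)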
    data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));

    if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
        TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
    //memcpy((void*)virt_data_loc, (void*)shadow_addr, sizeof(uint64_t));
    *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;

    return (addr_t)(tm->staging_page);
}
static addr_t
tm_handle_fault_extern_ifetch (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    int sto;

    // the system is in TM mode, but this core isn't; record the access
    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");

    sto = v3_store_next_instr(core,tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,Error,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;

    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
        v3_handle_trans_abort(core);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,IFETCH,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_extern_access (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    TM_DBG(core,PF_HANDLE,"recording access\n");

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF_HANDLE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_tmoff (struct guest_info * core)
{
    TM_DBG(core,PF_HANDLE, "in the pf handler, but no one is in tm mode anymore (core %d); we should try to eliminate hypercalls\n", core->vcpu_id);

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
/* entry points:
 *
 * called from the MMU - should mean that at least tms->TM_MODE is on
 *
 * tm->on : ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, write log, store instr, overwrite instr
 * tm->off: ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, store instr, overwrite instr
 *
 * returns ERR_TRANS_FAULT_FAIL on error
 * returns TRANS_FAULT_OK when things are fine
 * returns an addr when we're passing back a staging page
 */
addr_t
v3_handle_trans_mem_fault (struct guest_info * core,
                           addr_t fault_addr,
                           pf_error_t error)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(core,ERROR,"couldn't get core state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (!tms) {
        TM_ERR(core,ERROR,"couldn't get vm trans_mem state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);

    if ((tm->TM_MODE == TM_ON) &&
        ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_ifetch(core, tm);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 0)) {

        return tm_handle_fault_read(core, tm, fault_addr, error);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 1)) {

        return tm_handle_fault_write(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC)) {

        return tm_handle_fault_extern_access(core, tm, fault_addr, error);

    } else {

        return tm_handle_fault_tmoff(core);
    }

    return TRANS_FAULT_OK;
}
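/* Note on the return convention: callers treat any value greater than
 * TRANS_FAULT_OK as the address of a staging page to map in place of
 * the faulting page (see v3_tm_handle_usr_tlb_miss below).
 */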
static int
tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
{
    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,EXIT,"we are in tm mode but the system is not!\n");
        return TRANS_HCALL_FAIL;
    }

    // we got to an exit when things were off!
    TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    tm->TM_STATE = TM_NULL;

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_dec_abort (struct guest_info * core,
                           struct v3_trans_mem * tm)
{
    // we only ever get here from TM DECODE
    TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");
    tm->TM_ABORT = 0;

    v3_handle_trans_abort(core);

    TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_ifetch_start (struct guest_info * core,
                              struct v3_trans_mem * tm)
{
    tm->TM_STATE = TM_IFETCH;

    TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away the VTLB and go into TM_IFETCH\n");

    // Finally, invalidate the shadow page tables
    v3_invalidate_shadow_pts(core);

    return TRANS_HCALL_OK;
}
static int
tm_check_list_conflict (struct guest_info * core,
                        struct v3_trans_mem * tm,
                        struct list_head * access_list,
                        v3_tm_op_t op_type)
{
    struct mem_op * curr = NULL;
    struct mem_op * tmp  = NULL;
    int conflict = 0;

    list_for_each_entry_safe(curr, tmp, access_list, op_node) {

        conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);

        if (conflict == ERR_CHECK_FAIL) {

            TM_ERR(core,EXIT,"error checking for conflicts\n");
            return TRANS_HCALL_FAIL;

        } else if (conflict == CHECK_IS_CONFLICT) {

            TM_DBG(core,EXIT,"we have a conflict, aborting\n");
            v3_handle_trans_abort(core);
            return CHECK_MUST_ABORT;
        }
    }

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_check_conflicts (struct guest_info * core,
                                 struct v3_trans_mem * tm)
{
    int ret;

    TM_DBG(core,EXIT,"still TM_ON\n");
    TM_DBG(core,EXIT,"checking for conflicts\n");

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    tm->TM_STATE = TM_IFETCH;

    return TRANS_HCALL_OK;
}
/* trans mem hypercall handler
 * entry points:
 *
 * running MIME (tm or tms on):
 *   restore state, check for conflicts,
 *   abort (due to quix86)
 */
static int
tm_handle_hcall (struct guest_info * core,
                 unsigned int hcall_id,
                 void * priv_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (tms->TM_MODE == TM_OFF) {
        return tm_handle_hcall_tmoff(core, tm);
    }

    // The previous instruction has finished, copy the staging page back into the linked list!
    if (update_list(tm, &(tm->trans_w_list)) == -1) {
        TM_ERR(core,HCALL,"could not update_list!\n");
        return TRANS_HCALL_FAIL;
    }

    // Done handling the previous instruction; put back the next instruction, reset %rip, and go back to the IFETCH state
    TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    /* check if another core has asked us to abort */
    if (tm->TM_ABORT == 1 &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_dec_abort(core, tm);

    } else if (tm->TM_STATE == TM_EXEC) {
        return tm_handle_hcall_ifetch_start(core, tm);
    }

    if (tm->TM_MODE == TM_ON &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_check_conflicts(core, tm);

    } else if (tm->TM_MODE == TM_OFF) {
        TM_DBG(core,EXIT,"we are in TM_OFF\n");
    }

    return TRANS_HCALL_OK;
}
int
v3_tm_inc_tnum (struct v3_trans_mem * tm)
{
    addr_t irqstate;
    uint64_t new_ctxt;
    uint64_t * lt;

    lt = tm_global_state->last_trans;

    // grab the global last_trans
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    tm->t_num++;

    TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
                                                        (int)lt[1], (int)tm->t_num);

    if (new_ctxt != tm->t_num) {
        TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
        return -1;
    }

    return 0;
}
int
v3_handle_trans_abort (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,ABORT,"problem freeing staging page\n");
        return -1;
    }

    // Clear the VTLB, which still has our staging page in it
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,ABORT,"problem clearing vtlb\n");
        return -1;
    }

    // Free the operation lists
    v3_clear_tm_lists(tm);

    TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);

    if (tm->TM_MODE == TM_ON) {
        TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
        core->rip = tm->fail_call;

        // turn TM off; clr_tm handles the global state
        v3_clr_tm(tm);

        // bump the transaction number
        v3_tm_inc_tnum(tm);
    }

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,GC,"could not gc!\n");
        return -1;
    }

    return 0;
}
static uint_t
tm_hash_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(void *));
}

static int
tm_eq_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}

static uint_t
tm_hash_buf_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(addr_t));
}

static int
tm_eq_buf_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}
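/* A sketch of the two conflict-tracking tables built on these hash
 * functions (structure names as used below):
 *
 *   addr_ctxt   : gva -> chained list of struct hash_chain contexts,
 *                 each holding a curr_lt[] snapshot of the per-core
 *                 transaction numbers at the time of an access
 *   access_type : hash of a (gva, core_id, core_lt) tuple ->
 *                 struct v3_tm_access_type { r, w }, recording how that
 *                 core touched that address in that transaction
 */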
/* this checks whether the remote access was done on the same
 * local transaction number as the current one */
static int
tm_check_context (struct v3_vm_info * vm,
                  addr_t gva,
                  uint64_t core_num,
                  uint64_t curr_ctxt,
                  uint64_t * curr_lt,
                  v3_tm_op_t op_type)
{
    uint64_t core_id_sub;
    struct v3_tm_access_type * type = NULL;

    for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {
        struct v3_trans_mem * remote_tm;
        void * buf[3];
        addr_t key;

        /* skip the core that's doing the checking */
        if (core_id_sub == core_num) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");

        if (!remote_tm) {
            PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
            return ERR_CHECK_FAIL;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)core_id_sub;
        buf[2] = (void *)curr_lt[core_id_sub];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);

        if (type) {
            /* conflict if we write and they accessed at all, or
             * if we read and they wrote */
            if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) ||
                 (op_type != OP_TYPE_WRITE && type->w)) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
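/* The condition above encodes the usual read/write conflict matrix:
 *
 *                    remote read    remote write
 *   local write       conflict       conflict
 *   local read          ok           conflict
 */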
/* check all the contexts in the list for a conflict */
static int
tm_check_all_contexts (struct v3_vm_info * vm,
                       struct list_head * hash_list,
                       addr_t gva,
                       v3_tm_op_t op_type,
                       uint64_t core_num,
                       uint64_t curr_ctxt)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t * curr_lt = NULL;
    int ret = 0;

    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        curr_lt = curr->curr_lt;

        if (curr_lt[core_num] == curr_ctxt) {

            ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* The following access patterns trigger an abort:
 * We: Read  | Anyone Else: Write
 * We: Write | Anyone Else: Read, Write
 *
 * (pg 8-2 of the Haswell manual)
 *
 * returns ERR_CHECK_FAIL on error
 * returns CHECK_IS_CONFLICT if there is a conflict
 * returns CHECK_NO_CONFLICT if there isn't
 */
int
tm_check_conflict (struct v3_vm_info * vm,
                   addr_t gva,
                   v3_tm_op_t op_type,
                   uint64_t core_num,
                   uint64_t curr_ctxt)
{
    uint64_t core_id;

    /* loop over the other cores -> core_id */
    for (core_id = 0; core_id < vm->num_cores; core_id++) {

        struct guest_info * core = NULL;
        struct v3_trans_mem * tm = NULL;
        struct list_head * hash_list;

        /* only check other cores */
        if (core_id == core_num) {
            continue;
        }

        core = &(vm->cores[core_id]);
        tm = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");

        if (!tm) {
            PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
            return ERR_CHECK_FAIL;
        }

        /* this core didn't access the address, move on */
        if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
            continue;
        }

        /* loop over the chained hash for gva, find fields with curr_ctxt -> curr_lt */
        int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);

        if (ret == ERR_CHECK_FAIL) {
            return ERR_CHECK_FAIL;
        } else if (ret == CHECK_IS_CONFLICT) {
            return CHECK_IS_CONFLICT;
        }
    }

    return CHECK_NO_CONFLICT;
}
static uint64_t
tm_need_to_gc (struct v3_trans_mem * tm,
               struct hash_chain * curr,
               uint64_t * lt_copy,
               uint64_t tmoff)
{
    uint64_t to_gc = 1;
    uint64_t i;

    /* if none of the cores are in a transactional context,
     * we know we can collect this context
     */
    if (!tmoff) {

        for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
            /* if *any* of the cores are active in a transaction
             * number that is current (listed in this context),
             * we know we can't collect this context, as it
             * will be needed when that core's transaction ends
             */
            if (curr->curr_lt[i] >= lt_copy[i]) {
                to_gc = 0;
                break;
            }
        }
    }

    return to_gc;
}
static void
tm_del_stale_ctxt (struct hash_chain * curr)
{
    list_del(&(curr->lt_node));
    V3_Free(curr->curr_lt);
    V3_Free(curr);
}
static void
tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
{
    v3_htable_remove(tm->access_type, key, 0);
    (tm->access_type_entries)--;
}
static int
tm_collect_context (struct v3_trans_mem * tm,
                    struct hashtable_iter * ctxt_iter,
                    struct hash_chain * curr,
                    uint64_t * begin_time,
                    uint64_t * end_time,
                    addr_t gva)
{
    uint64_t i;

    for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {

        void * buf[3];
        struct v3_tm_access_type * type;
        addr_t key;

        rdtscll(*end_time);
        if ((*end_time - *begin_time) > 100000000) {
            TM_ERR(tm->ginfo,GC,"time threshold exceeded, exiting!!!\n");
            return -1;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)i;
        buf[2] = (void *)curr->curr_lt[i];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);

        if (!type) { // something has gone terribly wrong
            TM_ERR(tm->ginfo,GC,"could not find accesstype entry to gc, THIS! IS! WRONG!\n");
            return -1;
        }

        /* delete the access type entry */
        tm_del_acc_entry(tm, key);
    }

    /* delete the stale context */
    tm_del_stale_ctxt(curr);

    return 0;
}
static int
tm_collect_all_contexts (struct v3_trans_mem * tm,
                         struct hashtable_iter * ctxt_iter,
                         uint64_t tmoff,
                         uint64_t * lt_copy,
                         uint64_t * begin_time,
                         uint64_t * end_time)
{
    struct hash_chain * tmp;
    struct hash_chain * curr;
    struct list_head * chain_list;
    addr_t gva;

    gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);

    chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

    /* this is a chained hash, so for each address we will have
     * a list of contexts. We now check each context to see
     * whether or not it can be collected
     */
    list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {

        uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);

        /* not garbage, go on to the next context in the list */
        if (!to_gc) {
            TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
            continue;
        }

        TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);

        /* found one, delete the corresponding entries in access_type */
        if (tm_collect_context(tm, ctxt_iter, curr, begin_time, end_time, gva) == -1) {
            TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
            return -1;
        }
    }

    /* if the context list (hash chain) is now empty, remove the hash entry */
    if (list_empty(chain_list)) {
        v3_htable_iter_remove(ctxt_iter, 0);
        (tm->addr_ctxt_entries)--;
    } else {
        v3_htable_iter_advance(ctxt_iter);
    }

    /* do NOT give the CPU away here: never yield while holding a lock */

    return 0;
}
int
tm_hash_gc (struct v3_trans_mem * tm)
{
    addr_t irqstate, irqstate2;
    int rc = 0;
    uint64_t begin_time = 0, end_time = 0, tmoff;
    uint64_t * lt_copy = NULL;
    struct v3_tm_state * tms = NULL;
    struct hashtable_iter * ctxt_iter = NULL;

    tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");

    if (!tms) {
        TM_ERR(tm->ginfo,GC,"could not get the global tm state\n");
        return -1;
    }

    TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));

    tmoff = (tms->cores_active == 0);

    lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    if (!lt_copy) {
        TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    rdtscll(begin_time);

    /* lt_copy holds the last transaction number for each core */
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    /* lock both hashes */
    irqstate  = v3_lock_irqsave(tm->addr_ctxt_lock);
    irqstate2 = v3_lock_irqsave(tm->access_type_lock);

    /* loop over the hash entries in addr_ctxt */
    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);

    if (!ctxt_iter) {
        TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
        rc = -1;
    } else {

        /* we check each address stored in the hash */
        while (ctxt_iter->entry) {
            /* NOTE: this call advances the hash iterator */
            if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy, &begin_time, &end_time) == -1) {
                rc = -1;
                break;
            }
        }

        v3_destroy_htable_iter(ctxt_iter);
    }

    /* unlock in reverse order, restoring the matching irq state */
    v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
    v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);

    V3_Free(lt_copy);

    rdtscll(end_time);

    if (rc == -1) {
        TM_ERR(tm->ginfo,GC,"garbage collection failed, time spent: %d cycles\n", (int)(end_time - begin_time));
    } else {
        TM_DBG(tm->ginfo,GC,"ended garbage collection successfully, time spent: %d cycles\n", (int)(end_time - begin_time));
    }

    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));

    return rc;
}
/* TODO: break out the for loops in these functions */
static int
tm_update_ctxt_list (struct v3_trans_mem * tm,
                     uint64_t * lt_copy,
                     addr_t gva,
                     uint8_t write,
                     struct list_head * hash_list)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;
    uint_t new_le = 1;

    /* check whether we already have a context matching lt_copy */
    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {
        uint_t i;
        uint8_t same = 1;

        for (i = 0; i < num_cores; i++) {
            if (curr->curr_lt[i] != lt_copy[i]) {
                same = 0;
                break;
            }
        }

        if (same) {
            new_le = 0;
            break;
        }
    }

    if (new_le) {
        struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));

        if (!new_l) {
            TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
            return -1;
        }

        memset(new_l, 0, sizeof(struct hash_chain));

        new_l->curr_lt = lt_copy;

        list_add_tail(&(new_l->lt_node), hash_list);
    }

    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);

        if (!type) {
            /* no access type entry for this tuple yet, create one */
            type = V3_Malloc(sizeof(struct v3_tm_access_type));

            if (!type) {
                TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
                return -1;
            }

            memset(type, 0, sizeof(struct v3_tm_access_type));

            if (write) {
                type->w = 1;
            } else {
                type->r = 1;
            }

            if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
                TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
                return -1;
            }
            (tm->access_type_entries)++;
        } else {
            /* update the existing access type entry */
            if (write) {
                type->w = 1;
            } else {
                type->r = 1;
            }
        }
    }

    return 0;
}
/* no entry in addr-ctxt yet, create one */
static int
tm_create_ctxt_key (struct v3_trans_mem * tm,
                    uint64_t * lt_copy,
                    addr_t gva,
                    uint8_t write)
{
    struct list_head * hash_list = NULL;
    struct hash_chain * new_l = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;

    hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));

    if (!hash_list) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
        return -1;
    }

    INIT_LIST_HEAD(hash_list);

    new_l = V3_Malloc(sizeof(struct hash_chain));

    if (!new_l) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
        goto out_err;
    }

    memset(new_l, 0, sizeof(struct hash_chain));

    new_l->curr_lt = lt_copy;

    /* add the context to the hash chain */
    list_add_tail(&(new_l->lt_node), hash_list);

    if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
        TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
        goto out_err1;
    }

    (tm->addr_ctxt_entries)++;

    /* TODO: we need a way to unwind and deallocate for all cores on failure here */
    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type = NULL;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        type = V3_Malloc(sizeof(struct v3_tm_access_type));

        if (!type) {
            TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
            goto out_err1;
        }

        memset(type, 0, sizeof(struct v3_tm_access_type));

        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
            TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
            goto out_err1;
        }

        (tm->access_type_entries)++;
    }

    return 0;

out_err1:
    list_del(&(new_l->lt_node));
    V3_Free(new_l);
out_err:
    V3_Free(hash_list);
    return -1;
}
/* entry points:
 *
 * called during MIME execution
 * record the memory access in the conflict logs
 *   this locks the table during insertion
 */
int
tm_record_access (struct v3_trans_mem * tm,
                  uint8_t write,
                  addr_t gva)
{
    uint64_t * lt_copy;
    struct list_head * hash_list;
    addr_t irqstate;
    uint64_t num_cores;

    num_cores = tm->ginfo->vm_info->num_cores;

    TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
                                        (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));
    //PrintDebug(tm->ginfo->vm_info, tm->ginfo,"\tWe think that addr-ctxt.cnt = %d, access-type.cnt = %d\n",(int)tm->addr_ctxt_entries,(int)tm->access_type_entries);

    lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);

    if (!lt_copy) {
        TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*num_cores);

    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
        /* we haven't created a context list for this address yet, go do it */
        return tm_create_ctxt_key(tm, lt_copy, gva, write);
    }

    /* we have a context list for this address already; do we need to create a new context? */
    return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
}
static void
tm_prepare_cpuid (struct v3_vm_info * vm)
{
    V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");

    /* increase the max CPUID function to 7 (extended feature flags enumeration) */
    v3_cpuid_add_fields(vm, 0x0,
            0xf, 7,
            0, 0,
            0, 0,
            0, 0);

    /* do the same for AMD */
    v3_cpuid_add_fields(vm, 0x80000000,
            0xffffffff, 0x80000007,
            0, 0,
            0, 0,
            0, 0);

    /* enable RTM (CPUID.07H.EBX.RTM = 1) */
    v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
    v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
}
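/* A guest would then discover RTM support in the usual way; an
 * illustrative (guest-side, not Palacios) sketch:
 *
 *   uint32_t eax = 7, ebx, ecx = 0, edx;
 *   asm volatile ("cpuid"
 *                 : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
 *   int has_rtm = (ebx >> 11) & 1;  // CPUID.(EAX=07H,ECX=0):EBX.RTM[bit 11]
 */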
static int
init_trans_mem (struct v3_vm_info * vm,
                v3_cfg_tree_t * cfg,
                void ** priv_data)
{
    struct v3_tm_state * tms;
    int i;

    PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");

    tms = V3_Malloc(sizeof(struct v3_tm_state));

    if (!tms) {
        PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
        return -1;
    }

    memset(tms, 0, sizeof(struct v3_tm_state));

    if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
        PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
        goto out_err;
    }

    v3_lock_init(&(tms->lock));

    tms->TM_MODE      = TM_OFF;
    tms->cores_active = 0;

    uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);

    if (!lt) {
        PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
        goto out_err1;
    }

    memset(lt, 0, sizeof(uint64_t) * vm->num_cores);

    for (i = 0; i < vm->num_cores; i++) {
        lt[i] = 0;
    }

    tms->last_trans = lt;

    *priv_data = tms;
    tm_global_state = tms;

    tm_prepare_cpuid(vm);

    return 0;

out_err1:
    v3_lock_deinit(&(tms->lock));
    v3_remove_hypercall(vm, TM_KICKBACK_CALL);
out_err:
    V3_Free(tms);
    return -1;
}
static int
init_trans_mem_core (struct guest_info * core,
                     void * priv_data,
                     void ** core_data)
{
    struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));

    TM_DBG(core,INIT, "Trans Mem. Core Init\n");

    if (!tm) {
        TM_ERR(core,INIT, "Problem allocating TM state\n");
        return -1;
    }

    memset(tm, 0, sizeof(struct v3_trans_mem));

    INIT_LIST_HEAD(&tm->trans_r_list);
    INIT_LIST_HEAD(&tm->trans_w_list);

    tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
    if (!(tm->addr_ctxt)) {
        TM_ERR(core,INIT,"problem creating addr_ctxt\n");
        goto out_err;
    }

    tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
    if (!(tm->access_type)) {
        TM_ERR(core,INIT,"problem creating access_type\n");
        goto out_err1;
    }

    v3_lock_init(&(tm->addr_ctxt_lock));
    v3_lock_init(&(tm->access_type_lock));

    tm->TM_STATE = TM_NULL;
    tm->TM_MODE  = TM_OFF;
    tm->TM_ABORT = 0;
    tm->ginfo    = core;
    tm->t_num    = 0;
    tm->to_branch = 0;
    tm->offset   = 0;
    tm->access_type_entries = 0;
    tm->addr_ctxt_entries = 0;
    tm->dirty_instr_flag = 0;

    /* TODO: Cache Model */
    //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
    //tm->box->init = init_cache;
    //tm->box->init(sample_spec, tm->box);

    *core_data = tm;

    return 0;

out_err1:
    v3_free_htable(tm->addr_ctxt, 0, 0);
out_err:
    V3_Free(tm);
    return -1;
}
static int
deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
{
    struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;

    if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
        PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
        return -1;
    }

    v3_lock_deinit(&(tms->lock));

    if (tms) {
        V3_Free(tms);
    }

    return 0;
}
static int
deinit_trans_mem_core (struct guest_info * core,
                       void * priv_data,
                       void * core_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
    struct hashtable_iter * ctxt_iter = NULL;

    v3_clear_tm_lists(tm);

    if (tm->staging_page) {
        TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
    }

    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
        return -1;
    }

    /* delete all the context entries for each hashed address */
    while (ctxt_iter->entry) {
        struct hash_chain * tmp;
        struct hash_chain * curr;
        struct list_head * chain_list;
        addr_t gva;

        gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
        chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

        /* delete the context */
        list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
            tm_del_stale_ctxt(curr);
        }

        v3_htable_iter_advance(ctxt_iter);
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* we've already deleted the values in this one */
    v3_free_htable(tm->addr_ctxt, 0, 0);

    /* KCH WARNING: we may not want to free access type values here */
    v3_free_htable(tm->access_type, 1, 0);

    v3_lock_deinit(&(tm->addr_ctxt_lock));
    v3_lock_deinit(&(tm->access_type_lock));

    V3_Free(tm);

    return 0;
}
static struct v3_extension_impl trans_mem_impl = {
    .name = "trans_mem",
    .vm_init = init_trans_mem,
    .vm_deinit = deinit_trans_mem,
    .core_init = init_trans_mem_core,
    .core_deinit = deinit_trans_mem_core,
};

register_extension(&trans_mem_impl);
/* entry conditions:
 * tms->on  => commit our list, free sp, clear our lists, clr_tm will handle the global state, then gc
 * tms->off => commit our list, free sp, clear our lists, clr_tm will handle the global state, then gc
 */
static int
tm_handle_xend (struct guest_info * core,
                struct v3_trans_mem * tm)
{
    rdtscll(tm->exit_time);

    // Error checking! Make sure that we got here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
        v3_free_staging_page(tm);
        v3_clr_vtlb(core);
        v3_clear_tm_lists(tm);
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    /* Our transaction finished! */
    /* Copy over data from the staging page */
    TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");

    if (commit_list(core, tm) == -1) {
        TM_ERR(core,UD,"error committing tm list to memory\n");
        return -1;
    }

    TM_DBG(core,UD,"Freeing staging page and internal data structures\n");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,XEND,"couldn't free staging page\n");
        return -1;
    }

    // Clear the vtlb, as it may still contain our staging page
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,XEND,"couldn't clear vtlb\n");
        return -1;
    }

    // Clear the lists of operations
    v3_clear_tm_lists(tm);

    /* Set the state and advance the RIP */
    TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
    core->rip += XEND_INSTR_LEN;

    // turn TM off; clr_tm handles the global state
    v3_clr_tm(tm);

    // time to garbage collect
    v3_tm_inc_tnum(tm);

    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,XEND,"could not gc!\n");
        return -1;
    }

    return 0;
}
/* entry conditions:
 * tms->on  => handle our abort code, handle_trans_abort will clear the necessary state
 * tms->off => handle our abort code, handle_trans_abort will clear the necessary state
 */
static int
tm_handle_xabort (struct guest_info * core,
                  struct v3_trans_mem * tm)
{
    /* TODO: this probably needs to move somewhere else */
    rdtscll(tm->exit_time);

    // Error checking! Make sure that we got here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_DBG(core, UD, "We got here while not in a transactional region!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    TM_DBG(core,UD,"aborting\n");

    if (tm->TM_STATE != TM_NULL) {
        v3_restore_dirty_instr(core);
    }

    /* abort the transaction, setting rip to the fallback address */
    v3_handle_trans_abort(core);

    return 0;
}
/* entry conditions:
 * tms->on  => we set up our running env, set_tm will clear other vtlbs to start single stepping
 * tms->off => we set up our running env, set_tm will not clear anyone else's vtlb
 */
static int
tm_handle_xbegin (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    sint32_t rel_addr = 0;

    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,UD,"We got here while already in a transactional region!");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    rdtscll(tm->entry_time);
    tm->entry_exits = core->num_exits;

    /* set the tm_mode for this core */
    v3_set_tm(tm);

    TM_DBG(core,UD,"Set the system in TM Mode, saving fallback address");

    // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
    rel_addr = *(sint32_t*)(instr+2);
    tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;

    TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);

    /* flush the shadow page tables */
    TM_DBG(core,UD,"Throwing out the shadow table");
    v3_invalidate_shadow_pts(core);

    // Increase RIP, ready to go to the next instruction
    core->rip += XBEGIN_INSTR_LEN;

    return 0;
}
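/* Worked example (hypothetical bytes): "c7 f8 f6 ff ff ff" is XBEGIN
 * with rel_addr = -10 (0xfffffff6), so at rip = 0x401000:
 *
 *   fail_call = 0x401000 + XBEGIN_INSTR_LEN (6) + (-10) = 0x400ffc
 *
 * i.e. the fallback target is computed relative to the end of the
 * XBEGIN instruction, as hardware RTM specifies.
 */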
/* XTEST: set ZF to 0 if we are inside a transaction, 1 otherwise */
static int
tm_handle_xtest (struct guest_info * core,
                 struct v3_trans_mem * tm)
{
    // if we are in TM mode, clear ZF (bit 6 of RFLAGS), otherwise set it
    if (tm->TM_MODE == TM_ON) {
        core->ctrl_regs.rflags &= ~(1ULL << 6);
    } else {
        core->ctrl_regs.rflags |= (1ULL << 6);
    }

    core->rip += XTEST_INSTR_LEN;

    return 0;
}
/* instructions (Haswell RTM):
 * XBEGIN  c7 f8 rel32
 * XABORT  c6 f8 imm8
 * XEND    0f 01 d5
 * XTEST   0f 01 d6
 */
static int
tm_handle_ud (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    uchar_t instr[INSTR_BUF_SZ];
    uint8_t byte1, byte2, byte3;

    tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);

    byte1 = *(uint8_t *)((addr_t)instr);
    byte2 = *(uint8_t *)((addr_t)instr + 1);
    byte3 = *(uint8_t *)((addr_t)instr + 2);

    if (byte1 == 0xc7 && byte2 == 0xf8) { /* bytes 3-6 are a rel32 immediate */

        TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xbegin(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XBEGIN\n");
            return -1;
        }

    } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an imm8 */

        TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xabort(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XABORT\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {

        TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xend(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XEND\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) {

        TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xtest(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XTEST\n");
            return -1;
        }

    } else {

        /* oh no, this is still unknown, pass the exception back to the guest! */
        TM_DBG(core,UD,"Encountered: %x %x %x\n", byte1, byte2, byte3);
        v3_raise_exception(core, UD_EXCEPTION);
    }

    return 0;
}
void
v3_tm_handle_exception (struct guest_info * info,
                        addr_t exit_code)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,ERR,"TM extension state not found\n");
        return;
    }

    switch (exit_code) {
        /* any of these exceptions should abort current transactions */
        case SVM_EXIT_EXCP6:
            if (tm_handle_ud(info) == -1) {
                TM_ERR(info,EXCP,"problem handling #UD\n");
            }
            break;
        case SVM_EXIT_EXCP0:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DE_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DE exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP1:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DB_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DB exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP3:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BP exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP4:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, OF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to OF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP5:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BR_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BR exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP7:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NM_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NM exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP10:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, TS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to TS exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP11:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NP exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP12:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, SS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to SS exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP13:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, GPF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to GPF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP16:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, MF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to MF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP17:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, AC_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to AC exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP19:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, XF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to XF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        default:
            TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
            break;
    }
}
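/* Note: the SVM exception exit codes are the exception vector plus a
 * fixed base (SVM_EXIT_EXCP0 == 0x40), which is why the default case
 * above can recover the vector as exit_code - 0x40.
 */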
void
v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
{
    ctrl_area->exceptions.de = 1; // 0  : divide by zero
    ctrl_area->exceptions.db = 1; // 1  : debug
    ctrl_area->exceptions.bp = 1; // 3  : breakpoint
    ctrl_area->exceptions.of = 1; // 4  : overflow
    ctrl_area->exceptions.br = 1; // 5  : bound range
    ctrl_area->exceptions.ud = 1; // 6  : undefined opcode
    ctrl_area->exceptions.nm = 1; // 7  : device not available
    ctrl_area->exceptions.ts = 1; // 10 : invalid TSS
    ctrl_area->exceptions.np = 1; // 11 : segment not present
    ctrl_area->exceptions.ss = 1; // 12 : stack fault
    ctrl_area->exceptions.gp = 1; // 13 : general protection
    ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
    ctrl_area->exceptions.ac = 1; // 17 : alignment check
    ctrl_area->exceptions.xf = 1; // 19 : SIMD floating point
}
extern void v3_stgi();
extern void v3_clgi();

/* 441-tm: if we are in TM mode, we need to check for any interrupts here,
 * and if there are any, we need to do some aborting! Make sure not to die
 * here if we are already 'aborting'; that results in an infinite loop
 */
void
v3_tm_check_intr_state (struct guest_info * info,
                        vmcb_ctrl_t * guest_ctrl,
                        vmcb_saved_state_t * guest_state)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,INTR,"TM extension state not found\n");
        return;
    }

    /* TODO: work this in */
    if (0 && (tm->TM_MODE == TM_ON) &&
             (tm->TM_ABORT != 1)) {

        if (guest_ctrl->guest_ctrl.V_IRQ ||
            guest_ctrl->EVENTINJ.valid) {

            rdtscll(tm->exit_time);
            TM_DBG(info,INTR,"%lld exits happened, time delta is %lld",
                    (info->num_exits - tm->entry_exits),
                    (tm->exit_time - tm->entry_time));

            // We do indeed have pending interrupts, so abort
            v3_stgi();
            TM_DBG(info,INTR,"we have a pending interrupt!\n");

            v3_handle_trans_abort(info);

            // Copy the new RIP state into the arch-dependent structure
            guest_state->rip = info->rip;
            TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);
            v3_clgi();
        }
    }
}
void
v3_tm_handle_pf_64 (struct guest_info * info,
                    pf_error_t error_code,
                    addr_t fault_addr,
                    addr_t * page_to_use)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
        return;
    }

    if (!tms) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
        return;
    }

    if ((tms->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);

        *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);

        if (*page_to_use == ERR_TRANS_FAULT_FAIL) {
            TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
            return;
        }

        if ((tm->TM_MODE == TM_ON) &&
            (tm->staging_page == NULL)) {

            tm->staging_page = V3_AllocPages(1);

            if (!(tm->staging_page)) {
                TM_ERR(info,MMU,"Problem allocating staging page\n");
                return;
            }

            TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
        }
    }
}
void
v3_tm_handle_usr_tlb_miss (struct guest_info * info,
                           pf_error_t error_code,
                           addr_t page_to_use,
                           addr_t * shadow_pa)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    /* TLB miss from user */
    if ((tm->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        if (page_to_use > TRANS_FAULT_OK) {
            TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
            *shadow_pa = page_to_use;
        }
    }
}
void
v3_tm_handle_read_fault (struct guest_info * info,
                         pf_error_t error_code,
                         pte64_t * shadow_pte)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    // if we are about to read, make the page read only
    if ((tms->TM_MODE == TM_ON) &&
        (tm->TM_STATE == TM_EXEC) &&
        (error_code.write == 0) &&
        (error_code.user == 1)) {

        TM_DBG(info,MMU, "Flagging the page read only\n");
        shadow_pte->writable = 0;
    }
}
void
v3_tm_decode_rtm_instrs (struct guest_info * info,
                         addr_t instr_ptr,
                         struct x86_instr * instr)
{
    uint8_t byte1, byte2, byte3;
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (tm->TM_MODE == TM_ON) {

        byte1 = *(uint8_t *)(instr_ptr);
        byte2 = *(uint8_t *)(instr_ptr + 1);
        byte3 = *(uint8_t *)(instr_ptr + 2);

        if (byte1 == 0xc7 &&
            byte2 == 0xf8) { /* bytes 3-6 are a rel32 immediate */

            TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 6;

        } else if (byte1 == 0xc6 &&
                   byte2 == 0xf8) { /* third byte is an imm8 */

            TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 3;

        } else if (byte1 == 0x0f &&
                   byte2 == 0x01 &&
                   byte3 == 0xd5) {

            TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
            instr->instr_length = 3;