/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
 *         Kyle C. Hale <kh@u.northwestern.edu>
 *         Marcel Flores <marcel-flores@u.northwestern.edu>
 *         Zachary Bischof <zbischof@u.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/svm.h>
#include <palacios/svm_handler.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vmm_hashtable.h>

#include <extensions/trans_mem.h>
#include <extensions/tm_util.h>
#if !V3_CONFIG_DEBUG_TM_FUNC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* TODO:
 * - save/restore register state on XBEGIN/XABORT
 * - put status codes in RAX
 * - implement proper exceptions for failed XBEGINs, etc.
 */

/* this includes a mov to rax */
static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
static struct v3_tm_state * tm_global_state = NULL;
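
/* For reference, the injected sequence above decodes as (standard x86-64
 * encodings):
 *
 *   48 c7 c0 37 13 00 00     mov rax, 0x1337     ; REX.W + C7 /0 imm32
 *   0f 01 d9                 vmmcall
 *
 * so TM_KICKBACK_CALL is presumably 0x1337: it is loaded into RAX before the
 * VMMCALL so that the hypercall dispatcher routes the exit to
 * tm_handle_hcall().
 */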
static int
tm_translate_rip (struct guest_info * core, addr_t * target)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_gpa_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    } else if (core->mem_mode == VIRTUAL_MEM) {
        v3_gva_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    }

    return 0;
}
static int
tm_read_instr (struct guest_info * core,
               addr_t addr,
               uchar_t * dst,
               uint_t size)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_read_gpa_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    } else {
        v3_read_gva_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    }

    return 0;
}
static int
tm_handle_decode_fail (struct guest_info * core)
{
    addr_t cur_rip = 0;
    uint_t core_num;

    tm_translate_rip(core, &cur_rip);

#ifdef V3_CONFIG_DEBUG_TM_FUNC
    v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
#endif

    /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
    for (core_num = 0; core_num < core->vm_info->num_cores; core_num++) {

        struct v3_trans_mem * remote_tm;

        /* skip the local core */
        if (core_num == core->vcpu_id) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");
        if (!remote_tm) {
            TM_ERR(core,DECODE,"couldn't get remote_tm\n");
            return -1;
        }

        /* skip cores that aren't in a transactional context */
        if (remote_tm->TM_MODE == TM_OFF) {
            continue;
        }

        TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
        remote_tm->TM_ABORT = 1;
    }

    return 0;
}
/* special casing for control-flow instructions
 * returns 1 if we need to jump
 * returns -1 on error
 */
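/* A quick reference for the flag tests below (standard x86 Jcc semantics):
 *
 *   JLE: taken if ZF == 1 or SF != OF
 *   JAE: taken if CF == 0
 *   JMP: always taken
 *   JNZ: taken if ZF == 0
 *   JL : taken if SF != OF
 *   JNS: taken if SF == 0
 *
 * In every case the next-instruction location is computed as
 *
 *   *instr_location = rip + instr_len + (taken ? rel_offset : 0)
 *
 * so the VMMCALL is injected at the instruction that will actually run next.
 */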
static int
tm_handle_ctrl_flow (struct guest_info * core,
                     struct v3_trans_mem * tm,
                     addr_t * instr_location,
                     struct x86_instr * struct_instr)
{
    /* special casing for control flow instructions */
    struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
    int to_jmp = 0;
    sint64_t offset = 0;

    switch (struct_instr->op_type) {

        case V3_OP_JLE:
            TM_DBG(core,DECODE,"!!++ JLE\n");
            to_jmp = (flags->zf || flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JAE:
            TM_DBG(core,DECODE,"!!++ JAE\n");
            to_jmp = (flags->cf == 0);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JMP:
            TM_DBG(core,DECODE,"!!++ JMP\n");
            to_jmp = 1;
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNZ:
            TM_DBG(core,DECODE,"!!++ JNZ\n");
            to_jmp = (flags->zf == 0);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JL:
            TM_DBG(core,DECODE,"!!++ JL\n");
            to_jmp = (flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNS:
            TM_DBG(core,DECODE,"!!++ JNS\n");
            to_jmp = (flags->sf == 0);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        default:
            *instr_location = core->rip + tm->cur_instr_len;
            break;
    }

    return to_jmp;
}
/*
 * called inside the #UD and VMMCALL handlers
 * only affects global state if quix86 falls over
 *   -> set the other cores' TM_ABORT to 1, return -2
 */
static int
v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    struct x86_instr struct_instr;
    uchar_t cur_instr[INSTR_BUF_SZ];
    addr_t instr_location;

    // Fetch the current instruction
    tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);

    TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);

    /* Attempt to decode the current instruction to determine its length */
    if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {

        TM_ERR(core,Error,"Could not decode current instruction (at %llx)\n", (uint64_t)core->rip);

        /* this will attempt to abort all the remote cores */
        if (tm_handle_decode_fail(core) == -1) {
            TM_ERR(core,Error,"Could not handle failed decode\n");
            return ERR_STORE_FAIL;
        }

        /* we need to trigger a local abort */
        return ERR_STORE_MUST_ABORT;
    }

    /* we can't currently handle REP prefixes, abort */
    if (struct_instr.op_type != V3_INVALID_OP &&
            (struct_instr.prefixes.repne ||
             struct_instr.prefixes.repnz ||
             struct_instr.prefixes.rep   ||
             struct_instr.prefixes.repe  ||
             struct_instr.prefixes.repz)) {

        TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
        return ERR_STORE_MUST_ABORT;
    }

    tm->cur_instr_len = struct_instr.instr_length;

    /* handle jump instructions */
    tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);

    /* save the 10 bytes after the current instruction; we'll put the vmmcall there */
    tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);

    /* store the next instruction and its length in info */
    memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);

    return 0;
}
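
/* Taken together, v3_store_next_instr() and v3_overwrite_next_instr() form
 * the single-stepping loop this extension runs on: decode the instruction at
 * RIP to find where control goes next, stash the INSTR_INJECT_LEN bytes that
 * live there, overwrite them with the VMMCALL sequence, let the guest execute
 * exactly one instruction, and then take the resulting hypercall exit in
 * tm_handle_hcall(), which restores the stashed bytes and repeats. (A sketch
 * of the intended flow, inferred from the handlers in this file.)
 */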
static int
v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    addr_t ptr;

    // stash rax; the injected mov will clobber it
    tm->clobbered_rax = (core->vm_regs).rax;

    ptr = core->rip;

    /* we can't currently handle instructions that span page boundaries */
    if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
        return -1;
    }

    ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);

    if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
        return -1;
    }

    if (v3_gva_to_hva(core,
                get_addr_linear(core, ptr, &(core->segments.cs)),
                &ptr) == -1) {
        TM_ERR(core,Error,"Calculating next rip hva failed\n");
        return -1;
    }

    TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
            core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len);

    /* Copy the VMMCALL into the beginning of the next instruction (ptr) */
    memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);

    /* KCH: flag that we've dirtied an instruction, and store its guest and host addresses */
    tm->dirty_instr_flag = 1;
    tm->dirty_gva = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
    tm->dirty_hva = ptr;

    return 0;
}
/* restore the instruction bytes we saved off
 *
 * this should only be called if TM_STATE == TM_NULL; additionally, we check
 * whether our dirtied flag is set
 */
static int
v3_restore_dirty_instr (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    /* Restore next instruction, transition to IFETCH state */
    TM_DBG(core,RESTORE,"Restoring next instruction.\n");

    /* check if we've actually done an instruction overwrite */
    if (!(tm->dirty_instr_flag)) {
        TM_DBG(core,RESTORE,"nothing to restore here...\n");
        return 0;
    }

    // Actually restore the instruction
    memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);

    // Put rax back
    (core->vm_regs).rax = tm->clobbered_rax;

    // Scoot rip back up
    TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    core->rip = tm->dirty_gva;

    // clean up the dirty state
    tm->dirty_instr_flag = 0;
    tm->dirty_gva = 0;
    tm->dirty_hva = 0;
    memset(tm->dirty_instr, 0, 15);

    TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);

    return 0;
}
static addr_t
tm_handle_fault_ifetch (struct guest_info * core,
                        struct v3_trans_mem * tm)
{
    int sto;

    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as the faulting address, we must be at an ifetch.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;
    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_DBG(core,EXIT,"aborting for some reason\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,PF,"problem overwriting instruction\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_read (struct guest_info * core,
                      struct v3_trans_mem * tm,
                      addr_t fault_addr,
                      pf_error_t error)
{
    // This page fault was caused by a read to memory in the current instruction for a core in TM mode
    TM_DBG(core,DATA,"Page fault caused by read.\n");
    TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
        TM_ERR(core,PF,"problem adding to list\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF,"problem recording access\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    /* if we have previously written to this address, we need to update our
     * staging page and map it in */
    if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {

        TM_DBG(core,PF,"Saw a read from something in the write list\n");

        /* write the value from the linked list to the staging page */
        if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
            TM_ERR(core,PF, "could not stage entry!\n");
            return ERR_TRANS_FAULT_FAIL;
        }

        /* Hand it the staging page */
        return (addr_t)(tm->staging_page);

    } else {

        // Add it to the read set
        addr_t shadow_addr = 0;

        TM_DBG(core,PF,"Saw a read from a fresh address\n");

        if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
            TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
            return ERR_TRANS_FAULT_FAIL;
        }
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_write (struct guest_info * core,
                       struct v3_trans_mem * tm,
                       addr_t fault_addr,
                       pf_error_t error)
{
    void * data_loc;
    addr_t virt_data_loc;
    addr_t shadow_addr = 0;

    TM_DBG(core,DATA,"Page fault caused by write\n");
    TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not add to list!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
        TM_ERR(core,WRITE,"could not translate gva to hva for transaction write\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    // Copy the existing value to the staging page, populating that field.
    // This avoids errors in optimized code such as ++, where the original
    // value is not read, but simply incremented
    data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));

    if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
        TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
    //memcpy((void*)virt_data_loc, (void*)shadow_addr, sizeof(uint64_t));
    *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;

    return (addr_t)(tm->staging_page);
}
static addr_t
tm_handle_fault_extern_ifetch (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    int sto;

    // the system is in TM state, record the access
    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,Error,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;

    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,IFETCH,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_extern_access (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    TM_DBG(core,PF_HANDLE,"recording access\n");

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF_HANDLE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_tmoff (struct guest_info * core)
{
    TM_DBG(core,PF_HANDLE, "in the pf handler, but no one is in TM mode anymore (core %d); we should try to eliminate hypercalls\n", core->vcpu_id);

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
/*
 * called from the MMU -- this should mean that at least tms->TM_MODE is on
 *
 * tm->on : ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, write log, store instr, overwrite instr
 * tm->off: ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, store instr, overwrite instr
 *
 * returns ERR_TRANS_FAULT_FAIL on error
 * returns TRANS_FAULT_OK when things are fine
 * returns an addr when we're passing back a staging page
 */
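/* A rough sketch of the per-core state machine driven from here (inferred
 * from the handlers above and the hypercall handlers below):
 *
 *   TM_NULL ---XBEGIN---> TM_IFETCH ---#PF at RIP, inject VMMCALL---> TM_EXEC
 *   TM_EXEC ---hypercall exit: restore bytes, log/commit---> TM_IFETCH ...
 *
 * with aborts or XEND dropping the core back out of TM mode.
 */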
addr_t
v3_handle_trans_mem_fault (struct guest_info * core,
                           addr_t fault_addr,
                           pf_error_t error)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(core,ERROR,"couldn't get core state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (!tms) {
        TM_ERR(core,ERROR,"couldn't get vm trans_mem state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);

    if ((tm->TM_MODE == TM_ON) &&
        ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_ifetch(core, tm);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 0)) {

        return tm_handle_fault_read(core, tm, fault_addr, error);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 1)) {

        return tm_handle_fault_write(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC)) {

        return tm_handle_fault_extern_access(core, tm, fault_addr, error);

    } else {

        return tm_handle_fault_tmoff(core);
    }

    return TRANS_FAULT_OK;
}
static int
tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
{
    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,EXIT,"we are in TM mode but the system is not!\n");
        return TRANS_HCALL_FAIL;
    }

    // we got to an exit when things were off!
    TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    tm->TM_STATE = TM_NULL;

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_dec_abort (struct guest_info * core,
                           struct v3_trans_mem * tm)
{
    // we only ever get here from TM DECODE
    TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");

    tm->TM_ABORT = 0;

    v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);

    TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_ifetch_start (struct guest_info * core,
                              struct v3_trans_mem * tm)
{
    tm->TM_STATE = TM_IFETCH;

    TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away the VTLB and go into TM_IFETCH\n");

    // Finally, invalidate the shadow page table
    v3_invalidate_shadow_pts(core);

    return TRANS_HCALL_OK;
}
static int
tm_check_list_conflict (struct guest_info * core,
                        struct v3_trans_mem * tm,
                        struct list_head * access_list,
                        v3_tm_op_t op_type)
{
    struct mem_op * curr = NULL;
    struct mem_op * tmp  = NULL;
    int conflict = 0;

    list_for_each_entry_safe(curr, tmp, access_list, op_node) {

        conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);

        if (conflict == ERR_CHECK_FAIL) {

            TM_ERR(core,EXIT,"error checking for conflicts\n");
            return TRANS_HCALL_FAIL;

        } else if (conflict == CHECK_IS_CONFLICT) {

            TM_DBG(core,EXIT,"we have a conflict, aborting\n");
            v3_handle_trans_abort(core, TM_ABORT_CONFLICT, 0);
            return CHECK_MUST_ABORT;
        }
    }

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_check_conflicts (struct guest_info * core,
                                 struct v3_trans_mem * tm)
{
    int ret;

    TM_DBG(core,EXIT,"still TM_ON\n");
    TM_DBG(core,EXIT,"checking for conflicts\n");

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    tm->TM_STATE = TM_IFETCH;

    return TRANS_HCALL_OK;
}
/* trans mem hypercall handler
 *
 * cases handled below:
 *   - the system is off           -> restore the instruction and leave
 *   - running MIME (tm or tms on) -> restore state, continue single stepping
 *   - decode-failure abort        -> abort (due to quix86)
 *   - tm on for core and system   -> check for conflicts
 */
static int
tm_handle_hcall (struct guest_info * core,
                 unsigned int hcall_id,
                 void * priv_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (tms->TM_MODE == TM_OFF) {
        return tm_handle_hcall_tmoff(core, tm);
    }

    // The previous instruction has finished, copy the staging page back into the linked list!
    if (update_list(tm, &(tm->trans_w_list)) == -1) {
        TM_ERR(core,HCALL,"could not update_list!\n");
        return TRANS_HCALL_FAIL;
    }

    // Done handling the previous instruction: put back the next instruction, reset %rip, and go back to the IFETCH state
    TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    /* check if we were supposed to abort (e.g. after a decode failure) */
    if (tm->TM_ABORT == 1 &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_dec_abort(core, tm);

    } else if (tm->TM_STATE == TM_EXEC) {
        return tm_handle_hcall_ifetch_start(core, tm);
    }

    if (tm->TM_MODE == TM_ON &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_check_conflicts(core, tm);

    } else if (tm->TM_MODE == TM_OFF) {
        TM_DBG(core,EXIT,"we are in TM_OFF\n");
    }

    return TRANS_HCALL_OK;
}
static int
v3_tm_inc_tnum (struct v3_trans_mem * tm)
{
    addr_t irqstate;
    uint64_t new_ctxt;
    uint64_t * lt;

    lt = tm_global_state->last_trans;

    // grab the global last_trans
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    tm->t_num++;

    /* NOTE: this debug print assumes a two-core guest */
    TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
            (int)lt[1], (int)tm->t_num);

    if (new_ctxt != tm->t_num) {
        TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
        return -1;
    }

    return 0;
}
static void
tm_set_abort_status (struct guest_info * core,
                     tm_abrt_cause_t cause,
                     uint8_t xabort_reason)
{
    core->vm_regs.rax = 0;

    switch (cause) {
        case TM_ABORT_XABORT:
            // we put the xabort immediate in eax 31:24
            core->vm_regs.rax |= (xabort_reason << 24);
            break;
        case TM_ABORT_CONFLICT:
            // if this was a conflict from another core, it may work
            // if the guest tries again
            core->vm_regs.rax |= (1 << ABORT_CONFLICT) | (1 << ABORT_RETRY);
            break;
        case TM_ABORT_INTERNAL:
            core->vm_regs.rax |= (1 << cause);
            break;
        case TM_ABORT_UNSPECIFIED:
            // just return 0 in EAX
            break;
        default:
            TM_ERR(core, ABORT, "invalid abort cause\n");
            break;
    }
}
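
/* For reference, the architectural RTM abort status in EAX is laid out as
 * follows (Intel SDM):
 *
 *   bit 0     set if the abort came from XABORT (imm8 lands in bits 31:24)
 *   bit 1     the transaction may succeed on a retry
 *   bit 2     a conflict with another logical processor
 *   bit 3     an internal buffer overflowed
 *   bit 4     a debug breakpoint was hit
 *   bit 5     abort during a nested transaction
 *
 * The ABORT_* bit positions used above presumably mirror this layout.
 */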
// xabort_reason is only used for the XABORT instruction
int
v3_handle_trans_abort (struct guest_info * core,
                       tm_abrt_cause_t cause,
                       uint8_t xabort_reason)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,ABORT,"problem freeing staging page\n");
        return -1;
    }

    // Clear the VTLB, which still has our staging page in it
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,ABORT,"problem clearing vtlb\n");
        return -1;
    }

    // Clear the read/write lists
    v3_clear_tm_lists(tm);

    TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);

    if (tm->TM_MODE == TM_ON) {
        TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
        core->rip = tm->fail_call;

        // clr_tm handles the global state
        v3_clr_tm(tm);

        // bump the transaction number
        v3_tm_inc_tnum(tm);
    }

    tm_set_abort_status(core, cause, xabort_reason);

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,GC,"could not gc!\n");
        return -1;
    }

    return 0;
}
static uint_t
tm_hash_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(void *));
}

static int
tm_eq_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}

static uint_t
tm_hash_buf_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(addr_t));
}

static int
tm_eq_buf_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}
/* this checks if the remote access was done with the same
 * local transaction number as the current one */
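/* Access-type entries are keyed by hashing the tuple
 * (gva, core id, that core's transaction number); conceptually:
 *
 *   void * buf[3] = { (void*)gva, (void*)core, (void*)last_trans[core] };
 *   addr_t key    = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);
 *
 * so a lookup below asks: "did core N touch this gva during its transaction
 * number T?" (an illustration of the scheme used in this function)
 */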
static int
tm_check_context (struct v3_vm_info * vm,
                  addr_t gva,
                  uint64_t core_num,
                  uint64_t curr_ctxt,
                  uint64_t * curr_lt,
                  v3_tm_op_t op_type)
{
    uint64_t core_id_sub;
    struct v3_tm_access_type * type = NULL;

    for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {

        struct v3_trans_mem * remote_tm;
        void * buf[3];
        addr_t key;

        /* skip the core that's doing the checking */
        if (core_id_sub == core_num) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");
        if (!remote_tm) {
            PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
            return ERR_CHECK_FAIL;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)core_id_sub;
        buf[2] = (void *)curr_lt[core_id_sub];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);

        if (type) {
            /* a remote write conflicts with anything we do; a remote
             * read conflicts only if we are writing */
            if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) ||
                 (op_type != OP_TYPE_WRITE && type->w)) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* check all the contexts in the list for a conflict */
static int
tm_check_all_contexts (struct v3_vm_info * vm,
                       struct list_head * hash_list,
                       addr_t gva,
                       v3_tm_op_t op_type,
                       uint64_t core_num,
                       uint64_t curr_ctxt)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t * curr_lt = NULL;
    int ret = 0;

    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        curr_lt = curr->curr_lt;

        if (curr_lt[core_num] == curr_ctxt) {

            ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* The following access patterns trigger an abort:
 *   We: Read  | Anyone Else: Write
 *   We: Write | Anyone Else: Read or Write
 *
 * (pg. 8-2 of the Haswell manual)
 *
 * returns ERR_CHECK_FAIL on error
 * returns CHECK_IS_CONFLICT if there is a conflict
 * returns CHECK_NO_CONFLICT if there isn't
 */
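/* Worked example of the table above: if we wrote gva X during our
 * transaction, any recorded remote read or write of X in an overlapping
 * context is a conflict; if we only read X, we conflict only with a
 * recorded remote write of X. This is exactly the (type->r, type->w)
 * test in tm_check_context() above.
 */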
static int
tm_check_conflict (struct v3_vm_info * vm,
                   addr_t gva,
                   v3_tm_op_t op_type,
                   uint64_t core_num,
                   uint64_t curr_ctxt)
{
    uint64_t core_id;

    /* loop over the other cores -> core_id */
    for (core_id = 0; core_id < vm->num_cores; core_id++) {

        struct guest_info * core = NULL;
        struct v3_trans_mem * tm = NULL;
        struct list_head * hash_list;

        /* only check the other cores */
        if (core_id == core_num) {
            continue;
        }

        core = &(vm->cores[core_id]);
        tm = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");

        if (!tm) {
            PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
            return ERR_CHECK_FAIL;
        }

        /* this core didn't access the address, move on */
        if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
            continue;
        } else {

            /* loop over the chained hash for this gva, find fields with curr_ctxt -> curr_lt */
            int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
static uint64_t
tm_need_to_gc (struct v3_trans_mem * tm,
               struct hash_chain * curr,
               uint64_t * lt_copy,
               uint64_t tmoff)
{
    uint64_t to_gc = 1;
    uint64_t i;

    /* if none of the cores are in a transactional context,
     * we know we can collect this context
     */
    if (!tmoff) {

        for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
            /* if *any* of the cores are active in a transaction
             * number that is current (listed in this context),
             * we know we can't collect this context, as it
             * will be needed when that core's transaction ends
             */
            if (curr->curr_lt[i] >= lt_copy[i]) {
                to_gc = 0;
                break;
            }
        }
    }

    return to_gc;
}
static void
tm_del_stale_ctxt (struct hash_chain * curr)
{
    list_del(&(curr->lt_node));
    V3_Free(curr->curr_lt);
    V3_Free(curr);
}

static void
tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
{
    v3_htable_remove(tm->access_type, key, 0);
    (tm->access_type_entries)--;
}
static int
tm_collect_context (struct v3_trans_mem * tm,
                    struct hashtable_iter * ctxt_iter,
                    struct hash_chain * curr,
                    uint64_t * begin_time,
                    uint64_t * end_time,
                    addr_t gva)
{
    uint64_t i;

    for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {

        struct v3_tm_access_type * type;
        void * buf[3];
        addr_t key;

        rdtscll(*end_time);
        if ((*end_time - *begin_time) > 100000000) {
            TM_ERR(tm->ginfo,GC,"time threshold exceeded, exiting!!!\n");
            return -1;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)i;
        buf[2] = (void *)curr->curr_lt[i];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);

        if (!type) { // something has gone terribly wrong
            TM_ERR(tm->ginfo,GC,"could not find access type entry to gc, THIS! IS! WRONG!\n");
            return -1;
        }

        /* delete the access type entry */
        tm_del_acc_entry(tm, key);
    }

    /* delete the stale context */
    tm_del_stale_ctxt(curr);

    return 0;
}
static int
tm_collect_all_contexts (struct v3_trans_mem * tm,
                         struct hashtable_iter * ctxt_iter,
                         uint64_t tmoff,
                         uint64_t * lt_copy,
                         uint64_t * begin_time,
                         uint64_t * end_time)
{
    struct hash_chain * tmp;
    struct hash_chain * curr;
    struct list_head * chain_list;
    addr_t gva;

    gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);

    chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

    /* this is a chained hash, so for each address we have
     * a list of contexts. We now check each context to see
     * whether or not it can be collected
     */
    list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {

        uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);

        /* not garbage, go on to the next context in the list */
        if (!to_gc) {
            TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
            continue;
        }

        TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);

        /* found one, delete the corresponding entries in access_type */
        if (tm_collect_context(tm, ctxt_iter, curr, begin_time, end_time, gva) == -1) {
            TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
            return -1;
        }
    }

    /* if the context list (hash chain) is now empty, remove the hash entry */
    if (list_empty(chain_list)) {
        v3_htable_iter_remove(ctxt_iter, 0);
        (tm->addr_ctxt_entries)--;
    } else {
        v3_htable_iter_advance(ctxt_iter);
    }

    /* do NOT give the CPU away here -- never yield while holding a lock */

    return 0;
}
static int
tm_hash_gc (struct v3_trans_mem * tm)
{
    addr_t irqstate, irqstate2;
    int ret = 0;
    uint64_t begin_time, end_time, tmoff;
    uint64_t * lt_copy = NULL;
    struct v3_tm_state * tms = NULL;
    struct hashtable_iter * ctxt_iter = NULL;

    tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");
    if (!tms) {
        TM_ERR(tm->ginfo,GC,"could not get global tm state\n");
        return -1;
    }

    TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));

    tmoff = (tms->cores_active == 0);

    lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    if (!lt_copy) {
        TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    rdtscll(begin_time);

    /* lt_copy holds the last transaction number for each core */
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    /* lock both hashes */
    irqstate  = v3_lock_irqsave(tm->addr_ctxt_lock);
    irqstate2 = v3_lock_irqsave(tm->access_type_lock);

    /* loop over the hash entries in addr_ctxt */
    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
        v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
        v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);
        V3_Free(lt_copy);
        return -1;
    }

    /* we check each address stored in the hash */
    while (ctxt_iter->entry) {
        /* NOTE: this call advances the hash iterator */
        if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy, &begin_time, &end_time) == -1) {
            ret = -1;
            break;
        }
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* note: each lock is restored with the irq state saved when it was taken */
    v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
    v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);

    V3_Free(lt_copy);

    rdtscll(end_time);

    if (ret == -1) {
        TM_ERR(tm->ginfo,GC,"garbage collection failed, time spent: %d cycles\n", (int)(end_time - begin_time));
    } else {
        TM_DBG(tm->ginfo,GC,"ended garbage collection successfully, time spent: %d cycles\n", (int)(end_time - begin_time));
    }

    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));

    return ret;
}
/* TODO: break out the for loops in these functions */
static int
tm_update_ctxt_list (struct v3_trans_mem * tm,
                     uint64_t * lt_copy,
                     addr_t gva,
                     uint8_t write,
                     struct list_head * hash_list)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;
    addr_t key;
    uint8_t new_le = 1;

    /* check whether a chain entry with exactly this set of transaction
     * numbers already exists; if so, we don't need a new one */
    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        uint8_t same = 1;
        uint64_t i;

        for (i = 0; i < num_cores; i++) {
            if (curr->curr_lt[i] != lt_copy[i]) {
                same = 0;
                break;
            }
        }

        if (same) {
            new_le = 0;
            break;
        }
    }

    if (new_le) {
        struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));

        if (!new_l) {
            TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
            return -1;
        }

        memset(new_l, 0, sizeof(struct hash_chain));

        new_l->curr_lt = lt_copy;

        list_add_tail(&(new_l->lt_node), hash_list);
    }

    /* now record the access type for each core's current context */
    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type;
        struct v3_ctxt_tuple tup;
        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);

        if (!type) {
            /* no entry for this access yet, create one */
            type = V3_Malloc(sizeof(struct v3_tm_access_type));

            if (!type) {
                TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
                return -1;
            }

            memset(type, 0, sizeof(struct v3_tm_access_type));

            if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
                TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
                return -1;
            }

            (tm->access_type_entries)++;
        }

        /* mark the access */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }
    }

    return 0;
}
/* no entry in addr-ctxt yet, create one */
static int
tm_create_ctxt_key (struct v3_trans_mem * tm,
                    uint64_t * lt_copy,
                    addr_t gva,
                    uint8_t write)
{
    struct list_head * hash_list = NULL;
    struct hash_chain * new_l = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;

    hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
    if (!hash_list) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
        return -1;
    }

    INIT_LIST_HEAD(hash_list);

    new_l = V3_Malloc(sizeof(struct hash_chain));
    if (!new_l) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
        V3_Free(hash_list);
        return -1;
    }

    memset(new_l, 0, sizeof(struct hash_chain));

    new_l->curr_lt = lt_copy;

    /* add the context to the hash chain */
    list_add_tail(&(new_l->lt_node), hash_list);

    if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
        TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
        goto out_err;
    }

    (tm->addr_ctxt_entries)++;

    /* TODO: we need a way to unwind and deallocate for all cores on failure here */
    for (core_id = 0; core_id < num_cores; core_id++) {

        struct v3_tm_access_type * type = NULL;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        type = V3_Malloc(sizeof(struct v3_tm_access_type));
        if (!type) {
            TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
            goto out_err;
        }

        memset(type, 0, sizeof(struct v3_tm_access_type));

        /* record the access type */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
            TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
            goto out_err;
        }

        (tm->access_type_entries)++;
    }

    return 0;

out_err:
    list_del(&(new_l->lt_node));
    V3_Free(new_l);
    V3_Free(hash_list);
    return -1;
}
/*
 * called during MIME execution
 * records this memory access in the conflict logs
 * this locks the tables during insertion
 */
static int
tm_record_access (struct v3_trans_mem * tm,
                  uint8_t write,
                  addr_t gva)
{
    uint64_t * lt_copy = NULL;
    struct list_head * hash_list;
    addr_t irqstate;
    uint64_t num_cores;

    num_cores = tm->ginfo->vm_info->num_cores;

    TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
            (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));
    //PrintDebug(tm->ginfo->vm_info, tm->ginfo,"\tWe think that addr-ctxt.cnt = %d, access-type.cnt = %d\n",(int)tm->addr_ctxt_entries,(int)tm->access_type_entries);

    lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);
    if (!lt_copy) {
        TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*num_cores);

    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
        /* we haven't created a context list for this address yet, go do it */
        return tm_create_ctxt_key(tm, lt_copy, gva, write);
    }

    /* we already have a context list for this address; do we need to create a new context? */
    return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
}
static void
tm_prepare_cpuid (struct v3_vm_info * vm)
{
    V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");

    /* increase max CPUID function to 7 (extended feature flags enumeration) */
    v3_cpuid_add_fields(vm,0x0,
            0xf, 0x7,
            0, 0,
            0, 0,
            0, 0);

    /* do the same for AMD */
    v3_cpuid_add_fields(vm,0x80000000,
            0xffffffff, 0x80000007,
            0, 0,
            0, 0,
            0, 0);

    /* enable RTM (CPUID.07H.EBX.RTM = 1) */
    v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
    v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
}
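
/* With the bits above exposed, a guest can detect RTM support in the usual
 * way, e.g. (illustrative guest-side snippet, not part of this module):
 *
 *   unsigned int eax = 0x7, ebx, ecx = 0, edx;
 *   __asm__ __volatile__ ("cpuid"
 *                         : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
 *                         : "a"(eax), "c"(ecx));
 *   int has_rtm = (ebx >> 11) & 1;  // CPUID.(EAX=07H,ECX=0):EBX.RTM[bit 11]
 */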
static int
init_trans_mem (struct v3_vm_info * vm,
                v3_cfg_tree_t * cfg,
                void ** priv_data)
{
    struct v3_tm_state * tms;
    int i;

    PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");

    tms = V3_Malloc(sizeof(struct v3_tm_state));
    if (!tms) {
        PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
        return -1;
    }

    memset(tms, 0, sizeof(struct v3_tm_state));

    if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
        PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
        V3_Free(tms);
        return -1;
    }

    v3_lock_init(&(tms->lock));

    tms->TM_MODE      = TM_OFF;
    tms->cores_active = 0;

    uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);
    if (!lt) {
        PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
        goto out_err;
    }

    memset(lt, 0, sizeof(uint64_t) * vm->num_cores);

    for (i = 0; i < vm->num_cores; i++) {
        lt[i] = 0;
    }

    tms->last_trans = lt;

    *priv_data = tms;
    tm_global_state = tms;

    tm_prepare_cpuid(vm);

    return 0;

out_err:
    v3_lock_deinit(&(tms->lock));
    v3_remove_hypercall(vm, TM_KICKBACK_CALL);
    V3_Free(tms);
    return -1;
}
static int
init_trans_mem_core (struct guest_info * core,
                     void * priv_data,
                     void ** core_data)
{
    struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));

    TM_DBG(core,INIT, "Trans Mem. Core Init\n");

    if (!tm) {
        TM_ERR(core,INIT, "Problem allocating TM state\n");
        return -1;
    }

    memset(tm, 0, sizeof(struct v3_trans_mem));

    INIT_LIST_HEAD(&tm->trans_r_list);
    INIT_LIST_HEAD(&tm->trans_w_list);

    tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
    if (!(tm->addr_ctxt)) {
        TM_ERR(core,INIT,"problem creating addr_ctxt\n");
        goto out_err;
    }

    tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
    if (!(tm->access_type)) {
        TM_ERR(core,INIT,"problem creating access_type\n");
        goto out_err1;
    }

    v3_lock_init(&(tm->addr_ctxt_lock));
    v3_lock_init(&(tm->access_type_lock));

    tm->TM_STATE = TM_NULL;
    tm->TM_MODE  = TM_OFF;
    tm->TM_ABORT = 0;

    tm->ginfo = core;
    tm->t_num = 0;

    tm->access_type_entries = 0;
    tm->addr_ctxt_entries   = 0;
    tm->dirty_instr_flag    = 0;

    /* TODO: Cache Model */
    //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
    //tm->box->init = init_cache;
    //tm->box->init(sample_spec, tm->box);

    *core_data = tm;

    return 0;

out_err1:
    v3_free_htable(tm->addr_ctxt, 0, 0);
out_err:
    V3_Free(tm);
    return -1;
}
static int
deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
{
    struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;

    if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
        PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
        return -1;
    }

    v3_lock_deinit(&(tms->lock));

    if (tms->last_trans) {
        V3_Free(tms->last_trans);
    }

    V3_Free(tms);

    return 0;
}
static int
deinit_trans_mem_core (struct guest_info * core,
                       void * priv_data,
                       void * core_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
    struct hashtable_iter * ctxt_iter = NULL;

    v3_clear_tm_lists(tm);

    if (tm->staging_page) {
        TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
    }

    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
        return -1;
    }

    /* delete all the context entries for each hashed address */
    while (ctxt_iter->entry) {

        struct hash_chain * tmp;
        struct hash_chain * curr;
        struct list_head * chain_list;
        addr_t gva;

        gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
        chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

        /* delete the contexts */
        list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
            tm_del_stale_ctxt(curr);
        }

        v3_htable_iter_advance(ctxt_iter);
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* we've already deleted the values in this one */
    v3_free_htable(tm->addr_ctxt, 0, 0);

    /* KCH WARNING: we may not want to free the access type values here */
    v3_free_htable(tm->access_type, 1, 0);

    v3_lock_deinit(&(tm->addr_ctxt_lock));
    v3_lock_deinit(&(tm->access_type_lock));

    V3_Free(tm);

    return 0;
}
static struct v3_extension_impl trans_mem_impl = {
    .name = "trans_mem",
    .vm_init = init_trans_mem,
    .vm_deinit = deinit_trans_mem,
    .core_init = init_trans_mem_core,
    .core_deinit = deinit_trans_mem_core,
};

register_extension(&trans_mem_impl);
/*
 * tms->on  => commit our list, free the staging page, clear our lists; clr_tm will handle the global state, then gc
 * tms->off => commit our list, free the staging page, clear our lists; clr_tm will handle the global state, then gc
 */
static int
tm_handle_xend (struct guest_info * core,
                struct v3_trans_mem * tm)
{
    rdtscll(tm->exit_time);

    /* XEND should raise a #GP when RTM mode is not on */
    if (tm->TM_MODE != TM_ON) {
        TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
        v3_free_staging_page(tm);
        v3_clr_vtlb(core);
        v3_clear_tm_lists(tm);
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* Our transaction finished! */
    /* Copy over data from the staging page */
    TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");

    if (commit_list(core, tm) == -1) {
        TM_ERR(core,UD,"error committing tm list to memory\n");
        return -1;
    }

    TM_DBG(core,UD,"Freeing staging page and internal data structures\n");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,XEND,"couldn't free staging page\n");
        return -1;
    }

    // clear the VTLB, as it may still contain our staging page
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,XEND,"couldn't clear vtlb\n");
        return -1;
    }

    // clear the read/write lists
    v3_clear_tm_lists(tm);

    /* Set the state and advance the RIP */
    TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
    core->rip += XEND_INSTR_LEN;

    // clr_tm handles the global state; bump the transaction number
    v3_clr_tm(tm);
    v3_tm_inc_tnum(tm);

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,XEND,"could not gc!\n");
        return -1;
    }

    return 0;
}
/*
 * tms->on  => handle our abort code; handle_trans_abort will clear the necessary state
 * tms->off => handle our abort code; handle_trans_abort will clear the necessary state
 */
static int
tm_handle_xabort (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    uint8_t reason;

    // we must reflect the immediate back into EAX 31:24
    reason = *(uint8_t*)(instr+2);

    /* TODO: this probably needs to move somewhere else */
    rdtscll(tm->exit_time);

    // Error checking! Make sure that we got here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_DBG(core, UD, "We got here while not in a transactional core!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    TM_DBG(core,UD,"aborting\n");

    if (tm->TM_STATE != TM_NULL) {
        v3_restore_dirty_instr(core);
    }

    // Handle the abort
    v3_handle_trans_abort(core, TM_ABORT_XABORT, reason);

    return 0;
}
/*
 * tms->on  => we set up our running env; set_tm will clear the other cores' VTLBs to start single stepping
 * tms->off => we set up our running env; set_tm will not clear anyone else's VTLB
 */
static int
tm_handle_xbegin (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    sint32_t rel_addr = 0;
    uint8_t out_of_bounds = 0;
    uint8_t in_compat_no_long = 0;

    if (tm->TM_MODE == TM_ON) {
        /* TODO: this is actually an indication of nesting, we'll fix this later */
        TM_ERR(core,UD,"We don't support nested transactions yet!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
    rel_addr = *(sint32_t*)(instr+2);

    /* raise a #GP if we're trying to set a fail call outside of the code segment */
    in_compat_no_long = (core->cpu_mode == LONG_32_COMPAT) || ((struct efer_64*)&(core->ctrl_regs.efer))->lma == 0;
    out_of_bounds     = (core->rip + rel_addr > core->segments.cs.base + core->segments.cs.limit ||
                         core->rip + rel_addr < core->segments.cs.base);

    if (in_compat_no_long && out_of_bounds) {
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* TODO: also raise a #GP if we're in long mode and the fail call isn't canonical */

    /* TODO: put this elsewhere */
    rdtscll(tm->entry_time);
    tm->entry_exits = core->num_exits;

    /* set the tm_mode for this core */
    v3_set_tm(tm);

    TM_DBG(core,UD,"Set the system in TM Mode, save fallback address");

    // Save the fail call address
    tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;

    TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);

    /* flush the shadow page tables */
    TM_DBG(core,UD,"Throwing out the shadow table");
    v3_invalidate_shadow_pts(core);

    // Increase RIP, ready to go to the next instruction
    core->rip += XBEGIN_INSTR_LEN;

    return 0;
}
/*
 * tm->on  => clear ZF: the guest is executing transactionally
 * tm->off => set ZF
 */
static int
tm_handle_xtest (struct guest_info * core,
                 struct v3_trans_mem * tm)
{
    struct rflags * rf = (struct rflags*)&(core->ctrl_regs.rflags);

    // if we are in TM mode, set ZF to 0; otherwise set it to 1
    if (tm->TM_MODE == TM_ON) {
        rf->zf = 0;
    } else {
        rf->zf = 1;
    }

    core->rip += XTEST_INSTR_LEN;

    return 0;
}
/* instruction encodings (Intel RTM):
 *
 *   XBEGIN  c7 f8 rel32
 *   XABORT  c6 f8 imm8
 *   XEND    0f 01 d5
 *   XTEST   0f 01 d6
 */
static int
tm_handle_ud (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    uchar_t instr[INSTR_BUF_SZ];
    uint8_t byte1, byte2, byte3;

    tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);

    byte1 = *(uint8_t *)((addr_t)instr);
    byte2 = *(uint8_t *)((addr_t)instr + 1);
    byte3 = *(uint8_t *)((addr_t)instr + 2);

    if (byte1 == 0xc7 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xbegin(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XBEGIN\n");
            return -1;
        }

    } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xabort(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XABORT\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {

        TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xend(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XEND\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) {

        TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xtest(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XTEST\n");
            return -1;
        }

    } else {

        /* oh no, this is still unknown; pass the error back to the guest! */
        TM_DBG(core,UD,"Encountered: %x %x %x\n", byte1, byte2, byte3);
        v3_raise_exception(core, UD_EXCEPTION);
    }

    return 0;
}
int
v3_tm_handle_exception (struct guest_info * info,
                        addr_t exit_code)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,ERR,"TM extension state not found\n");
        return -1;
    }

    switch (exit_code) {
        /* any of these exceptions should abort current transactions */
        case SVM_EXIT_EXCP6:
            if (tm_handle_ud(info) == -1) {
                return -1;
            }
            break;
        case SVM_EXIT_EXCP0:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DE_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DE exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP1:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DB_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DB exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP3:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP4:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, OF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to OF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP5:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BR_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BR exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP7:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NM_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NM exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP10:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, TS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to TS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP11:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP12:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, SS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to SS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP13:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, GPF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to GPF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP16:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, MF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to MF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP17:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, AC_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to AC exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP19:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, XF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to XF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        default:
            TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
            break;
    }

    return 0;
}
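
/* Note on the "- 0x40" above: AMD SVM reports intercepted exceptions with
 * exit codes VMEXIT_EXCP0 + vector (0x40-0x5f), so SVM_EXIT_EXCP0 is
 * presumably 0x40 and subtracting it recovers the exception vector.
 */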
void
v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
{
    ctrl_area->exceptions.de = 1; // 0  : divide by zero
    ctrl_area->exceptions.db = 1; // 1  : debug
    ctrl_area->exceptions.bp = 1; // 3  : breakpoint
    ctrl_area->exceptions.of = 1; // 4  : overflow
    ctrl_area->exceptions.br = 1; // 5  : bound range
    ctrl_area->exceptions.ud = 1; // 6  : undefined opcode
    ctrl_area->exceptions.nm = 1; // 7  : device not available
    ctrl_area->exceptions.ts = 1; // 10 : invalid TSS
    ctrl_area->exceptions.np = 1; // 11 : segment not present
    ctrl_area->exceptions.ss = 1; // 12 : stack fault
    ctrl_area->exceptions.gp = 1; // 13 : general protection
    ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
    ctrl_area->exceptions.ac = 1; // 17 : alignment check
    ctrl_area->exceptions.xf = 1; // 19 : SIMD floating point
}
extern void v3_stgi();
extern void v3_clgi();
/* 441-tm: if we are in TM mode, we need to check for any interrupts here,
 * and if there are any, we need to do some aborting! Make sure not to die here
 * if we are already 'aborting' -- that results in an infinite loop
 */
void
v3_tm_check_intr_state (struct guest_info * info,
                        vmcb_ctrl_t * guest_ctrl,
                        vmcb_saved_state_t * guest_state)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,INTR,"TM extension state not found\n");
        return;
    }

    if ((tm->TM_MODE == TM_ON) &&
        (tm->TM_ABORT != 1)) {

        if (guest_ctrl->guest_ctrl.V_IRQ ||
            guest_ctrl->EVENTINJ.valid) {

            // We do indeed have pending interrupts
            TM_DBG(info,INTR,"we have a pending interrupt\n");

            v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);

            // Copy the new RIP state into the arch-dependent structure
            guest_state->rip = info->rip;

            //TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);
        }
    }
}
int
v3_tm_handle_pf_64 (struct guest_info * info,
                    pf_error_t error_code,
                    addr_t fault_addr,
                    addr_t * page_to_use)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
        return -1;
    }

    if (!tms) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
        return -1;
    }

    if ((tms->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);

        *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);

        if (*page_to_use == ERR_TRANS_FAULT_FAIL) {
            TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
            return -1;
        }

        if ((tm->TM_MODE == TM_ON) &&
            (tm->staging_page == NULL)) {

            tm->staging_page = V3_AllocPages(1);

            if (!(tm->staging_page)) {
                TM_ERR(info,MMU,"Problem allocating staging page\n");
                return -1;
            }

            TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
        }
    }

    return 0;
}
void
v3_tm_handle_usr_tlb_miss (struct guest_info * info,
                           pf_error_t error_code,
                           addr_t page_to_use,
                           addr_t * shadow_pa)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    /* TLB miss from user */
    if ((tm->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        if (page_to_use > TRANS_FAULT_OK) {
            TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
            *shadow_pa = page_to_use;
        }
    }
}
void
v3_tm_handle_read_fault (struct guest_info * info,
                         pf_error_t error_code,
                         pte64_t * shadow_pte)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    // If we are about to read, make the page read-only
    if ((tms->TM_MODE == TM_ON) &&
        (tm->TM_STATE == TM_EXEC) &&
        (error_code.write == 0) &&
        (error_code.user == 1)) {

        TM_DBG(info,MMU, "Flagging the page read only\n");
        shadow_pte->writable = 0;
    }
}
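
/* Making the PTE read-only here is what keeps the write path honest: a
 * subsequent store to the same page still faults, which routes transactional
 * writes through tm_handle_fault_write() and onto the staging page.
 */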
int
v3_tm_decode_rtm_instrs (struct guest_info * info,
                         addr_t instr_ptr,
                         struct x86_instr * instr)
{
    uint8_t byte1, byte2, byte3;
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (tm->TM_MODE == TM_ON) {

        byte1 = *(uint8_t *)(instr_ptr);
        byte2 = *(uint8_t *)(instr_ptr + 1);
        byte3 = *(uint8_t *)(instr_ptr + 2);

        if (byte1 == 0xc7 &&
            byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 6;
            return 0;

        } else if (byte1 == 0xc6 &&
                   byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 3;
            return 0;

        } else if (byte1 == 0x0f &&
                   byte2 == 0x01 &&
                   byte3 == 0xd5) {

            TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
            instr->instr_length = 3;