/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
 *         Kyle C. Hale <kh@u.northwestern.edu>
 *         Marcel Flores <marcel-flores@u.northwestern.edu>
 *         Zachary Bischof <zbischof@u.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/svm.h>
#include <palacios/svm_handler.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vmm_hashtable.h>

#include <extensions/trans_mem.h>
#include <extensions/tm_util.h>
#if !V3_CONFIG_DEBUG_TM_FUNC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* this includes a mov to rax */
static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
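/* A hedged decode of the injected sequence above, assuming 64-bit guest code:
 *
 *   48 c7 c0 37 13 00 00    mov    $0x1337, %rax
 *   0f 01 d9                vmmcall
 *
 * The immediate 0x1337 is presumably the TM_KICKBACK_CALL hypercall number
 * registered in init_trans_mem() below; INSTR_INJECT_LEN elsewhere in this
 * file should match the 10 bytes shown here.
 */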
static struct v3_tm_state * tm_global_state = NULL;


tm_translate_rip (struct guest_info * core, addr_t * target)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_gpa_to_hva(core,
            get_addr_linear(core, core->rip, &(core->segments.cs)),
            target);
    } else if (core->mem_mode == VIRTUAL_MEM) {
        v3_gva_to_hva(core,
            get_addr_linear(core, core->rip, &(core->segments.cs)),
            target);
    }
}


tm_read_instr (struct guest_info * core,
               addr_t addr,
               uchar_t * dst,
               uint_t size)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_read_gpa_memory(core,
            get_addr_linear(core, addr, &(core->segments.cs)),
            size,
            dst);
    } else {
        v3_read_gva_memory(core,
            get_addr_linear(core, addr, &(core->segments.cs)),
            size,
            dst);
    }
}
tm_handle_decode_fail (struct guest_info * core)
{
    addr_t cur_rip = 0;
    uint64_t core_num = 0;

    tm_translate_rip(core, &cur_rip);

#ifdef V3_CONFIG_DEBUG_TM_FUNC
    v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
#endif

    /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
    for (core_num = 0; core_num < core->vm_info->num_cores; core_num++) {
        struct v3_trans_mem * remote_tm;

        /* skip local core */
        if (core_num == core->vcpu_id) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");
        if (!remote_tm) {
            TM_ERR(core,DECODE,"couldn't get remote_tm\n");
            return -1;
        }

        /* skip cores that aren't in transactional context */
        if (remote_tm->TM_MODE == TM_OFF) {
            continue;
        }

        TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
        remote_tm->TM_ABORT = 1;
    }

    return 0;
}
/* special casing for control-flow instructions
 * returns 1 if we need to jump
 * returns -1 on error
 */
tm_handle_ctrl_flow (struct guest_info * core,
                     struct v3_trans_mem * tm,
                     addr_t * instr_location,
                     struct x86_instr * struct_instr)
{
    /* special casing for control flow instructions */
    struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
    int to_jmp = 0;
    sint64_t offset = 0;

    switch (struct_instr->op_type) {

        case V3_OP_JLE:
            TM_DBG(core,DECODE, "!!++ JLE\n");
            to_jmp = (flags->zf || flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JAE:
            TM_DBG(core,DECODE,"!!++ JAE\n");
            to_jmp = (flags->cf == 0);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JMP:
            TM_DBG(core,DECODE,"!!++ JMP\n");
            to_jmp = 1;
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JNZ:
            TM_DBG(core,DECODE,"!!++ JNZ\n");
            to_jmp = (flags->zf == 0);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JL:
            TM_DBG(core,DECODE,"!!++ JL\n");
            to_jmp = (flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JNS:
            TM_DBG(core,DECODE,"!!++ JNS\n");
            to_jmp = (flags->sf == 0);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        default:
            *instr_location = core->rip + tm->cur_instr_len;
            tm->to_branch   = 0;
            break;
    }

    return to_jmp;
}
/*
 * called inside the #UD and VMMCALL handlers
 * only affects global state if the quix86 decoder falls over:
 * -> sets other cores' TM_ABORT to 1, returns -2
 */
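/* Sketch of the single-step (MIME) cycle this function participates in,
 * restated from the code below; the step names are descriptive, not source
 * symbols:
 *
 *   1. decode the instruction at the guest RIP to learn its length
 *      (and its branch target, via tm_handle_ctrl_flow)
 *   2. save the INSTR_INJECT_LEN bytes at the *next* RIP into tm->dirty_instr
 *   3. v3_overwrite_next_instr() writes vmmcall_bytes over those bytes
 *   4. the guest executes one instruction and traps into the hypercall,
 *      where v3_restore_dirty_instr() puts the saved bytes back
 */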
v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    struct x86_instr struct_instr;
    uchar_t cur_instr[INSTR_BUF_SZ];
    addr_t instr_location;

    // Fetch the current instruction
    tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);

    TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);

    /* Attempt to decode the current instruction to determine its length */
    if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {

        TM_ERR(core,Error,"Could not decode current instruction (at %llx)\n", (uint64_t)core->rip);

        /* this will attempt to abort all the remote cores */
        if (tm_handle_decode_fail(core) == -1) {
            TM_ERR(core,Error,"Could not handle failed decode\n");
            return ERR_STORE_FAIL;
        }

        /* we need to trigger a local abort */
        return ERR_STORE_MUST_ABORT;
    }

    /* we can't currently handle REP prefixes, abort */
    if (struct_instr.op_type != V3_INVALID_OP &&
            (struct_instr.prefixes.repne ||
             struct_instr.prefixes.repnz ||
             struct_instr.prefixes.rep   ||
             struct_instr.prefixes.repe  ||
             struct_instr.prefixes.repz)) {

        TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
        return ERR_STORE_MUST_ABORT;
    }

    tm->cur_instr_len = struct_instr.instr_length;

    /* handle jump instructions */
    tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);

    /* save the 10 bytes after the current instruction; we'll put the vmmcall there */
    tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);

    /* store the next instruction and its length in info */
    memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);

    return 0;
}
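/* Illustrative guest-memory layout produced by v3_store_next_instr() and
 * v3_overwrite_next_instr() below (a sketch; the offset term applies only
 * when tm_handle_ctrl_flow resolved a taken branch):
 *
 *   core->rip                        -> [ current instruction, cur_instr_len bytes ]
 *   core->rip + cur_instr_len
 *             (+ offset if taken)    -> [ 10 bytes saved in tm->dirty_instr,
 *                                         then overwritten with vmmcall_bytes ]
 */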
v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    addr_t ptr = core->rip;

    // save the register the injected mov will clobber
    tm->clobbered_rax = (core->vm_regs).rax;

    /* we can't currently handle instructions that span page boundaries */
    if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
        return -1;
    }

    ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);

    if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
        return -1;
    }

    if (v3_gva_to_hva(core,
                get_addr_linear(core, ptr, &(core->segments.cs)),
                &ptr) == -1) {
        TM_ERR(core,Error,"Calculating next rip hva failed\n");
        return -1;
    }

    TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
            core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len);

    /* Copy the VMM call over the beginning of the next instruction (at ptr) */
    memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);

    /* KCH: flag that we've dirtied an instruction, and store its addresses */
    tm->dirty_instr_flag = 1;
    tm->dirty_gva        = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
    tm->dirty_hva        = ptr;

    return 0;
}
/*
 * this should only be called if TM_STATE == TM_NULL; additionally, we check
 * whether our dirtied flag is set
 */
v3_restore_dirty_instr (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    /* Restore next instruction, transition to IFETCH state */
    TM_DBG(core,RESTORE,"Restoring next instruction.\n");

    /* check if we've actually done an instruction overwrite */
    if (!(tm->dirty_instr_flag)) {
        TM_DBG(core,RESTORE,"nothing to restore here...\n");
        return 0;
    }

    // Actually restore the instruction
    memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);

    // Restore the clobbered RAX
    (core->vm_regs).rax = tm->clobbered_rax;

    // Scoot the RIP back to the overwritten instruction
    TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    core->rip = tm->dirty_gva;

    // Clean up
    tm->dirty_instr_flag = 0;
    memset(tm->dirty_instr, 0, 15);

    TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);

    return 0;
}
tm_handle_fault_ifetch (struct guest_info * core,
                        struct v3_trans_mem * tm)
{
    int sto;

    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as the faulting address, we must be at an ifetch.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;
    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_DBG(core,EXIT,"aborting for some reason\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,PF,"problem overwriting instruction\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    return TRANS_FAULT_OK;
}
tm_handle_fault_read (struct guest_info * core,
                      struct v3_trans_mem * tm,
                      addr_t fault_addr,
                      pf_error_t error)
{
    // This page fault was caused by a read to memory in the current instruction for a core in TM mode
    TM_DBG(core,DATA,"Page fault caused by read.\n");
    TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
        TM_ERR(core,PF,"problem adding to list\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF,"problem recording access\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    /* if we have previously written to this address, we need to update our
     * staging page and map it in */
    if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {

        TM_DBG(core,PF,"Saw a read from something in the write list\n");

        /* write the value from the linked list to the staging page */
        if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
            TM_ERR(core,PF, "could not stage entry!\n");
            return ERR_TRANS_FAULT_FAIL;
        }

        /* Hand it the staging page */
        return (addr_t)(tm->staging_page);

    } else {

        // Add it to the read set
        addr_t shadow_addr = 0;

        TM_DBG(core,PF,"Saw a read from a fresh address\n");

        if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
            TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
            return ERR_TRANS_FAULT_FAIL;
        }
    }

    return TRANS_FAULT_OK;
}
tm_handle_fault_write (struct guest_info * core,
                       struct v3_trans_mem * tm,
                       addr_t fault_addr,
                       pf_error_t error)
{
    void * data_loc;
    addr_t virt_data_loc;
    addr_t shadow_addr = 0;

    TM_DBG(core,DATA,"Page fault caused by write\n");
    TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not add to list!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
        TM_ERR(core,WRITE,"could not translate gva to hva for transaction write\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    // Copy the existing value to the staging page, populating that field.
    // This avoids errors in optimized code such as ++, where the original
    // value is not read, but simply incremented.
    data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));

    if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
        TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
    *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;

    return (addr_t)(tm->staging_page);
}
tm_handle_fault_extern_ifetch (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    int sto;

    // the system is in TM state, record the access
    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");

    sto = v3_store_next_instr(core,tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,Error,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;

    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,IFETCH,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
tm_handle_fault_extern_access (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    TM_DBG(core,PF_HANDLE,"recording access\n");

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF_HANDLE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
tm_handle_fault_tmoff (struct guest_info * core)
{
    TM_DBG(core,PF_HANDLE, "in pf handler but no one is in TM mode anymore (core %d), I should try to eliminate hypercalls\n", core->vcpu_id);

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
/*
 * called from the MMU, which should mean at least tms->TM_MODE is on
 *
 * tm->on : ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, write log, store instr, overwrite instr
 * tm->off: ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, store instr, overwrite instr
 *
 * returns ERR_TRANS_FAULT_FAIL on error,
 * TRANS_FAULT_OK when things are fine, or
 * an addr when we're passing back a staging page
 */
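/* Condensed dispatch table for the handler below (a restatement of its
 * branches, not additional logic):
 *
 *   core TM_ON   && fault_addr == rip        -> tm_handle_fault_ifetch
 *   core TM_ON   && TM_EXEC && !error.write  -> tm_handle_fault_read
 *   core TM_ON   && TM_EXEC && error.write   -> tm_handle_fault_write
 *   system TM_ON && fault_addr == rip        -> tm_handle_fault_extern_ifetch
 *   system TM_ON && TM_EXEC                  -> tm_handle_fault_extern_access
 *   otherwise                                -> tm_handle_fault_tmoff
 */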
v3_handle_trans_mem_fault (struct guest_info * core,
                           addr_t fault_addr,
                           pf_error_t error)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(core,ERROR,": couldn't get core state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (!tms) {
        TM_ERR(core,ERROR,": couldn't get vm trans_mem state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);

    if ((tm->TM_MODE == TM_ON) &&
        ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_ifetch(core, tm);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 0)) {

        return tm_handle_fault_read(core, tm, fault_addr, error);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 1)) {

        return tm_handle_fault_write(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC)) {

        return tm_handle_fault_extern_access(core, tm, fault_addr, error);

    } else {

        return tm_handle_fault_tmoff(core);
    }

    return TRANS_FAULT_OK;
}
tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
{
    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,EXIT,"we are in tm mode but system is not!\n");
        return TRANS_HCALL_FAIL;
    }

    // we got to an exit when things were off!
    TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    tm->TM_STATE = TM_NULL;

    return TRANS_HCALL_OK;
}
tm_handle_hcall_dec_abort (struct guest_info * core,
                           struct v3_trans_mem * tm)
{
    // we only ever get here from TM DECODE
    TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");

    v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);

    TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));

    return TRANS_HCALL_OK;
}
tm_handle_hcall_ifetch_start (struct guest_info * core,
                              struct v3_trans_mem * tm)
{
    tm->TM_STATE = TM_IFETCH;

    TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away VTLB and go into TM_IFETCH\n");

    // Finally, invalidate the shadow page table
    v3_invalidate_shadow_pts(core);

    return TRANS_HCALL_OK;
}
tm_check_list_conflict (struct guest_info * core,
                        struct v3_trans_mem * tm,
                        struct list_head * access_list,
                        v3_tm_op_t op_type)
{
    struct mem_op * curr = NULL;
    struct mem_op * tmp  = NULL;
    int conflict = 0;

    list_for_each_entry_safe(curr, tmp, access_list, op_node) {

        conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);

        if (conflict == ERR_CHECK_FAIL) {

            TM_ERR(core,EXIT,"error checking for conflicts\n");
            return TRANS_HCALL_FAIL;

        } else if (conflict == CHECK_IS_CONFLICT) {

            TM_DBG(core,EXIT,"we have a conflict, aborting\n");
            v3_handle_trans_abort(core, TM_ABORT_CONFLICT, 0);
            return CHECK_MUST_ABORT;
        }
    }

    return TRANS_HCALL_OK;
}
tm_handle_hcall_check_conflicts (struct guest_info * core,
                                 struct v3_trans_mem * tm)
{
    int ret;

    TM_DBG(core,EXIT,"still TM_ON\n");
    TM_DBG(core,EXIT,"checking for conflicts\n");

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    tm->TM_STATE = TM_IFETCH;

    return TRANS_HCALL_OK;
}
/* trans mem hypercall handler
 *
 * cases, in roughly the order handled below:
 *   - running MIME (tm or tms on)
 *   - check for conflicts
 *   - abort (due to quix86)
 */
tm_handle_hcall (struct guest_info * core,
                 unsigned int hcall_id,
                 void * priv_data)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (tms->TM_MODE == TM_OFF) {
        return tm_handle_hcall_tmoff(core, tm);
    }

    // The previous instruction has finished, copy the staging page back into the linked list!
    if (update_list(tm, &(tm->trans_w_list)) == -1) {
        TM_ERR(core,HCALL,"could not update_list!\n");
        return TRANS_HCALL_FAIL;
    }

    // Done handling the previous instruction. Put back the next instruction,
    // reset %rip, and go back to the IFETCH state.
    TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    /* check whether we were previously told to abort */
    if (tm->TM_ABORT == 1 &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_dec_abort(core, tm);

    } else if (tm->TM_STATE == TM_EXEC) {
        return tm_handle_hcall_ifetch_start(core, tm);
    }

    if (tm->TM_MODE == TM_ON &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_check_conflicts(core, tm);

    } else if (tm->TM_MODE == TM_OFF) {
        TM_DBG(core,EXIT,"we are in TM_OFF\n");
    }

    return TRANS_HCALL_OK;
}
v3_tm_inc_tnum (struct v3_trans_mem * tm)
{
    addr_t irqstate;
    uint64_t new_ctxt;
    uint64_t * lt;

    lt = tm_global_state->last_trans;

    // grab the global last_trans
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    tm->t_num++;

    /* NOTE: this debug print assumes a two-core guest */
    TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
            (int)lt[1], (int)tm->t_num);

    if (new_ctxt != tm->t_num) {
        TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
        return -1;
    }

    return 0;
}
tm_set_abort_status (struct guest_info * core,
                     tm_abrt_cause_t cause,
                     uint8_t xabort_reason)
{
    core->vm_regs.rax = 0;

    switch (cause) {
        case TM_ABORT_XABORT:
            // we put the xabort immediate in eax 31:24
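            /* For reference, the RTM abort-status layout in EAX (restated
             * from the Intel SDM as a hedge; this file does not define it):
             *   bit 0     : abort caused by XABORT
             *   bit 1     : transaction may succeed on retry
             *   bit 2     : conflict with another logical processor
             *   bit 3     : internal buffer overflow
             *   bit 4     : debug breakpoint hit
             *   bit 5     : abort during a nested transaction
             *   bits 31:24: XABORT imm8 argument
             * ABORT_CONFLICT and ABORT_RETRY below are presumably bit
             * indices into this layout.
             */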
            core->vm_regs.rax |= (xabort_reason << 24);
            break;
        case TM_ABORT_CONFLICT:
            // if this was a conflict from another core, it may work on retry
            core->vm_regs.rax |= (1 << ABORT_CONFLICT) | (1 << ABORT_RETRY);
            break;
        case TM_ABORT_INTERNAL:
            core->vm_regs.rax |= (1 << cause);
            break;
        case TM_ABORT_UNSPECIFIED:
            // just return 0 in EAX
            break;
        default:
            TM_ERR(core, ABORT, "invalid abort cause\n");
            break;
    }
}
// xabort_reason is only used for the XABORT instruction
v3_handle_trans_abort (struct guest_info * core,
                       tm_abrt_cause_t cause,
                       uint8_t xabort_reason)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,ABORT,"problem freeing staging page\n");
        return -1;
    }

    // Clear the VTLB, which still has our staging page in it
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,ABORT,"problem clearing vtlb\n");
        return -1;
    }

    // Clear the transaction access lists
    v3_clear_tm_lists(tm);

    TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);

    if (tm->TM_MODE == TM_ON) {
        TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
        core->rip = tm->fail_call;
    }

    tm_set_abort_status(core, cause, xabort_reason);

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,GC,"could not gc!\n");
        return -1;
    }

    return 0;
}
tm_hash_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(void *));
}


tm_eq_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}


tm_hash_buf_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(addr_t));
}


tm_eq_buf_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}
/* this checks whether the remote access was done in the same
 * local transaction number as the current one */
tm_check_context (struct v3_vm_info * vm,
                  addr_t gva,
                  uint64_t core_num,
                  uint64_t curr_ctxt,
                  uint64_t * curr_lt,
                  v3_tm_op_t op_type)
{
    uint64_t core_id_sub;
    struct v3_tm_access_type * type = NULL;

    for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {
        struct v3_trans_mem * remote_tm;
        void * buf[3];
        addr_t key;

        /* skip the core that's doing the checking */
        if (core_id_sub == core_num) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");
        if (!remote_tm) {
            PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
            return ERR_CHECK_FAIL;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)core_id_sub;
        buf[2] = (void *)curr_lt[core_id_sub];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);

        if (type) {
            if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) || // so basically if write?
                 (op_type != OP_TYPE_WRITE && type->w)) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* check all the contexts in the list for a conflict */
tm_check_all_contexts (struct v3_vm_info * vm,
                       struct list_head * hash_list,
                       addr_t gva,
                       v3_tm_op_t op_type,
                       uint64_t core_num,
                       uint64_t curr_ctxt)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t * curr_lt       = NULL;
    int ret = 0;

    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        curr_lt = curr->curr_lt;

        if (curr_lt[core_num] == curr_ctxt) {

            ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* The following access patterns trigger an abort:
 *   We: Read  | Anyone Else: Write
 *   We: Write | Anyone Else: Read, Write
 *
 * (pg 8-2 of the Haswell manual)
 *
 * returns ERR_CHECK_FAIL on error
 * returns CHECK_IS_CONFLICT if there is a conflict
 * returns CHECK_NO_CONFLICT if there isn't
 */
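/* A hedged worked example of the rule above: suppose core 0, in transaction
 * t_num = 5, has gva 0x1000 in its write set. If core 1 then faults on a
 * read of 0x1000 while its context overlaps core 0's transaction 5,
 * tm_check_conflict() below should return CHECK_IS_CONFLICT (a write/read
 * overlap), and the caller aborts via v3_handle_trans_abort(). The address
 * and transaction numbers here are illustrative only.
 */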
tm_check_conflict (struct v3_vm_info * vm,
                   addr_t gva,
                   v3_tm_op_t op_type,
                   uint64_t core_num,
                   uint64_t curr_ctxt)
{
    uint64_t core_id;

    /* loop over other cores -> core_id */
    for (core_id = 0; core_id < vm->num_cores; core_id++) {

        struct guest_info * core = NULL;
        struct v3_trans_mem * tm = NULL;
        struct list_head * hash_list;

        /* only check other cores */
        if (core_id == core_num) {
            continue;
        }

        core = &(vm->cores[core_id]);
        tm   = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");

        if (!tm) {
            PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
            return ERR_CHECK_FAIL;
        }

        /* this core didn't access the address, move on */
        if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
            continue;
        }

        /* loop over the chained hash for gva, find fields with curr_ctxt -> curr_lt */
        int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);

        if (ret == ERR_CHECK_FAIL) {
            return ERR_CHECK_FAIL;
        } else if (ret == CHECK_IS_CONFLICT) {
            return CHECK_IS_CONFLICT;
        }
    }

    return CHECK_NO_CONFLICT;
}
tm_need_to_gc (struct v3_trans_mem * tm,
               struct hash_chain * curr,
               uint64_t * lt_copy,
               uint64_t tmoff)
{
    uint64_t to_gc = 1;
    uint64_t i;

    /* if none of the cores are in transactional context,
     * we know we can collect this context
     */
    if (!tmoff) {

        for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
            /* if *any* of the cores are active in a transaction
             * number that is current (listed in this context),
             * we know we can't collect this context, as it
             * will be needed when that core's transaction ends
             */
            if (curr->curr_lt[i] >= lt_copy[i]) {
                to_gc = 0;
                break;
            }
        }
    }

    return to_gc;
}
tm_del_stale_ctxt (struct hash_chain * curr)
{
    list_del(&(curr->lt_node));
    V3_Free(curr->curr_lt);
    V3_Free(curr);
}


tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
{
    v3_htable_remove(tm->access_type, key, 0);
    (tm->access_type_entries)--;
}
tm_collect_context (struct v3_trans_mem * tm,
                    struct hashtable_iter * ctxt_iter,
                    struct hash_chain * curr,
                    addr_t gva)
{
    uint64_t i;

    for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
        struct v3_ctxt_tuple tup;
        struct v3_tm_access_type * type;
        addr_t key;

        tup.gva     = (void *)gva;
        tup.core_id = (void *)i;
        tup.core_lt = (void *)curr->curr_lt[i];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);

        if (!type) { // something has gone terribly wrong
            TM_ERR(tm->ginfo,GC,"could not find accesstype entry to gc, THIS! IS! WRONG!\n");
            return -1;
        }

        /* delete the access type entry */
        tm_del_acc_entry(tm, key);
    }

    /* delete the stale context */
    tm_del_stale_ctxt(curr);

    return 0;
}
tm_collect_all_contexts (struct v3_trans_mem * tm,
                         struct hashtable_iter * ctxt_iter,
                         uint64_t tmoff,
                         uint64_t * lt_copy)
{
    struct hash_chain * tmp;
    struct hash_chain * curr;
    struct list_head * chain_list;
    addr_t gva;

    gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);

    chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

    /* this is a chained hash, so for each address we will have
     * a list of contexts. We now check each context to see
     * whether or not it can be collected
     */
    list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {

        uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);

        /* not garbage, go on to the next context in the list */
        if (!to_gc) {
            TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
            continue;
        }

        TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);

        /* found one, delete corresponding entries in access_type */
        if (tm_collect_context(tm, ctxt_iter, curr, gva) == -1) {
            TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
            return -1;
        }
    }

    /* if the context list (hash chain) is now empty, remove the hash entry */
    if (list_empty(chain_list)) {
        v3_htable_iter_remove(ctxt_iter, 0);
        (tm->addr_ctxt_entries)--;
    } else {
        v3_htable_iter_advance(ctxt_iter);
    }

    /* give the CPU away NONONO NEVER YIELD WHILE HOLDING A LOCK */

    return 0;
}
tm_hash_gc (struct v3_trans_mem * tm)
{
    addr_t irqstate, irqstate2;
    int ret = 0;
    uint64_t tmoff;
    uint64_t * lt_copy = NULL;
    struct v3_tm_state * tms = NULL;
    struct hashtable_iter * ctxt_iter = NULL;

    tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");
    if (!tms) {
        TM_ERR(tm->ginfo,GC,"could not get tm state\n");
        return -1;
    }

    TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));

    tmoff = (tms->cores_active == 0);

    lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    if (!lt_copy) {
        TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    /* lt_copy holds the last transaction number for each core */
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    /* lock both hashes */
    irqstate  = v3_lock_irqsave(tm->addr_ctxt_lock);
    irqstate2 = v3_lock_irqsave(tm->access_type_lock);

    /* loop over the hash entries in addr_ctxt */
    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
        v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
        v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);
        V3_Free(lt_copy);
        return -1;
    }

    /* we check each address stored in the hash */
    while (ctxt_iter->entry) {
        /* NOTE: this call advances the hash iterator */
        if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy) == -1) {
            ret = -1;
            break;
        }
    }

    v3_destroy_htable_iter(ctxt_iter);

    v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
    v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);

    if (ret == -1) {
        TM_ERR(tm->ginfo,GC,"garbage collection failed\n");
    } else {
        TM_DBG(tm->ginfo,GC,"ended garbage collection successfully\n");
    }

    V3_Free(lt_copy);

    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));

    return ret;
}
/* TODO: break out the for loops in these functions */
tm_update_ctxt_list (struct v3_trans_mem * tm,
                     uint64_t * lt_copy,
                     addr_t gva,
                     uint8_t write,
                     struct list_head * hash_list)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;
    uint8_t new_le = 1;

    /* check whether a chain entry with exactly this context already exists */
    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        uint64_t i;
        uint8_t same = 1;

        for (i = 0; i < num_cores; i++) {
            if (curr->curr_lt[i] != lt_copy[i]) {
                same = 0;
                break;
            }
        }

        if (same) {
            new_le = 0;
            break;
        }
    }

    if (new_le) {

        struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));

        if (!new_l) {
            TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
            return -1;
        }

        memset(new_l, 0, sizeof(struct hash_chain));

        new_l->curr_lt = lt_copy;

        list_add_tail(&(new_l->lt_node), hash_list);
    }

    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);

        if (!type) {

            type = V3_Malloc(sizeof(struct v3_tm_access_type));

            if (!type) {
                TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
                return -1;
            }

            memset(type, 0, sizeof(struct v3_tm_access_type));

            if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
                TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
                return -1;
            }

            (tm->access_type_entries)++;
        }

        /* record this access in the conflict log */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }
    }

    return 0;
}
/* no entry in addr_ctxt yet, create one */
tm_create_ctxt_key (struct v3_trans_mem * tm,
                    uint64_t * lt_copy,
                    addr_t gva,
                    uint8_t write)
{
    struct list_head * hash_list = NULL;
    struct hash_chain * new_l = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;

    hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));

    if (!hash_list) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
        return -1;
    }

    INIT_LIST_HEAD(hash_list);

    new_l = V3_Malloc(sizeof(struct hash_chain));

    if (!new_l) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
        goto out_err;
    }

    memset(new_l, 0, sizeof(struct hash_chain));

    new_l->curr_lt = lt_copy;

    /* add the context to the hash chain */
    list_add_tail(&(new_l->lt_node), hash_list);

    if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
        TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
        goto out_err1;
    }

    (tm->addr_ctxt_entries)++;

    /* TODO: we need a way to unwind and deallocate for all cores on failure here */
    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type = NULL;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        type = V3_Malloc(sizeof(struct v3_tm_access_type));

        if (!type) {
            TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
            return -1;
        }

        memset(type, 0, sizeof(struct v3_tm_access_type));

        /* record the initial access type */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
            TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
            return -1;
        }

        (tm->access_type_entries)++;
    }

    return 0;

out_err1:
    list_del(&(new_l->lt_node));
    V3_Free(new_l);
out_err:
    V3_Free(hash_list);
    return -1;
}
/*
 * called during MIME execution
 * record a memory access in the conflict logs
 * this locks the table during insertion
 */
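/* Shape of the two conflict-log tables used below (a restatement of the
 * code above, not a definition taken from the headers):
 *
 *   tm->addr_ctxt   : gva -> chained list of struct hash_chain, each holding
 *                     a snapshot (curr_lt[]) of every core's last transaction
 *                     number at the time of the access
 *   tm->access_type : hash(gva, core_id, core_lt) -> struct v3_tm_access_type
 *                     with r/w bits for that (address, core, transaction)
 */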
tm_record_access (struct v3_trans_mem * tm,
                  uint8_t write,
                  addr_t gva)
{
    struct list_head * hash_list;
    addr_t irqstate;
    uint64_t num_cores;
    uint64_t * lt_copy = NULL;

    num_cores = tm->ginfo->vm_info->num_cores;

    TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
            (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));

    lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);
    if (!lt_copy) {
        TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*num_cores);

    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
        /* we haven't created a context list for this address yet, go do it */
        return tm_create_ctxt_key(tm, lt_copy, gva, write);
    }

    /* we have a context list for this address already; do we need to create a new context? */
    return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
}
tm_prepare_cpuid (struct v3_vm_info * vm)
{
    V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");

    /* increase max CPUID function to 7 (extended feature flags enumeration) */
    v3_cpuid_add_fields(vm,0x0,
            0xffffffff, 0x7,
            0, 0,
            0, 0,
            0, 0);

    /* do the same for AMD */
    v3_cpuid_add_fields(vm,0x80000000,
            0xffffffff, 0x80000007,
            0, 0,
            0, 0,
            0, 0);

    /* enable RTM (CPUID.07H.EBX.RTM = 1) */
    v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
    v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
}
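/* For reference (restated from the Intel SDM as a hedge, not taken from this
 * codebase): the RTM feature flag is CPUID.(EAX=07H,ECX=0):EBX bit 11, which
 * is the bit the (1<<11) values above correspond to. HLE, by contrast, would
 * be EBX bit 4, which this extension does not advertise.
 */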
init_trans_mem (struct v3_vm_info * vm,
                v3_cfg_tree_t * cfg,
                void ** priv_data)
{
    int i;
    struct v3_tm_state * tms;

    PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");

    tms = V3_Malloc(sizeof(struct v3_tm_state));
    if (!tms) {
        PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
        return -1;
    }

    memset(tms, 0, sizeof(struct v3_tm_state));

    if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
        PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
        goto out_err;
    }

    v3_lock_init(&(tms->lock));

    tms->TM_MODE      = TM_OFF;
    tms->cores_active = 0;

    uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);
    if (!lt) {
        PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
        goto out_err1;
    }

    memset(lt, 0, sizeof(uint64_t) * vm->num_cores);

    for (i = 0; i < vm->num_cores; i++) {
        lt[i] = 0;
    }

    tms->last_trans = lt;

    *priv_data      = tms;
    tm_global_state = tms;

    tm_prepare_cpuid(vm);

    return 0;

out_err1:
    v3_lock_deinit(&(tms->lock));
    v3_remove_hypercall(vm, TM_KICKBACK_CALL);
out_err:
    V3_Free(tms);
    return -1;
}
init_trans_mem_core (struct guest_info * core,
                     void * priv_data,
                     void ** core_data)
{
    struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));

    TM_DBG(core,INIT, "Trans Mem. Core Init\n");

    if (!tm) {
        TM_ERR(core,INIT, "Problem allocating TM state\n");
        return -1;
    }

    memset(tm, 0, sizeof(struct v3_trans_mem));

    INIT_LIST_HEAD(&tm->trans_r_list);
    INIT_LIST_HEAD(&tm->trans_w_list);

    tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
    if (!(tm->addr_ctxt)) {
        TM_ERR(core,INIT,"problem creating addr_ctxt\n");
        goto out_err;
    }

    tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
    if (!(tm->access_type)) {
        TM_ERR(core,INIT,"problem creating access_type\n");
        goto out_err1;
    }

    v3_lock_init(&(tm->addr_ctxt_lock));
    v3_lock_init(&(tm->access_type_lock));

    tm->TM_STATE = TM_NULL;
    tm->TM_MODE  = TM_OFF;
    tm->TM_ABORT = 0;

    tm->ginfo = core;

    tm->access_type_entries = 0;
    tm->addr_ctxt_entries   = 0;
    tm->dirty_instr_flag    = 0;

    /* TODO: Cache Model */
    //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
    //tm->box->init = init_cache;
    //tm->box->init(sample_spec, tm->box);

    *core_data = tm;

    return 0;

out_err1:
    v3_free_htable(tm->addr_ctxt, 0, 0);
out_err:
    V3_Free(tm);
    return -1;
}
deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
{
    struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;

    if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
        PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
        return -1;
    }

    v3_lock_deinit(&(tms->lock));

    V3_Free(tms->last_trans);
    V3_Free(tms);

    return 0;
}
deinit_trans_mem_core (struct guest_info * core,
                       void * priv_data,
                       void * core_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
    struct hashtable_iter * ctxt_iter = NULL;

    v3_clear_tm_lists(tm);

    if (tm->staging_page) {
        TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
    }

    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
        return -1;
    }

    /* delete all context entries for each hashed address */
    while (ctxt_iter->entry) {
        struct hash_chain * tmp;
        struct hash_chain * curr;
        struct list_head * chain_list;
        addr_t gva;

        gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
        chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

        /* delete the context */
        list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
            tm_del_stale_ctxt(curr);
        }

        v3_htable_iter_advance(ctxt_iter);
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* we've already deleted the values in this one */
    v3_free_htable(tm->addr_ctxt, 0, 0);

    /* KCH WARNING: we may not want to free access type values here */
    v3_free_htable(tm->access_type, 1, 0);

    v3_lock_deinit(&(tm->addr_ctxt_lock));
    v3_lock_deinit(&(tm->access_type_lock));

    V3_Free(tm);

    return 0;
}
static struct v3_extension_impl trans_mem_impl = {
    .name        = "trans_mem",
    .vm_init     = init_trans_mem,
    .vm_deinit   = deinit_trans_mem,
    .core_init   = init_trans_mem_core,
    .core_deinit = deinit_trans_mem_core,
};

register_extension(&trans_mem_impl);
/*
 * tms->on  => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
 * tms->off => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
 */
tm_handle_xend (struct guest_info * core,
                struct v3_trans_mem * tm)
{
    /* XEND should raise a #GP when RTM mode is not on */
    if (tm->TM_MODE != TM_ON) {
        TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* Our transaction finished! */
    /* Copy over data from the staging page */
    TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");

    if (commit_list(core, tm) == -1) {
        TM_ERR(core,UD,"error committing tm list to memory\n");
        return -1;
    }

    TM_DBG(core,UD,"Freeing staging page and internal data structures\n");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,XEND,"couldn't free staging page\n");
        return -1;
    }

    // clear the VTLB, as it may still contain our staging page
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,XEND,"couldn't clear vtlb\n");
        return -1;
    }

    // clear the transaction access lists
    v3_clear_tm_lists(tm);

    /* Set the state and advance the RIP */
    TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
    core->rip += XEND_INSTR_LEN;

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,XEND,"could not gc!\n");
        return -1;
    }

    return 0;
}
/*
 * tms->on  => handle our abort code, handle_trans_abort will clear necessary state
 * tms->off => handle our abort code, handle_trans_abort will clear necessary state
 */
tm_handle_xabort (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    uint8_t reason;

    // we must reflect the immediate back into EAX 31:24
    reason = *(uint8_t*)(instr+2);

    // Error checking! make sure that we have gotten here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_DBG(core, UD, "We got here while not in a transactional core!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    TM_DBG(core,UD,"aborting\n");

    if (tm->TM_STATE != TM_NULL) {
        v3_restore_dirty_instr(core);
    }

    // Handle the abort
    v3_handle_trans_abort(core, TM_ABORT_XABORT, reason);

    return 0;
}
/*
 * tms->on  => we set up our running env, set_tm will clear other VTLBs to start single stepping
 * tms->off => we set up our running env, set_tm will not clear anyone else's VTLB
 */
tm_handle_xbegin (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    sint32_t rel_addr = 0;
    uint8_t out_of_bounds = 0;
    uint8_t in_compat_no_long = 0;

    if (tm->TM_MODE == TM_ON) {
        /* TODO: this is actually an indication of nesting, we'll fix this later */
        TM_ERR(core,UD,"We don't support nested transactions yet!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
    rel_addr = *(sint32_t*)(instr+2);

    /* raise a GPF if we're trying to set a fail call outside of the code segment */
    in_compat_no_long = (core->cpu_mode == LONG_32_COMPAT) || ((struct efer_64*)&(core->ctrl_regs.efer))->lma == 0;
    out_of_bounds     = (core->rip + rel_addr > core->segments.cs.base + core->segments.cs.limit ||
                         core->rip + rel_addr < core->segments.cs.base);

    if (in_compat_no_long && out_of_bounds) {
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* TODO: also raise a GPF if we're in long mode and the fail call isn't canonical */

    /* set the tm_mode for this core */
    TM_DBG(core,UD,"Set the system in TM Mode, save fallback address");

    tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;

    TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);

    /* flush the shadow page tables */
    TM_DBG(core,UD,"Throwing out the shadow table");
    v3_invalidate_shadow_pts(core);

    // Increase RIP, ready to go to the next instruction
    core->rip += XBEGIN_INSTR_LEN;

    return 0;
}
/*
 * tms->on  => we set up our running env, set_tm will clear other VTLBs to start single stepping
 * tms->off => we set up our running env, set_tm will not clear anyone else's VTLB
 */
tm_handle_xtest (struct guest_info * core,
                 struct v3_trans_mem * tm)
{
    struct rflags * rf = (struct rflags*)&(core->ctrl_regs.rflags);

    // if we are in TM mode, set ZF to 0, otherwise 1
    if (tm->TM_MODE == TM_ON) {
        rf->zf = 0;
    } else {
        rf->zf = 1;
    }

    core->rip += XTEST_INSTR_LEN;

    return 0;
}
/* RTM instruction encodings handled in the #UD handler below:
 *
 * XBEGIN c7 f8 rel32
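 * XABORT c6 f8 imm8
 * XEND   0f 01 d5
 * XTEST  0f 01 d6
 *
 * (the last three encodings are restated from the byte checks below)
 */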
tm_handle_ud (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    uchar_t instr[INSTR_BUF_SZ];
    uint8_t byte1, byte2, byte3;

    tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);

    byte1 = *(uint8_t *)((addr_t)instr);
    byte2 = *(uint8_t *)((addr_t)instr + 1);
    byte3 = *(uint8_t *)((addr_t)instr + 2);

    if (byte1 == 0xc7 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xbegin(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XBEGIN\n");
            return -1;
        }

    } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xabort(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XABORT\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {

        TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xend(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XEND\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) {

        TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xtest(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XTEST\n");
            return -1;
        }

    } else {

        /* oh no, this is still unknown, pass the error back to the guest! */
        TM_DBG(core,UD,"Encountered: %x %x %x\n", byte1, byte2, byte3);
        v3_raise_exception(core, UD_EXCEPTION);
    }

    return 0;
}
v3_tm_handle_exception (struct guest_info * info,
                        addr_t exit_code)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,ERR,"TM extension state not found\n");
        return -1;
    }

    switch (exit_code) {
        /* any of these exceptions should abort current transactions */
        case SVM_EXIT_EXCP6:
            if (tm_handle_ud(info) == -1) {
                return -1;
            }
            break;
        case SVM_EXIT_EXCP0:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DE_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DE exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP1:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DB_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DB exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP3:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP4:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, OF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to OF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP5:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BR_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BR exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP7:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NM_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NM exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP10:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, TS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to TS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP11:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP12:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, SS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to SS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP13:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, GPF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to GPF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP16:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, MF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to MF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP17:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, AC_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to AC exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP19:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, XF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to XF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        default:
            TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
            break;
    }

    return 0;
}
v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
{
    ctrl_area->exceptions.de = 1; // 0  : divide by zero
    ctrl_area->exceptions.db = 1; // 1  : debug
    ctrl_area->exceptions.bp = 1; // 3  : breakpoint
    ctrl_area->exceptions.of = 1; // 4  : overflow
    ctrl_area->exceptions.br = 1; // 5  : bound range
    ctrl_area->exceptions.ud = 1; // 6  : undefined opcode
    ctrl_area->exceptions.nm = 1; // 7  : device not available
    ctrl_area->exceptions.ts = 1; // 10 : invalid TSS
    ctrl_area->exceptions.np = 1; // 11 : segment not present
    ctrl_area->exceptions.ss = 1; // 12 : stack fault
    ctrl_area->exceptions.gp = 1; // 13 : general protection
    ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
    ctrl_area->exceptions.ac = 1; // 17 : alignment check
    ctrl_area->exceptions.xf = 1; // 19 : SIMD floating point
}
extern void v3_stgi();
extern void v3_clgi();

/* 441-tm: if we are in TM mode, we need to check for any interrupts here,
 * and if there are any, we need to do some aborting! Make sure not to die here
 * if we are already 'aborting'; that results in an infinite loop.
 */
v3_tm_check_intr_state (struct guest_info * info,
                        vmcb_ctrl_t * guest_ctrl,
                        vmcb_saved_state_t * guest_state)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,INTR,"TM extension state not found\n");
        return;
    }

    if ((tm->TM_MODE == TM_ON) &&
        (tm->TM_ABORT != 1)) {

        if (guest_ctrl->guest_ctrl.V_IRQ ||
            guest_ctrl->EVENTINJ.valid) {

            // We do indeed have pending interrupts
            v3_stgi();

            TM_DBG(info,INTR,"we have a pending interrupt\n");

            v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);

            // Copy the new RIP state into the arch-dependent structure
            guest_state->rip = info->rip;

            //TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);

            v3_clgi();
        }
    }
}
v3_tm_handle_pf_64 (struct guest_info * info,
                    pf_error_t error_code,
                    addr_t fault_addr,
                    addr_t * page_to_use)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
        return -1;
    }

    if (!tms) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
        return -1;
    }

    if ((tms->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);

        *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);

        if (*page_to_use == ERR_TRANS_FAULT_FAIL) {
            TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
            return -1;
        }

        if ((tm->TM_MODE == TM_ON) &&
            (tm->staging_page == NULL)) {

            tm->staging_page = V3_AllocPages(1);

            if (!(tm->staging_page)) {
                TM_ERR(info,MMU,"Problem allocating staging page\n");
                return -1;
            }

            TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
        }
    }

    return 0;
}
v3_tm_handle_usr_tlb_miss (struct guest_info * info,
                           pf_error_t error_code,
                           addr_t page_to_use,
                           addr_t * shadow_pa)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    /* TLB miss from user */
    if ((tm->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        if (page_to_use > TRANS_FAULT_OK) {
            TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
            *shadow_pa = page_to_use;
        }
    }
}
v3_tm_handle_read_fault (struct guest_info * info,
                         pf_error_t error_code,
                         pte64_t * shadow_pte)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    // If we are about to read, make it read only
    if ((tms->TM_MODE == TM_ON) &&
        (tm->TM_STATE == TM_EXEC) &&
        (error_code.write == 0) &&
        (error_code.user == 1)) {

        TM_DBG(info,MMU, "Flagging the page read only\n");
        shadow_pte->writable = 0;
    }
}
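/* Rationale for the write-protection above (restating the mechanism in this
 * file, not adding to it): leaving the shadow PTE non-writable means a later
 * store to this page faults again, so v3_tm_handle_pf_64() gets a chance to
 * log the write and redirect it to the staging page instead of letting it
 * reach guest memory directly.
 */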
v3_tm_decode_rtm_instrs (struct guest_info * info,
                         addr_t instr_ptr,
                         struct x86_instr * instr)
{
    uint8_t byte1, byte2, byte3;
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (tm->TM_MODE == TM_ON) {

        byte1 = *(uint8_t *)(instr_ptr);
        byte2 = *(uint8_t *)(instr_ptr + 1);
        byte3 = *(uint8_t *)(instr_ptr + 2);

        if (byte1 == 0xc7 &&
            byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 6;
            return 0;

        } else if (byte1 == 0xc6 &&
                   byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 3;
            return 0;

        } else if (byte1 == 0x0f &&
                   byte2 == 0x01 &&
                   byte3 == 0xd5) {

            TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
            instr->instr_length = 3;