/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
 *         Kyle C. Hale <kh@u.northwestern.edu>
 *         Marcel Flores <marcel-flores@u.northwestern.edu>
 *         Zachary Bischof <zbischof@u.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/svm.h>
#include <palacios/svm_handler.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vmm_hashtable.h>

#include <extensions/trans_mem.h>
#include <extensions/tm_util.h>
#if !V3_CONFIG_DEBUG_TM_FUNC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* this includes a mov to rax */
static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
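/* A hedged decode of the injected sequence above, assuming 64-bit guest code:
 *
 *   48 c7 c0 37 13 00 00    mov    $0x1337, %rax
 *   0f 01 d9                vmmcall
 *
 * The immediate 0x1337 is presumably the TM_KICKBACK_CALL hypercall number
 * registered in init_trans_mem() below; INSTR_INJECT_LEN elsewhere in this
 * file should match the 10 bytes shown here.
 */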
static struct v3_tm_state * tm_global_state = NULL;


tm_translate_rip (struct guest_info * core, addr_t * target)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_gpa_to_hva(core,
            get_addr_linear(core, core->rip, &(core->segments.cs)),
            target);
    } else if (core->mem_mode == VIRTUAL_MEM) {
        v3_gva_to_hva(core,
            get_addr_linear(core, core->rip, &(core->segments.cs)),
            target);
    }
}


tm_read_instr (struct guest_info * core,
               addr_t addr,
               uchar_t * dst,
               uint_t size)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_read_gpa_memory(core,
            get_addr_linear(core, addr, &(core->segments.cs)),
            size,
            dst);
    } else {
        v3_read_gva_memory(core,
            get_addr_linear(core, addr, &(core->segments.cs)),
            size,
            dst);
    }
}
tm_handle_decode_fail (struct guest_info * core)
{
    addr_t cur_rip = 0;
    uint64_t core_num = 0;

    tm_translate_rip(core, &cur_rip);

#ifdef V3_CONFIG_DEBUG_TM_FUNC
    v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
#endif

    /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
    for (core_num = 0; core_num < core->vm_info->num_cores; core_num++) {
        struct v3_trans_mem * remote_tm;

        /* skip local core */
        if (core_num == core->vcpu_id) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");
        if (!remote_tm) {
            TM_ERR(core,DECODE,"couldn't get remote_tm\n");
            return -1;
        }

        /* skip cores that aren't in transactional context */
        if (remote_tm->TM_MODE == TM_OFF) {
            continue;
        }

        TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
        remote_tm->TM_ABORT = 1;
    }

    return 0;
}
/* special casing for control-flow instructions
 * returns 1 if we need to jump
 * returns -1 on error
 */
tm_handle_ctrl_flow (struct guest_info * core,
                     struct v3_trans_mem * tm,
                     addr_t * instr_location,
                     struct x86_instr * struct_instr)
{
    /* special casing for control flow instructions */
    struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
    int to_jmp = 0;
    sint64_t offset = 0;

    switch (struct_instr->op_type) {

        case V3_OP_JLE:
            TM_DBG(core,DECODE, "!!++ JLE\n");
            to_jmp = (flags->zf || flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JAE:
            TM_DBG(core,DECODE,"!!++ JAE\n");
            to_jmp = (flags->cf == 0);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JMP:
            TM_DBG(core,DECODE,"!!++ JMP\n");
            to_jmp = 1;
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JNZ:
            TM_DBG(core,DECODE,"!!++ JNZ\n");
            to_jmp = (flags->zf == 0);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JL:
            TM_DBG(core,DECODE,"!!++ JL\n");
            to_jmp = (flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        case V3_OP_JNS:
            TM_DBG(core,DECODE,"!!++ JNS\n");
            to_jmp = (flags->sf == 0);
            offset = struct_instr->dst_operand.operand;

            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset      = offset;
            tm->to_branch   = to_jmp;
            break;
        default:
            *instr_location = core->rip + tm->cur_instr_len;
            tm->to_branch   = 0;
            break;
    }

    return to_jmp;
}
/*
 * called inside the #UD and VMMCALL handlers
 * only affects global state if the quix86 decoder falls over:
 * -> sets other cores' TM_ABORT to 1, returns -2
 */
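/* Sketch of the single-step (MIME) cycle this function participates in,
 * restated from the code below; the step names are descriptive, not source
 * symbols:
 *
 *   1. decode the instruction at the guest RIP to learn its length
 *      (and its branch target, via tm_handle_ctrl_flow)
 *   2. save the INSTR_INJECT_LEN bytes at the *next* RIP into tm->dirty_instr
 *   3. v3_overwrite_next_instr() writes vmmcall_bytes over those bytes
 *   4. the guest executes one instruction and traps into the hypercall,
 *      where v3_restore_dirty_instr() puts the saved bytes back
 */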
v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    struct x86_instr struct_instr;
    uchar_t cur_instr[INSTR_BUF_SZ];
    addr_t instr_location;

    // Fetch the current instruction
    tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);

    TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);

    /* Attempt to decode the current instruction to determine its length */
    if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {

        TM_ERR(core,Error,"Could not decode current instruction (at %llx)\n", (uint64_t)core->rip);

        /* this will attempt to abort all the remote cores */
        if (tm_handle_decode_fail(core) == -1) {
            TM_ERR(core,Error,"Could not handle failed decode\n");
            return ERR_STORE_FAIL;
        }

        /* we need to trigger a local abort */
        return ERR_STORE_MUST_ABORT;
    }

    /* we can't currently handle REP prefixes, abort */
    if (struct_instr.op_type != V3_INVALID_OP &&
            (struct_instr.prefixes.repne ||
             struct_instr.prefixes.repnz ||
             struct_instr.prefixes.rep   ||
             struct_instr.prefixes.repe  ||
             struct_instr.prefixes.repz)) {

        TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
        return ERR_STORE_MUST_ABORT;
    }

    tm->cur_instr_len = struct_instr.instr_length;

    /* handle jump instructions */
    tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);

    /* save the 10 bytes after the current instruction; we'll put the vmmcall there */
    tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);

    /* store the next instruction and its length in info */
    memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);

    return 0;
}
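/* Illustrative guest-memory layout produced by v3_store_next_instr() and
 * v3_overwrite_next_instr() below (a sketch; the offset term applies only
 * when tm_handle_ctrl_flow resolved a taken branch):
 *
 *   core->rip                        -> [ current instruction, cur_instr_len bytes ]
 *   core->rip + cur_instr_len
 *             (+ offset if taken)    -> [ 10 bytes saved in tm->dirty_instr,
 *                                         then overwritten with vmmcall_bytes ]
 */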
v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    addr_t ptr = core->rip;

    // save the register the injected mov will clobber
    tm->clobbered_rax = (core->vm_regs).rax;

    /* we can't currently handle instructions that span page boundaries */
    if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
        return -1;
    }

    ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);

    if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
        return -1;
    }

    if (v3_gva_to_hva(core,
                get_addr_linear(core, ptr, &(core->segments.cs)),
                &ptr) == -1) {
        TM_ERR(core,Error,"Calculating next rip hva failed\n");
        return -1;
    }

    TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
            core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len);

    /* Copy the VMM call over the beginning of the next instruction (at ptr) */
    memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);

    /* KCH: flag that we've dirtied an instruction, and store its addresses */
    tm->dirty_instr_flag = 1;
    tm->dirty_gva        = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
    tm->dirty_hva        = ptr;

    return 0;
}
/*
 * this should only be called if TM_STATE == TM_NULL; additionally, we check
 * whether our dirtied flag is set
 */
v3_restore_dirty_instr (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    /* Restore next instruction, transition to IFETCH state */
    TM_DBG(core,RESTORE,"Restoring next instruction.\n");

    /* check if we've actually done an instruction overwrite */
    if (!(tm->dirty_instr_flag)) {
        TM_DBG(core,RESTORE,"nothing to restore here...\n");
        return 0;
    }

    // Actually restore the instruction
    memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);

    // Restore the clobbered RAX
    (core->vm_regs).rax = tm->clobbered_rax;

    // Scoot the RIP back to the overwritten instruction
    TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    core->rip = tm->dirty_gva;

    // Clean up
    tm->dirty_instr_flag = 0;
    memset(tm->dirty_instr, 0, 15);

    TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);

    return 0;
}
tm_handle_fault_ifetch (struct guest_info * core,
                        struct v3_trans_mem * tm)
{
    int sto;

    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as the faulting address, we must be at an ifetch.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;
    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_DBG(core,EXIT,"aborting for some reason\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,PF,"problem overwriting instruction\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    return TRANS_FAULT_OK;
}
tm_handle_fault_read (struct guest_info * core,
                      struct v3_trans_mem * tm,
                      addr_t fault_addr,
                      pf_error_t error)
{
    // This page fault was caused by a read to memory in the current instruction for a core in TM mode
    TM_DBG(core,DATA,"Page fault caused by read.\n");
    TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
        TM_ERR(core,PF,"problem adding to list\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF,"problem recording access\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    /* if we have previously written to this address, we need to update our
     * staging page and map it in */
    if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {

        TM_DBG(core,PF,"Saw a read from something in the write list\n");

        /* write the value from the linked list to the staging page */
        if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
            TM_ERR(core,PF, "could not stage entry!\n");
            return ERR_TRANS_FAULT_FAIL;
        }

        /* Hand it the staging page */
        return (addr_t)(tm->staging_page);

    } else {

        // Add it to the read set
        addr_t shadow_addr = 0;

        TM_DBG(core,PF,"Saw a read from a fresh address\n");

        if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
            TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
            return ERR_TRANS_FAULT_FAIL;
        }
    }

    return TRANS_FAULT_OK;
}
tm_handle_fault_write (struct guest_info * core,
                       struct v3_trans_mem * tm,
                       addr_t fault_addr,
                       pf_error_t error)
{
    void * data_loc;
    addr_t virt_data_loc;
    addr_t shadow_addr = 0;

    TM_DBG(core,DATA,"Page fault caused by write\n");
    TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not add to list!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
        TM_ERR(core,WRITE,"could not translate gva to hva for transaction write\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    // Copy the existing value to the staging page, populating that field.
    // This avoids errors in optimized code such as ++, where the original
    // value is not read, but simply incremented.
    data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));

    if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
        TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
    *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;

    return (addr_t)(tm->staging_page);
}
tm_handle_fault_extern_ifetch (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    int sto;

    // the system is in TM state, record the access
    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");

    sto = v3_store_next_instr(core,tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,Error,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;

    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,IFETCH,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
tm_handle_fault_extern_access (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    TM_DBG(core,PF_HANDLE,"recording access\n");

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF_HANDLE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
tm_handle_fault_tmoff (struct guest_info * core)
{
    TM_DBG(core,PF_HANDLE, "in pf handler but no one is in TM mode anymore (core %d), I should try to eliminate hypercalls\n", core->vcpu_id);

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
/*
 * called from the MMU, which should mean at least tms->TM_MODE is on
 *
 * tm->on : ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, write log, store instr, overwrite instr
 * tm->off: ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, store instr, overwrite instr
 *
 * returns ERR_TRANS_FAULT_FAIL on error,
 * TRANS_FAULT_OK when things are fine, or
 * an addr when we're passing back a staging page
 */
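/* Condensed dispatch table for the handler below (a restatement of its
 * branches, not additional logic):
 *
 *   core TM_ON   && fault_addr == rip        -> tm_handle_fault_ifetch
 *   core TM_ON   && TM_EXEC && !error.write  -> tm_handle_fault_read
 *   core TM_ON   && TM_EXEC && error.write   -> tm_handle_fault_write
 *   system TM_ON && fault_addr == rip        -> tm_handle_fault_extern_ifetch
 *   system TM_ON && TM_EXEC                  -> tm_handle_fault_extern_access
 *   otherwise                                -> tm_handle_fault_tmoff
 */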
v3_handle_trans_mem_fault (struct guest_info * core,
                           addr_t fault_addr,
                           pf_error_t error)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(core,ERROR,": couldn't get core state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (!tms) {
        TM_ERR(core,ERROR,": couldn't get vm trans_mem state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);

    if ((tm->TM_MODE == TM_ON) &&
        ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_ifetch(core, tm);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 0)) {

        return tm_handle_fault_read(core, tm, fault_addr, error);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 1)) {

        return tm_handle_fault_write(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC)) {

        return tm_handle_fault_extern_access(core, tm, fault_addr, error);

    } else {

        return tm_handle_fault_tmoff(core);
    }

    return TRANS_FAULT_OK;
}
tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
{
    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,EXIT,"we are in tm mode but system is not!\n");
        return TRANS_HCALL_FAIL;
    }

    // we got to an exit when things were off!
    TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    tm->TM_STATE = TM_NULL;

    return TRANS_HCALL_OK;
}
tm_handle_hcall_dec_abort (struct guest_info * core,
                           struct v3_trans_mem * tm)
{
    // we only ever get here from TM DECODE
    TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");

    v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);

    TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));

    return TRANS_HCALL_OK;
}
tm_handle_hcall_ifetch_start (struct guest_info * core,
                              struct v3_trans_mem * tm)
{
    tm->TM_STATE = TM_IFETCH;

    TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away VTLB and go into TM_IFETCH\n");

    // Finally, invalidate the shadow page table
    v3_invalidate_shadow_pts(core);

    return TRANS_HCALL_OK;
}
tm_check_list_conflict (struct guest_info * core,
                        struct v3_trans_mem * tm,
                        struct list_head * access_list,
                        v3_tm_op_t op_type)
{
    struct mem_op * curr = NULL;
    struct mem_op * tmp  = NULL;
    int conflict = 0;

    list_for_each_entry_safe(curr, tmp, access_list, op_node) {

        conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);

        if (conflict == ERR_CHECK_FAIL) {

            TM_ERR(core,EXIT,"error checking for conflicts\n");
            return TRANS_HCALL_FAIL;

        } else if (conflict == CHECK_IS_CONFLICT) {

            TM_DBG(core,EXIT,"we have a conflict, aborting\n");
            v3_handle_trans_abort(core, TM_ABORT_CONFLICT, 0);
            return CHECK_MUST_ABORT;
        }
    }

    return TRANS_HCALL_OK;
}
tm_handle_hcall_check_conflicts (struct guest_info * core,
                                 struct v3_trans_mem * tm)
{
    int ret;

    TM_DBG(core,EXIT,"still TM_ON\n");
    TM_DBG(core,EXIT,"checking for conflicts\n");

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    tm->TM_STATE = TM_IFETCH;

    return TRANS_HCALL_OK;
}
/* trans mem hypercall handler
 *
 * cases, in roughly the order handled below:
 *   - running MIME (tm or tms on)
 *   - check for conflicts
 *   - abort (due to quix86)
 */
tm_handle_hcall (struct guest_info * core,
                 unsigned int hcall_id,
                 void * priv_data)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (tms->TM_MODE == TM_OFF) {
        return tm_handle_hcall_tmoff(core, tm);
    }

    // The previous instruction has finished, copy the staging page back into the linked list!
    if (update_list(tm, &(tm->trans_w_list)) == -1) {
        TM_ERR(core,HCALL,"could not update_list!\n");
        return TRANS_HCALL_FAIL;
    }

    // Done handling the previous instruction. Put back the next instruction,
    // reset %rip, and go back to the IFETCH state.
    TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    /* check whether we were previously told to abort */
    if (tm->TM_ABORT == 1 &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_dec_abort(core, tm);

    } else if (tm->TM_STATE == TM_EXEC) {
        return tm_handle_hcall_ifetch_start(core, tm);
    }

    if (tm->TM_MODE == TM_ON &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_check_conflicts(core, tm);

    } else if (tm->TM_MODE == TM_OFF) {
        TM_DBG(core,EXIT,"we are in TM_OFF\n");
    }

    return TRANS_HCALL_OK;
}
v3_tm_inc_tnum (struct v3_trans_mem * tm)
{
    addr_t irqstate;
    uint64_t new_ctxt;
    uint64_t * lt;

    lt = tm_global_state->last_trans;

    // grab the global last_trans
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    tm->t_num++;

    /* NOTE: this debug print assumes a two-core guest */
    TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
            (int)lt[1], (int)tm->t_num);

    if (new_ctxt != tm->t_num) {
        TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
        return -1;
    }

    return 0;
}
tm_set_abort_status (struct guest_info * core,
                     tm_abrt_cause_t cause,
                     uint8_t xabort_reason)
{
    core->vm_regs.rax = 0;

    switch (cause) {
        case TM_ABORT_XABORT:
            // we put the xabort immediate in eax 31:24
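            /* For reference, the RTM abort-status layout in EAX (restated
             * from the Intel SDM as a hedge; this file does not define it):
             *   bit 0     : abort caused by XABORT
             *   bit 1     : transaction may succeed on retry
             *   bit 2     : conflict with another logical processor
             *   bit 3     : internal buffer overflow
             *   bit 4     : debug breakpoint hit
             *   bit 5     : abort during a nested transaction
             *   bits 31:24: XABORT imm8 argument
             * ABORT_CONFLICT and ABORT_RETRY below are presumably bit
             * indices into this layout.
             */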
            core->vm_regs.rax |= (xabort_reason << 24);
            break;
        case TM_ABORT_CONFLICT:
            // if this was a conflict from another core, it may work on retry
            core->vm_regs.rax |= (1 << ABORT_CONFLICT) | (1 << ABORT_RETRY);
            break;
        case TM_ABORT_INTERNAL:
            core->vm_regs.rax |= (1 << cause);
            break;
        case TM_ABORT_UNSPECIFIED:
            // just return 0 in EAX
            break;
        default:
            TM_ERR(core, ABORT, "invalid abort cause\n");
            break;
    }
}
// xabort_reason is only used for the XABORT instruction
v3_handle_trans_abort (struct guest_info * core,
                       tm_abrt_cause_t cause,
                       uint8_t xabort_reason)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,ABORT,"problem freeing staging page\n");
        return -1;
    }

    // Clear the VTLB, which still has our staging page in it
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,ABORT,"problem clearing vtlb\n");
        return -1;
    }

    // Clear the transaction access lists
    v3_clear_tm_lists(tm);

    TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);

    if (tm->TM_MODE == TM_ON) {
        TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
        core->rip = tm->fail_call;
    }

    tm_set_abort_status(core, cause, xabort_reason);

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,GC,"could not gc!\n");
        return -1;
    }

    return 0;
}
tm_hash_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(void *));
}


tm_eq_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}


tm_hash_buf_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(addr_t));
}


tm_eq_buf_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}
/* this checks whether the remote access was done in the same
 * local transaction number as the current one */
tm_check_context (struct v3_vm_info * vm,
                  addr_t gva,
                  uint64_t core_num,
                  uint64_t curr_ctxt,
                  uint64_t * curr_lt,
                  v3_tm_op_t op_type)
{
    uint64_t core_id_sub;
    struct v3_tm_access_type * type = NULL;

    for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {
        struct v3_trans_mem * remote_tm;
        void * buf[3];
        addr_t key;

        /* skip the core that's doing the checking */
        if (core_id_sub == core_num) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");
        if (!remote_tm) {
            PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
            return ERR_CHECK_FAIL;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)core_id_sub;
        buf[2] = (void *)curr_lt[core_id_sub];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);

        if (type) {
            if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) || // so basically if write?
                 (op_type != OP_TYPE_WRITE && type->w)) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* check all the contexts in the list for a conflict */
tm_check_all_contexts (struct v3_vm_info * vm,
                       struct list_head * hash_list,
                       addr_t gva,
                       v3_tm_op_t op_type,
                       uint64_t core_num,
                       uint64_t curr_ctxt)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t * curr_lt       = NULL;
    int ret = 0;

    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        curr_lt = curr->curr_lt;

        if (curr_lt[core_num] == curr_ctxt) {

            ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* The following access patterns trigger an abort:
 *   We: Read  | Anyone Else: Write
 *   We: Write | Anyone Else: Read, Write
 *
 * (pg 8-2 of the Haswell manual)
 *
 * returns ERR_CHECK_FAIL on error
 * returns CHECK_IS_CONFLICT if there is a conflict
 * returns CHECK_NO_CONFLICT if there isn't
 */
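/* A hedged worked example of the rule above: suppose core 0, in transaction
 * t_num = 5, has gva 0x1000 in its write set. If core 1 then faults on a
 * read of 0x1000 while its context overlaps core 0's transaction 5,
 * tm_check_conflict() below should return CHECK_IS_CONFLICT (a write/read
 * overlap), and the caller aborts via v3_handle_trans_abort(). The address
 * and transaction numbers here are illustrative only.
 */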
tm_check_conflict (struct v3_vm_info * vm,
                   addr_t gva,
                   v3_tm_op_t op_type,
                   uint64_t core_num,
                   uint64_t curr_ctxt)
{
    uint64_t core_id;

    /* loop over other cores -> core_id */
    for (core_id = 0; core_id < vm->num_cores; core_id++) {

        struct guest_info * core = NULL;
        struct v3_trans_mem * tm = NULL;
        struct list_head * hash_list;

        /* only check other cores */
        if (core_id == core_num) {
            continue;
        }

        core = &(vm->cores[core_id]);
        tm   = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");

        if (!tm) {
            PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
            return ERR_CHECK_FAIL;
        }

        /* this core didn't access the address, move on */
        if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
            continue;
        }

        /* loop over the chained hash for gva, find fields with curr_ctxt -> curr_lt */
        int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);

        if (ret == ERR_CHECK_FAIL) {
            return ERR_CHECK_FAIL;
        } else if (ret == CHECK_IS_CONFLICT) {
            return CHECK_IS_CONFLICT;
        }
    }

    return CHECK_NO_CONFLICT;
}
tm_need_to_gc (struct v3_trans_mem * tm,
               struct hash_chain * curr,
               uint64_t * lt_copy,
               uint64_t tmoff)
{
    uint64_t to_gc = 1;
    uint64_t i;

    /* if none of the cores are in transactional context,
     * we know we can collect this context
     */
    if (!tmoff) {

        for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
            /* if *any* of the cores are active in a transaction
             * number that is current (listed in this context),
             * we know we can't collect this context, as it
             * will be needed when that core's transaction ends
             */
            if (curr->curr_lt[i] >= lt_copy[i]) {
                to_gc = 0;
                break;
            }
        }
    }

    return to_gc;
}
tm_del_stale_ctxt (struct hash_chain * curr)
{
    list_del(&(curr->lt_node));
    V3_Free(curr->curr_lt);
    V3_Free(curr);
}


tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
{
    v3_htable_remove(tm->access_type, key, 0);
    (tm->access_type_entries)--;
}
tm_collect_context (struct v3_trans_mem * tm,
                    struct hashtable_iter * ctxt_iter,
                    struct hash_chain * curr,
                    addr_t gva)
{
    uint64_t i;

    for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
        struct v3_ctxt_tuple tup;
        struct v3_tm_access_type * type;
        addr_t key;

        tup.gva     = (void *)gva;
        tup.core_id = (void *)i;
        tup.core_lt = (void *)curr->curr_lt[i];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);

        if (!type) { // something has gone terribly wrong
            TM_ERR(tm->ginfo,GC,"could not find accesstype entry to gc, THIS! IS! WRONG!\n");
            return -1;
        }

        /* delete the access type entry */
        tm_del_acc_entry(tm, key);
    }

    /* delete the stale context */
    tm_del_stale_ctxt(curr);

    return 0;
}
tm_collect_all_contexts (struct v3_trans_mem * tm,
                         struct hashtable_iter * ctxt_iter,
                         uint64_t tmoff,
                         uint64_t * lt_copy)
{
    struct hash_chain * tmp;
    struct hash_chain * curr;
    struct list_head * chain_list;
    addr_t gva;

    gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);

    chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

    /* this is a chained hash, so for each address we will have
     * a list of contexts. We now check each context to see
     * whether or not it can be collected
     */
    list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {

        uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);

        /* not garbage, go on to the next context in the list */
        if (!to_gc) {
            TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
            continue;
        }

        TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);

        /* found one, delete corresponding entries in access_type */
        if (tm_collect_context(tm, ctxt_iter, curr, gva) == -1) {
            TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
            return -1;
        }
    }

    /* if the context list (hash chain) is now empty, remove the hash entry */
    if (list_empty(chain_list)) {
        v3_htable_iter_remove(ctxt_iter, 0);
        (tm->addr_ctxt_entries)--;
    } else {
        v3_htable_iter_advance(ctxt_iter);
    }

    /* give the CPU away NONONO NEVER YIELD WHILE HOLDING A LOCK */

    return 0;
}
tm_hash_gc (struct v3_trans_mem * tm)
{
    addr_t irqstate, irqstate2;
    int ret = 0;
    uint64_t tmoff;
    uint64_t * lt_copy = NULL;
    struct v3_tm_state * tms = NULL;
    struct hashtable_iter * ctxt_iter = NULL;

    tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");
    if (!tms) {
        TM_ERR(tm->ginfo,GC,"could not get tm state\n");
        return -1;
    }

    TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));

    tmoff = (tms->cores_active == 0);

    lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    if (!lt_copy) {
        TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    /* lt_copy holds the last transaction number for each core */
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    /* lock both hashes */
    irqstate  = v3_lock_irqsave(tm->addr_ctxt_lock);
    irqstate2 = v3_lock_irqsave(tm->access_type_lock);

    /* loop over the hash entries in addr_ctxt */
    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
        v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
        v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);
        V3_Free(lt_copy);
        return -1;
    }

    /* we check each address stored in the hash */
    while (ctxt_iter->entry) {
        /* NOTE: this call advances the hash iterator */
        if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy) == -1) {
            ret = -1;
            break;
        }
    }

    v3_destroy_htable_iter(ctxt_iter);

    v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
    v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);

    if (ret == -1) {
        TM_ERR(tm->ginfo,GC,"garbage collection failed\n");
    } else {
        TM_DBG(tm->ginfo,GC,"ended garbage collection successfully\n");
    }

    V3_Free(lt_copy);

    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));

    return ret;
}
/* TODO: break out the for loops in these functions */
tm_update_ctxt_list (struct v3_trans_mem * tm,
                     uint64_t * lt_copy,
                     addr_t gva,
                     uint8_t write,
                     struct list_head * hash_list)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;
    uint8_t new_le = 1;

    /* check whether a chain entry with exactly this context already exists */
    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        uint64_t i;
        uint8_t same = 1;

        for (i = 0; i < num_cores; i++) {
            if (curr->curr_lt[i] != lt_copy[i]) {
                same = 0;
                break;
            }
        }

        if (same) {
            new_le = 0;
            break;
        }
    }

    if (new_le) {

        struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));

        if (!new_l) {
            TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
            return -1;
        }

        memset(new_l, 0, sizeof(struct hash_chain));

        new_l->curr_lt = lt_copy;

        list_add_tail(&(new_l->lt_node), hash_list);
    }

    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);

        if (!type) {

            type = V3_Malloc(sizeof(struct v3_tm_access_type));

            if (!type) {
                TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
                return -1;
            }

            memset(type, 0, sizeof(struct v3_tm_access_type));

            if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
                TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
                return -1;
            }

            (tm->access_type_entries)++;
        }

        /* record this access in the conflict log */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }
    }

    return 0;
}
/* no entry in addr_ctxt yet, create one */
tm_create_ctxt_key (struct v3_trans_mem * tm,
                    uint64_t * lt_copy,
                    addr_t gva,
                    uint8_t write)
{
    struct list_head * hash_list = NULL;
    struct hash_chain * new_l = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;

    hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));

    if (!hash_list) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
        return -1;
    }

    INIT_LIST_HEAD(hash_list);

    new_l = V3_Malloc(sizeof(struct hash_chain));

    if (!new_l) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
        goto out_err;
    }

    memset(new_l, 0, sizeof(struct hash_chain));

    new_l->curr_lt = lt_copy;

    /* add the context to the hash chain */
    list_add_tail(&(new_l->lt_node), hash_list);

    if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
        TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
        goto out_err1;
    }

    (tm->addr_ctxt_entries)++;

    /* TODO: we need a way to unwind and deallocate for all cores on failure here */
    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type = NULL;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        type = V3_Malloc(sizeof(struct v3_tm_access_type));

        if (!type) {
            TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
            return -1;
        }

        memset(type, 0, sizeof(struct v3_tm_access_type));

        /* record the initial access type */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
            TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
            return -1;
        }

        (tm->access_type_entries)++;
    }

    return 0;

out_err1:
    list_del(&(new_l->lt_node));
    V3_Free(new_l);
out_err:
    V3_Free(hash_list);
    return -1;
}
/*
 * called during MIME execution
 * record a memory access in the conflict logs
 * this locks the table during insertion
 */
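/* Shape of the two conflict-log tables used below (a restatement of the
 * code above, not a definition taken from the headers):
 *
 *   tm->addr_ctxt   : gva -> chained list of struct hash_chain, each holding
 *                     a snapshot (curr_lt[]) of every core's last transaction
 *                     number at the time of the access
 *   tm->access_type : hash(gva, core_id, core_lt) -> struct v3_tm_access_type
 *                     with r/w bits for that (address, core, transaction)
 */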
tm_record_access (struct v3_trans_mem * tm,
                  uint8_t write,
                  addr_t gva)
{
    struct list_head * hash_list;
    addr_t irqstate;
    uint64_t num_cores;
    uint64_t * lt_copy = NULL;

    num_cores = tm->ginfo->vm_info->num_cores;

    TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
            (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));

    lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);
    if (!lt_copy) {
        TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*num_cores);

    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
        /* we haven't created a context list for this address yet, go do it */
        return tm_create_ctxt_key(tm, lt_copy, gva, write);
    }

    /* we have a context list for this address already; do we need to create a new context? */
    return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
}
tm_prepare_cpuid (struct v3_vm_info * vm)
{
    V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");

    /* increase max CPUID function to 7 (extended feature flags enumeration) */
    v3_cpuid_add_fields(vm,0x0,
            0xffffffff, 0x7,
            0, 0,
            0, 0,
            0, 0);

    /* do the same for AMD */
    v3_cpuid_add_fields(vm,0x80000000,
            0xffffffff, 0x80000007,
            0, 0,
            0, 0,
            0, 0);

    /* enable RTM (CPUID.07H.EBX.RTM = 1) */
    v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
    v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
}
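/* For reference (restated from the Intel SDM as a hedge, not taken from this
 * codebase): the RTM feature flag is CPUID.(EAX=07H,ECX=0):EBX bit 11, which
 * is the bit the (1<<11) values above correspond to. HLE, by contrast, would
 * be EBX bit 4, which this extension does not advertise.
 */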
init_trans_mem (struct v3_vm_info * vm,
                v3_cfg_tree_t * cfg,
                void ** priv_data)
{
    int i;
    struct v3_tm_state * tms;

    PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");

    tms = V3_Malloc(sizeof(struct v3_tm_state));
    if (!tms) {
        PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
        return -1;
    }

    memset(tms, 0, sizeof(struct v3_tm_state));

    if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
        PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
        goto out_err;
    }

    v3_lock_init(&(tms->lock));

    tms->TM_MODE      = TM_OFF;
    tms->cores_active = 0;

    uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);
    if (!lt) {
        PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
        goto out_err1;
    }

    memset(lt, 0, sizeof(uint64_t) * vm->num_cores);

    for (i = 0; i < vm->num_cores; i++) {
        lt[i] = 0;
    }

    tms->last_trans = lt;

    *priv_data      = tms;
    tm_global_state = tms;

    tm_prepare_cpuid(vm);

    return 0;

out_err1:
    v3_lock_deinit(&(tms->lock));
    v3_remove_hypercall(vm, TM_KICKBACK_CALL);
out_err:
    V3_Free(tms);
    return -1;
}
init_trans_mem_core (struct guest_info * core,
                     void * priv_data,
                     void ** core_data)
{
    struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));

    TM_DBG(core,INIT, "Trans Mem. Core Init\n");

    if (!tm) {
        TM_ERR(core,INIT, "Problem allocating TM state\n");
        return -1;
    }

    memset(tm, 0, sizeof(struct v3_trans_mem));

    INIT_LIST_HEAD(&tm->trans_r_list);
    INIT_LIST_HEAD(&tm->trans_w_list);

    tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
    if (!(tm->addr_ctxt)) {
        TM_ERR(core,INIT,"problem creating addr_ctxt\n");
        goto out_err;
    }

    tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
    if (!(tm->access_type)) {
        TM_ERR(core,INIT,"problem creating access_type\n");
        goto out_err1;
    }

    v3_lock_init(&(tm->addr_ctxt_lock));
    v3_lock_init(&(tm->access_type_lock));

    tm->TM_STATE = TM_NULL;
    tm->TM_MODE  = TM_OFF;
    tm->TM_ABORT = 0;

    tm->ginfo = core;

    tm->access_type_entries = 0;
    tm->addr_ctxt_entries   = 0;
    tm->dirty_instr_flag    = 0;

    /* TODO: Cache Model */
    //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
    //tm->box->init = init_cache;
    //tm->box->init(sample_spec, tm->box);

    *core_data = tm;

    return 0;

out_err1:
    v3_free_htable(tm->addr_ctxt, 0, 0);
out_err:
    V3_Free(tm);
    return -1;
}
deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
{
    struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;

    if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
        PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
        return -1;
    }

    v3_lock_deinit(&(tms->lock));

    V3_Free(tms->last_trans);
    V3_Free(tms);

    return 0;
}
deinit_trans_mem_core (struct guest_info * core,
                       void * priv_data,
                       void * core_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
    struct hashtable_iter * ctxt_iter = NULL;

    v3_clear_tm_lists(tm);

    if (tm->staging_page) {
        TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
    }

    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
        return -1;
    }

    /* delete all context entries for each hashed address */
    while (ctxt_iter->entry) {
        struct hash_chain * tmp;
        struct hash_chain * curr;
        struct list_head * chain_list;
        addr_t gva;

        gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
        chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

        /* delete the context */
        list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
            tm_del_stale_ctxt(curr);
        }

        v3_htable_iter_advance(ctxt_iter);
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* we've already deleted the values in this one */
    v3_free_htable(tm->addr_ctxt, 0, 0);

    /* KCH WARNING: we may not want to free access type values here */
    v3_free_htable(tm->access_type, 1, 0);

    v3_lock_deinit(&(tm->addr_ctxt_lock));
    v3_lock_deinit(&(tm->access_type_lock));

    V3_Free(tm);

    return 0;
}
static struct v3_extension_impl trans_mem_impl = {
    .name        = "trans_mem",
    .vm_init     = init_trans_mem,
    .vm_deinit   = deinit_trans_mem,
    .core_init   = init_trans_mem_core,
    .core_deinit = deinit_trans_mem_core,
};

register_extension(&trans_mem_impl);
/*
 * tms->on  => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
 * tms->off => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
 */
tm_handle_xend (struct guest_info * core,
                struct v3_trans_mem * tm)
{
    /* XEND should raise a #GP when RTM mode is not on */
    if (tm->TM_MODE != TM_ON) {
        TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* Our transaction finished! */
    /* Copy over data from the staging page */
    TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");

    if (commit_list(core, tm) == -1) {
        TM_ERR(core,UD,"error committing tm list to memory\n");
        return -1;
    }

    TM_DBG(core,UD,"Freeing staging page and internal data structures\n");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,XEND,"couldn't free staging page\n");
        return -1;
    }

    // clear the VTLB, as it may still contain our staging page
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,XEND,"couldn't clear vtlb\n");
        return -1;
    }

    // clear the transaction access lists
    v3_clear_tm_lists(tm);

    /* Set the state and advance the RIP */
    TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
    core->rip += XEND_INSTR_LEN;

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,XEND,"could not gc!\n");
        return -1;
    }

    return 0;
}
/*
 * tms->on  => handle our abort code, handle_trans_abort will clear necessary state
 * tms->off => handle our abort code, handle_trans_abort will clear necessary state
 */
tm_handle_xabort (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    uint8_t reason;

    // we must reflect the immediate back into EAX 31:24
    reason = *(uint8_t*)(instr+2);

    // Error checking! make sure that we have gotten here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_DBG(core, UD, "We got here while not in a transactional core!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    TM_DBG(core,UD,"aborting\n");

    if (tm->TM_STATE != TM_NULL) {
        v3_restore_dirty_instr(core);
    }

    // Handle the abort
    v3_handle_trans_abort(core, TM_ABORT_XABORT, reason);

    return 0;
}
/*
 * tms->on  => we set up our running env, set_tm will clear other VTLBs to start single stepping
 * tms->off => we set up our running env, set_tm will not clear anyone else's VTLB
 */
tm_handle_xbegin (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    sint32_t rel_addr = 0;
    uint8_t out_of_bounds = 0;
    uint8_t in_compat_no_long = 0;

    if (tm->TM_MODE == TM_ON) {
        /* TODO: this is actually an indication of nesting, we'll fix this later */
        TM_ERR(core,UD,"We don't support nested transactions yet!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
    rel_addr = *(sint32_t*)(instr+2);

    /* raise a GPF if we're trying to set a fail call outside of the code segment */
    in_compat_no_long = (core->cpu_mode == LONG_32_COMPAT) || ((struct efer_64*)&(core->ctrl_regs.efer))->lma == 0;
    out_of_bounds     = (core->rip + rel_addr > core->segments.cs.base + core->segments.cs.limit ||
                         core->rip + rel_addr < core->segments.cs.base);

    if (in_compat_no_long && out_of_bounds) {
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* TODO: also raise a GPF if we're in long mode and the fail call isn't canonical */

    /* set the tm_mode for this core */
    TM_DBG(core,UD,"Set the system in TM Mode, save fallback address");

    tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;

    TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);

    /* flush the shadow page tables */
    TM_DBG(core,UD,"Throwing out the shadow table");
    v3_invalidate_shadow_pts(core);

    // Increase RIP, ready to go to the next instruction
    core->rip += XBEGIN_INSTR_LEN;

    return 0;
}
/*
 * tms->on  => we set up our running env, set_tm will clear other VTLBs to start single stepping
 * tms->off => we set up our running env, set_tm will not clear anyone else's VTLB
 */
tm_handle_xtest (struct guest_info * core,
                 struct v3_trans_mem * tm)
{
    struct rflags * rf = (struct rflags*)&(core->ctrl_regs.rflags);

    // if we are in TM mode, set ZF to 0, otherwise 1
    if (tm->TM_MODE == TM_ON) {
        rf->zf = 0;
    } else {
        rf->zf = 1;
    }

    core->rip += XTEST_INSTR_LEN;

    return 0;
}
/* RTM instruction encodings handled in the #UD handler below:
 *
 * XBEGIN c7 f8 rel32
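 * XABORT c6 f8 imm8
 * XEND   0f 01 d5
 * XTEST  0f 01 d6
 *
 * (the last three encodings are restated from the byte checks below)
 */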
tm_handle_ud (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    uchar_t instr[INSTR_BUF_SZ];
    uint8_t byte1, byte2, byte3;

    tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);

    byte1 = *(uint8_t *)((addr_t)instr);
    byte2 = *(uint8_t *)((addr_t)instr + 1);
    byte3 = *(uint8_t *)((addr_t)instr + 2);

    if (byte1 == 0xc7 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xbegin(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XBEGIN\n");
            return -1;
        }

    } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xabort(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XABORT\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {

        TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xend(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XEND\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) {

        TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xtest(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XTEST\n");
            return -1;
        }

    } else {

        /* oh no, this is still unknown, pass the error back to the guest! */
        TM_DBG(core,UD,"Encountered: %x %x %x\n", byte1, byte2, byte3);
        v3_raise_exception(core, UD_EXCEPTION);
    }

    return 0;
}
v3_tm_handle_exception (struct guest_info * info,
                        addr_t exit_code)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,ERR,"TM extension state not found\n");
        return -1;
    }

    switch (exit_code) {
        /* any of these exceptions should abort current transactions */
        case SVM_EXIT_EXCP6:
            if (tm_handle_ud(info) == -1) {
                return -1;
            }
            break;
        case SVM_EXIT_EXCP0:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DE_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DE exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP1:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DB_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DB exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP3:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP4:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, OF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to OF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP5:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BR_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BR exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP7:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NM_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NM exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP10:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, TS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to TS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP11:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP12:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, SS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to SS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP13:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, GPF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to GPF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP16:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, MF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to MF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP17:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, AC_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to AC exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP19:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, XF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to XF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        default:
            TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
            break;
    }

    return 0;
}
v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
{
    ctrl_area->exceptions.de = 1; // 0  : divide by zero
    ctrl_area->exceptions.db = 1; // 1  : debug
    ctrl_area->exceptions.bp = 1; // 3  : breakpoint
    ctrl_area->exceptions.of = 1; // 4  : overflow
    ctrl_area->exceptions.br = 1; // 5  : bound range
    ctrl_area->exceptions.ud = 1; // 6  : undefined opcode
    ctrl_area->exceptions.nm = 1; // 7  : device not available
    ctrl_area->exceptions.ts = 1; // 10 : invalid TSS
    ctrl_area->exceptions.np = 1; // 11 : segment not present
    ctrl_area->exceptions.ss = 1; // 12 : stack fault
    ctrl_area->exceptions.gp = 1; // 13 : general protection
    ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
    ctrl_area->exceptions.ac = 1; // 17 : alignment check
    ctrl_area->exceptions.xf = 1; // 19 : SIMD floating point
}
extern void v3_stgi();
extern void v3_clgi();

/* 441-tm: if we are in TM mode, we need to check for any interrupts here,
 * and if there are any, we need to do some aborting! Make sure not to die here
 * if we are already 'aborting'; that results in an infinite loop.
 */
v3_tm_check_intr_state (struct guest_info * info,
                        vmcb_ctrl_t * guest_ctrl,
                        vmcb_saved_state_t * guest_state)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,INTR,"TM extension state not found\n");
        return;
    }

    if ((tm->TM_MODE == TM_ON) &&
        (tm->TM_ABORT != 1)) {

        if (guest_ctrl->guest_ctrl.V_IRQ ||
            guest_ctrl->EVENTINJ.valid) {

            // We do indeed have pending interrupts
            v3_stgi();

            TM_DBG(info,INTR,"we have a pending interrupt\n");

            v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);

            // Copy the new RIP state into the arch-dependent structure
            guest_state->rip = info->rip;

            //TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);

            v3_clgi();
        }
    }
}
v3_tm_handle_pf_64 (struct guest_info * info,
                    pf_error_t error_code,
                    addr_t fault_addr,
                    addr_t * page_to_use)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
        return -1;
    }

    if (!tms) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
        return -1;
    }

    if ((tms->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);

        *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);

        if (*page_to_use == ERR_TRANS_FAULT_FAIL) {
            TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
            return -1;
        }

        if ((tm->TM_MODE == TM_ON) &&
            (tm->staging_page == NULL)) {

            tm->staging_page = V3_AllocPages(1);

            if (!(tm->staging_page)) {
                TM_ERR(info,MMU,"Problem allocating staging page\n");
                return -1;
            }

            TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
        }
    }

    return 0;
}
v3_tm_handle_usr_tlb_miss (struct guest_info * info,
                           pf_error_t error_code,
                           addr_t page_to_use,
                           addr_t * shadow_pa)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    /* TLB miss from user */
    if ((tm->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        if (page_to_use > TRANS_FAULT_OK) {
            TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
            *shadow_pa = page_to_use;
        }
    }
}
v3_tm_handle_read_fault (struct guest_info * info,
                         pf_error_t error_code,
                         pte64_t * shadow_pte)
{
    struct v3_trans_mem * tm  = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms  = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    // If we are about to read, make it read only
    if ((tms->TM_MODE == TM_ON) &&
        (tm->TM_STATE == TM_EXEC) &&
        (error_code.write == 0) &&
        (error_code.user == 1)) {

        TM_DBG(info,MMU, "Flagging the page read only\n");
        shadow_pte->writable = 0;
    }
}
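/* Rationale for the write-protection above (restating the mechanism in this
 * file, not adding to it): leaving the shadow PTE non-writable means a later
 * store to this page faults again, so v3_tm_handle_pf_64() gets a chance to
 * log the write and redirect it to the staging page instead of letting it
 * reach guest memory directly.
 */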
v3_tm_decode_rtm_instrs (struct guest_info * info,
                         addr_t instr_ptr,
                         struct x86_instr * instr)
{
    uint8_t byte1, byte2, byte3;
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (tm->TM_MODE == TM_ON) {

        byte1 = *(uint8_t *)(instr_ptr);
        byte2 = *(uint8_t *)(instr_ptr + 1);
        byte3 = *(uint8_t *)(instr_ptr + 2);

        if (byte1 == 0xc7 &&
            byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 6;
            return 0;

        } else if (byte1 == 0xc6 &&
                   byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 3;
            return 0;

        } else if (byte1 == 0x0f &&
                   byte2 == 0x01 &&
                   byte3 == 0xd5) {

            TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
            instr->instr_length = 3;