2 * This file is part of the Palacios Virtual Machine Monitor developed
3 * by the V3VEE Project with funding from the United States National
4 * Science Foundation and the Department of Energy.
6 * The V3VEE Project is a joint project between Northwestern University
7 * and the University of New Mexico. You can find out more at
10 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
11 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
12 * All rights reserved.
14 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
15 * Kyle C. Hale <kh@u.northwestern.edu>
16 * Marcel Flores <marcel-flores@u.northwestern.edu>
17 * Zachary Bischof <zbischof@u.northwestern.edu>
20 * This is free software. You are permitted to use,
21 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
24 #include <palacios/vmm_mem.h>
25 #include <palacios/vmm.h>
26 #include <palacios/vmcb.h>
27 #include <palacios/vmm_decoder.h>
28 #include <palacios/vm_guest_mem.h>
29 #include <palacios/vmm_ctrl_regs.h>
30 #include <palacios/vmm_paging.h>
31 #include <palacios/vmm_direct_paging.h>
32 #include <palacios/svm.h>
33 #include <palacios/svm_handler.h>
34 #include <palacios/vmm_excp.h>
35 #include <palacios/vmm_extensions.h>
36 #include <palacios/vmm_sprintf.h>
37 #include <palacios/vmm_hashtable.h>
39 #include <extensions/trans_mem.h>
40 #include <extensions/tm_util.h>
42 #if !V3_CONFIG_DEBUG_TM_FUNC
44 #define PrintDebug(fmt, args...)
48 * - save/restore register state on XBEGIN/XABORT
49 * - put status codes in RAX
50 * - Implement proper exceptions for failed XBEGINS etc.
53 /* this includes a mov to rax */
/* Guest-visible byte sequence injected over the next instruction:
 *   48 c7 c0 37 13 00 00  = mov rax, 0x1337   (presumably the TM hypercall id)
 *   0f 01 d9              = vmmcall (AMD SVM)
 * Its length should equal INSTR_INJECT_LEN (10 bytes) -- TODO confirm. */
54 static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
/* VM-wide TM bookkeeping shared by all cores (lock, per-core last_trans
 * array, ...); initialization is not visible in this excerpt. */
55 static struct v3_tm_state * tm_global_state = NULL;
59 tm_translate_rip (struct guest_info * core, addr_t * target)
/* Translate the guest's current RIP into a host virtual address (*target).
 * RIP is first linearized through CS, then translated via the guest-physical
 * path when guest paging is off, or the guest-virtual path when it is on.
 * (Return type, the enclosing translation calls, and error handling are
 * elided in this excerpt.) */
62     if (core->mem_mode == PHYSICAL_MEM) {
        /* guest paging off: the linear address is a guest-physical address */
64                 get_addr_linear(core, core->rip, &(core->segments.cs)),
66     } else if (core->mem_mode == VIRTUAL_MEM) {
        /* guest paging on: the linear address must be walked through the
         * guest page tables */
68                 get_addr_linear(core, core->rip, &(core->segments.cs)),
76 tm_read_instr (struct guest_info * core,
/* Copy instruction bytes from guest memory at linear(CS:addr) into a
 * caller-supplied buffer. Chooses the GPA-based or GVA-based copy routine
 * based on the guest's memory mode. (Remaining parameters -- the address,
 * destination buffer, and length -- plus error handling are elided in this
 * excerpt.) */
82     if (core->mem_mode == PHYSICAL_MEM) {
83         v3_read_gpa_memory(core,
84                            get_addr_linear(core, addr , &(core->segments.cs)),
        /* VIRTUAL_MEM path: read through the guest page tables */
89         v3_read_gva_memory(core,
90                            get_addr_linear(core, addr, &(core->segments.cs)),
99 tm_handle_decode_fail (struct guest_info * core)
/* Called when the decoder cannot decode the instruction at RIP: we cannot
 * single-step it, so treat this as catastrophic and flag TM_ABORT on every
 * other core currently inside a transaction. */
104     tm_translate_rip(core, &cur_rip);
106 #ifdef V3_CONFIG_DEBUG_TM_FUNC
    /* dump the undecodable bytes for post-mortem debugging */
107     v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
110     /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
111     for (core_num = 0; core_num < core->vm_info->num_cores; core_num++ ) {
112         struct v3_trans_mem * remote_tm;
114         /* skip local core */
115         if (core_num == core->vcpu_id) {
119         remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");
        /* NULL-check of remote_tm is elided in this excerpt */
121             TM_ERR(core,DECODE,"couldnt get remote_tm\n");
125         /* skip cores that aren't in transactional context */
126         if (remote_tm->TM_MODE == TM_OFF) {
        /* remote core is mid-transaction: mark it for abort */
130         TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
131         remote_tm->TM_ABORT = 1;
138 /* special casing for control-flow instructions
139  * returns 1 if we need to jump
140  * returns -1 on error
 *
 * Determines where the *next* instruction will execute so that the vmmcall
 * can later be injected there. For conditional jumps, the branch condition
 * is evaluated against the guest's RFLAGS *now*; *instr_location becomes
 * either the fall-through address or the branch target, and tm->to_branch
 * records the decision. (The `to_jmp`/`offset` declarations, the case
 * labels, break statements, and the store of tm->offset are elided in this
 * excerpt -- the offset is evidently saved since v3_overwrite_next_instr
 * reads tm->offset later.)
 */
143 tm_handle_ctrl_flow (struct guest_info * core,
144                      struct v3_trans_mem * tm,
145                      addr_t * instr_location,
146                      struct x86_instr * struct_instr)
149     /* special casing for control flow instructions */
150     struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
154     switch (struct_instr->op_type) {
        /* JLE: taken when ZF=1 or SF != OF */
157             TM_DBG(core,DECODE, "!!++ JLE\n");
158             to_jmp = (flags->zf || flags->sf != flags->of);
159             offset = struct_instr->dst_operand.operand;
161             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
163             tm->to_branch = to_jmp;
        /* JAE: taken when CF=0 */
166             TM_DBG(core,DECODE,"!!++ JAE\n");
167             to_jmp = (flags->cf == 0);
168             offset = struct_instr->dst_operand.operand;
170             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
172             tm->to_branch = to_jmp;
        /* unconditional JMP: always taken (to_jmp set in elided line) */
175             TM_DBG(core,DECODE,"!!++ JMP\n");
177             offset = struct_instr->dst_operand.operand;
179             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
181             tm->to_branch = to_jmp;
        /* JNZ: taken when ZF=0 */
184             TM_DBG(core,DECODE,"!!++ JNZ\n");
185             to_jmp = (flags->zf == 0);
186             offset = struct_instr->dst_operand.operand;
188             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
190             tm->to_branch = to_jmp;
        /* JL: taken when SF != OF */
193             TM_DBG(core,DECODE,"!!++ JL\n");
194             to_jmp = (flags->sf != flags->of);
195             offset = struct_instr->dst_operand.operand;
197             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
199             tm->to_branch = to_jmp;
        /* JNS: taken when SF=0 */
202             TM_DBG(core,DECODE,"!!++ JNS\n");
203             to_jmp = (flags->sf == 0);
204             offset = struct_instr->dst_operand.operand;
206             *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
208             tm->to_branch = to_jmp;
        /* default: not a handled branch -- next instruction is sequential */
211             *instr_location = core->rip + tm->cur_instr_len;
220  * called inside #UD and VMMCALL handlers
221  * only affects global state in case of quix86 fall over
222  * -> set other cores TM_ABORT to 1, return -2
 *
 * Fetch and decode the instruction at RIP, compute where execution continues
 * (accounting for branches), and save the INSTR_INJECT_LEN bytes that live
 * there into tm->dirty_instr so they can be restored after the injected
 * vmmcall fires. Returns ERR_STORE_FAIL / ERR_STORE_MUST_ABORT on the error
 * paths visible below; the success return is elided in this excerpt.
225 v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
227     struct x86_instr struct_instr;
228     uchar_t cur_instr[INSTR_BUF_SZ];
229     addr_t instr_location;
231     // Fetch the current instruction
232     tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);
234     TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);
236     /* Attempt to decode current instruction to determine its length */
237     if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {
239         TM_ERR(core,Error,"Could not decode currrent instruction (at %llx)\n", (uint64_t)core->rip);
241         /* this will attempt to abort all the remote cores */
242         if (tm_handle_decode_fail(core) == -1) {
243             TM_ERR(core,Error,"Could not handle failed decode\n");
244             return ERR_STORE_FAIL;
247         /* we need to trigger a local abort */
248         return ERR_STORE_MUST_ABORT;
252     /* we can't currently handle REP prefixes, abort */
253     if (struct_instr.op_type != V3_INVALID_OP &&
254             (struct_instr.prefixes.repne ||
255              struct_instr.prefixes.repnz ||
256              struct_instr.prefixes.rep   ||
257              struct_instr.prefixes.repe  ||
258              struct_instr.prefixes.repz)) {
260         TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
261         return ERR_STORE_MUST_ABORT;
    /* remember the length so we know where the next instruction starts */
264     tm->cur_instr_len = struct_instr.instr_length;
266     /* handle jump instructions */
267     tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);
269     /* save next 10 bytes after current instruction, we'll put vmmcall here */
270     tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);
272     /* store the next instruction and its length in info */
273     memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);
280 v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
/* Overwrite the *next* guest instruction with the vmmcall byte sequence so
 * the hypervisor regains control after exactly one instruction executes.
 * Saves the guest's RAX (clobbered by the injected `mov rax`), refuses
 * instructions/injections that straddle a 4KB page boundary, translates the
 * target to an HVA, and records what was dirtied for later restore.
 * NOTE(review): `ptr` is used in the first straddle check before the visible
 * assignment at the line below it -- its initial value must be set in an
 * elided line; confirm against the full source. */
285     tm->clobbered_rax = (core->vm_regs).rax;
289     /* we can't currently handle instructions that span page boundaries */
    /* wrap-around of the offset-within-page indicates a page crossing */
290     if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
291         TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
    /* address of the next instruction: fall-through, or branch target if taken */
295     ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
297     if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
298         TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
    /* translate the guest-virtual target into a host-virtual pointer we can
     * write through (result stored in an elided output argument) */
302     if (v3_gva_to_hva(core,
303                 get_addr_linear(core, ptr, &(core->segments.cs)),
306         TM_ERR(core,Error,"Calculating next rip hva failed\n");
310     TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
311             core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len );
313     /* Copy VMM call into the memory address of beginning of next instruction (ptr) */
314     memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);
316     /* KCH: flag that we've dirtied an instruction, and store its host address */
317     tm->dirty_instr_flag = 1;
318     tm->dirty_gva        = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
328  * this should only be called if TM_STATE == TM_NULL, additionally we check if our dirtied flag if set
 *
 * Undo a previous v3_overwrite_next_instr: copy the saved original bytes
 * back over the injected vmmcall, restore the clobbered RAX, and rewind RIP
 * to the start of the (now restored) instruction.
331 v3_restore_dirty_instr (struct guest_info * core)
333     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
335     /* Restore next instruction, transition to IFETCH state */
336     TM_DBG(core,RESTORE,"Restoring next instruction.\n");
338     /* check if we've actually done an instruction overwrite */
339     if (!(tm->dirty_instr_flag)) {
340         TM_DBG(core,RESTORE,"nothing to restore here...\n");
344     // Actually restore instruction
345     memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);
    /* undo the rax clobber from the injected `mov rax, imm` */
348     (core->vm_regs).rax = tm->clobbered_rax;
351     TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    /* rewind RIP to the restored instruction */
352     core->rip = tm->dirty_gva;
355     tm->dirty_instr_flag = 0;
    /* NOTE(review): clears 15 bytes although only INSTR_INJECT_LEN (10) were
     * stored -- confirm dirty_instr is at least 15 bytes wide */
358     memset(tm->dirty_instr, 0, 15);
360     TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);
367 tm_handle_fault_ifetch (struct guest_info * core,
368                         struct v3_trans_mem * tm)
/* Page fault at RIP while this core is in TM mode: we are at an ifetch.
 * Save the following instruction's bytes, inject the vmmcall after the
 * current instruction, and move to TM_EXEC (single-step) state.
 * Returns TRANS_FAULT_OK, or ERR_TRANS_FAULT_FAIL on error. */
372     TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as faulting address, we must be at an ifetch.\n");
374     sto = v3_store_next_instr(core, tm);
376     if (sto == ERR_STORE_FAIL) {
377         TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
378         return ERR_TRANS_FAULT_FAIL;
379     } else if (sto == ERR_STORE_MUST_ABORT) {
        /* decode problem: locally abort the transaction but report OK so the
         * fault path proceeds normally */
380         TM_DBG(core,EXIT,"aborting for some reason\n");
381         v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
382         return TRANS_FAULT_OK;
385     if (v3_overwrite_next_instr(core, tm) == -1) {
386         TM_ERR(core,PF,"problem overwriting instruction\n");
387         return ERR_TRANS_FAULT_FAIL;
    /* instruction armed: next exit will be the injected vmmcall */
390     tm->TM_STATE = TM_EXEC;
392     return TRANS_FAULT_OK;
397 tm_handle_fault_read (struct guest_info * core,
398                       struct v3_trans_mem * tm,
/* Data-read fault during single-stepped TM execution: record the read in the
 * transaction's read list and the conflict hash. If this address was
 * previously written inside the transaction, serve the read from the staging
 * page (returned as an addr_t) so the core sees its own speculative write;
 * otherwise the fresh address is mapped read-only (mapping code elided).
 * (fault_addr / error parameters are elided from the visible signature.) */
403     // This page fault was caused by a read to memory in the current instruction for a core in TM mode
404     TM_DBG(core,DATA,"Page fault caused by read.\n");
405     TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);
407     if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
408         TM_ERR(core,PF,"problem adding to list\n");
409         return ERR_TRANS_FAULT_FAIL;
412     if (tm_record_access(tm, error.write, fault_addr) == -1) {
413         TM_ERR(core,PF,"problem recording access\n");
414         return ERR_TRANS_FAULT_FAIL;
417     /* if we have previously written to this address, we need to update our
418      * staging page and map it in */
419     if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {
421         TM_DBG(core,PF,"Saw a read from something in the write list\n");
423         /* write the value from linked list to staging page */
424         if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
425             TM_ERR(core,PF, "could not stage entry!\n");
426             return ERR_TRANS_FAULT_FAIL;
429         /* Hand it the staging page */
430         return (addr_t)(tm->staging_page);
434     //Add it to the read set
435     addr_t shadow_addr = 0;
437     TM_DBG(core,PF,"Saw a read from a fresh address\n");
    /* translate for read-set bookkeeping; use of shadow_addr is elided here */
439     if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
440         TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
441         return ERR_TRANS_FAULT_FAIL;
446     return TRANS_FAULT_OK;
451 tm_handle_fault_write (struct guest_info * core,
452                        struct v3_trans_mem * tm,
/* Data-write fault during single-stepped TM execution: record the write in
 * the transaction's write list and conflict hash, seed the staging page with
 * the current memory value (so read-modify-write instructions like ++ see
 * the old value), and return the staging page for the write to land on.
 * (fault_addr / error parameters are elided from the visible signature.) */
457     addr_t virt_data_loc;
458     addr_t shadow_addr    = 0;
460     TM_DBG(core,DATA,"Page fault cause by write\n");
461     TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);
463     if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
464         TM_ERR(core,WRITE,"could not add to list!\n");
465         return ERR_TRANS_FAULT_FAIL;
468     if (tm_record_access(tm, error.write, fault_addr) == -1) {
469         TM_ERR(core,WRITE,"could not record access!\n");
470         return ERR_TRANS_FAULT_FAIL;
473     if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
474         TM_ERR(core,WRITE,"could not translate gva to hva for transaction read\n");
475         return ERR_TRANS_FAULT_FAIL;
478     // Copy existing values to the staging page, populating that field
479     // This avoids errors in optimized code such as ++, where the original
480     // value is not read, but simply incremented
    /* same page offset on the staging page as in the faulting page */
481     data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));
483     if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
484         TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
485         return ERR_TRANS_FAULT_FAIL;
488     TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
489     //memcpy((void*)virt_data_loc, (void*)shadow_addr, sizeof(uint64_t));
    /* copy one 64-bit word of the original value onto the staging page */
490     *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;
492     return (addr_t)(tm->staging_page);
497 tm_handle_fault_extern_ifetch (struct guest_info * core,
498                                struct v3_trans_mem * tm,
/* Ifetch fault on a core that is NOT itself in TM mode, while some other
 * core in the system is: we still single-step (MIME) this core so its
 * accesses can be recorded for conflict detection. Mirrors
 * tm_handle_fault_ifetch but also records the access.
 * (fault_addr / error parameters are elided from the visible signature.) */
504     // system is in tm state, record the access
505     TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");
507     sto = v3_store_next_instr(core,tm);
509     if (sto == ERR_STORE_FAIL) {
510         TM_ERR(core,Error,"Could not store next instruction in transaction\n");
511         return ERR_TRANS_FAULT_FAIL;
513     } else if (sto == ERR_STORE_MUST_ABORT) {
514         TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
515         v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
516         return TRANS_FAULT_OK;
519     if (v3_overwrite_next_instr(core, tm) == -1) {
520         TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
521         return ERR_TRANS_FAULT_FAIL;
524     tm->TM_STATE = TM_EXEC;
    /* log this access so in-transaction cores can detect conflicts with us */
526     if (tm_record_access(tm, error.write, fault_addr) == -1) {
527         TM_ERR(core,IFETCH,"could not record access!\n");
528         return ERR_TRANS_FAULT_FAIL;
531     return TRANS_FAULT_OK;
536 tm_handle_fault_extern_access (struct guest_info * core,
537                                struct v3_trans_mem * tm,
/* Data access fault on a non-TM core while the system is in TM mode:
 * just record the access for conflict detection, nothing to stage.
 * (fault_addr / error parameters are elided from the visible signature.) */
541     TM_DBG(core,PF_HANDLE,"recording access\n");
542     if (tm_record_access(tm, error.write, fault_addr) == -1) {
543         TM_ERR(core,PF_HANDLE,"could not record access!\n");
544         return ERR_TRANS_FAULT_FAIL;
547     return TRANS_FAULT_OK;
552 tm_handle_fault_tmoff (struct guest_info * core)
/* Fault taken after all transactions have ended: clean up any instruction we
 * dirtied (remove the leftover injected vmmcall) and carry on. */
554     TM_DBG(core,PF_HANDLE, "in pf handler but noone is in tm mode anymore (core %d), i should try to eliminate hypercalls\n", core->vcpu_id);
556     if (v3_restore_dirty_instr(core) == -1) {
557         TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
558         return ERR_TRANS_FAULT_FAIL;
561     return TRANS_FAULT_OK;
567  * called from MMU - should mean at least tms->TM_MODE is on
569  * tm->on :  ifetch -> store instr, overwrite instr
570  *           r/w    -> record hash, write log, store instr, overwrite instr
571  * tm->off:  ifetch -> store instr, overwrite instr
572  *           r/w    -> record hash, store instr, overwrite instr
574  * returns ERR_TRANS_FAULT_FAIL on error
575  * TRANS_FAULT_OK when things are fine
576  * addr when we're passing back a staging page
 *
 * Top-level shadow-page-fault dispatcher for TM: routes the fault to one of
 * the tm_handle_fault_* helpers based on (a) whether *this* core is in a
 * transaction, (b) whether it is single-stepping (TM_EXEC), (c) whether the
 * fault is an ifetch (fault address == RIP) or a data read/write, and (d)
 * whether any core system-wide is still transactional.
580 v3_handle_trans_mem_fault (struct guest_info * core,
/* (fault_addr / error parameters elided from the visible signature) */
584     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
585     struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");
588         TM_ERR(core,ERROR,": coudln't get core state\n");
589         return ERR_TRANS_FAULT_FAIL;
593         TM_ERR(core,ERROR,": couldn't get vm trans_mem state\n");
594         return ERR_TRANS_FAULT_FAIL;
597     TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);
    /* this core is transactional and faulted on its own RIP: ifetch */
599     if ((tm->TM_MODE == TM_ON) &&
600             ((void *)fault_addr == (void *)(core->rip))) {
602         return tm_handle_fault_ifetch(core, tm);
604     } else if ((tm->TM_MODE == TM_ON) &&
605                (tm->TM_STATE == TM_EXEC) &&
606                (error.write == 0)) {
608         return tm_handle_fault_read(core, tm, fault_addr, error);
610     } else if ((tm->TM_MODE == TM_ON) &&
611                (tm->TM_STATE == TM_EXEC) &&
612                (error.write == 1)) {
614         return tm_handle_fault_write(core, tm, fault_addr, error);
    /* this core is NOT transactional, but some other core is: MIME paths */
617     } else if ((tms->TM_MODE == TM_ON) &&
618                ((void *)fault_addr == (void *)(core->rip))) {
620         return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);
622     } else if ((tms->TM_MODE == TM_ON) &&
623                (tm->TM_STATE == TM_EXEC)) {
625         return tm_handle_fault_extern_access(core, tm, fault_addr, error);
    /* nobody is transactional any more: clean up stale injections */
628         return tm_handle_fault_tmoff(core);
632     return TRANS_FAULT_OK;
637 tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
/* Injected-vmmcall exit taken when the system-wide TM mode is already off:
 * sanity-check our own mode, restore the overwritten instruction, and leave
 * single-step state. */
639     if (tm->TM_MODE == TM_ON) {
        /* inconsistent: core thinks it's transactional but the system doesn't */
640         TM_ERR(core,EXIT,"we are in tm mode but system is not!\n");
641         return TRANS_HCALL_FAIL;
644     // we got to an exit when things were off!
645     TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");
647     if (v3_restore_dirty_instr(core) == -1) {
648         TM_ERR(core,HCALL,"could not restore dirty instr!\n");
649         return TRANS_HCALL_FAIL;
652     tm->TM_STATE = TM_NULL;
654     return TRANS_HCALL_OK;
659 tm_handle_hcall_dec_abort (struct guest_info * core,
660                            struct v3_trans_mem * tm)
/* Hypercall exit taken while TM_ABORT is set (only reachable from the decode
 * path): run the full abort handler for this core. */
662     // only ever get here from TM DECODE
663     TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");
666     v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
668     TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));
670     return TRANS_HCALL_OK;
675 tm_handle_hcall_ifetch_start (struct guest_info * core,
676                               struct v3_trans_mem * tm)
/* One single-stepped instruction has completed (TM_EXEC): go back to
 * TM_IFETCH and flush the shadow page tables so the next ifetch faults and
 * we regain control at the following instruction. */
678     tm->TM_STATE = TM_IFETCH;
680     TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away VTLB and go into TM_IFETCH\n");
682     // Finally, invalidate the shadow page table
683     v3_invalidate_shadow_pts(core);
685     return TRANS_HCALL_OK;
690 tm_check_list_conflict (struct guest_info * core,
691                         struct v3_trans_mem * tm,
692                         struct list_head * access_list,
/* Walk one of this transaction's access lists (read or write) and check
 * every recorded address against all other cores' access logs; abort the
 * local transaction on the first conflict found.
 * Returns TRANS_HCALL_OK / TRANS_HCALL_FAIL / CHECK_MUST_ABORT.
 * (op_type parameter is elided from the visible signature.) */
695     struct mem_op * curr = NULL;
696     struct mem_op * tmp  = NULL;
699     list_for_each_entry_safe(curr, tmp, access_list, op_node) {
701         conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);
703         if (conflict == ERR_CHECK_FAIL) {
705             TM_ERR(core,EXIT,"error checking for conflicts\n");
706             return TRANS_HCALL_FAIL;
708         } else if (conflict == CHECK_IS_CONFLICT) {
710             TM_DBG(core,EXIT,"we have a conflict, aborting\n");
711             v3_handle_trans_abort(core, TM_ABORT_CONFLICT, 0);
712             return CHECK_MUST_ABORT;
718     return TRANS_HCALL_OK;
723 tm_handle_hcall_check_conflicts (struct guest_info * core,
724                                  struct v3_trans_mem * tm)
/* After each single-stepped instruction while transactional: validate both
 * the write set and the read set against other cores. If a conflict aborted
 * us, return OK (the abort handler already ran); otherwise resume IFETCH. */
728     TM_DBG(core,EXIT,"still TM_ON\n");
729     TM_DBG(core,EXIT,"checking for conflicts\n");
731     if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
732         return TRANS_HCALL_FAIL;
733     } else if (ret == CHECK_MUST_ABORT) {
734         return TRANS_HCALL_OK;
737     if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
738         return TRANS_HCALL_FAIL;
739     } else if (ret == CHECK_MUST_ABORT) {
740         return TRANS_HCALL_OK;
    /* no conflicts: continue single-stepping from the next ifetch */
743     tm->TM_STATE = TM_IFETCH;
745     return TRANS_HCALL_OK;
749 /* trans mem hypercall handler
752  * running mime (tm or tms on)
756  *   check for conflicts
758  *   abort (due to quix86)
 *
 * Registered handler for the injected vmmcall. Fires after each
 * single-stepped instruction: commits the staging page back to the write
 * log, restores the overwritten instruction, then dispatches on abort /
 * exec / conflict-check sub-cases. (hcall argument plumbing and the
 * registration site are elided in this excerpt.)
763 tm_handle_hcall (struct guest_info * core,
764                  unsigned int hcall_id,
767     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
768     struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");
770     if (tms->TM_MODE == TM_OFF) {
771         return tm_handle_hcall_tmoff(core, tm);
774     // Previous instruction has finished, copy staging page back into linked list!
775     if (update_list(tm, &(tm->trans_w_list)) == -1) {
776         TM_ERR(core,HCALL,"could not update_list!\n");
777         return TRANS_HCALL_FAIL;
780     // Done handling previous instruction, must put back the next instruction, reset %rip and go back to IFETCH state
781     TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");
783     if (v3_restore_dirty_instr(core) == -1) {
784         TM_ERR(core,HCALL,"could not restore dirty instr!\n");
785         return TRANS_HCALL_FAIL;
    /* a remote core (e.g. on decode failure) asked us to abort */
789     if (tm->TM_ABORT == 1 &&
790             tms->TM_MODE == TM_ON) {
792         return tm_handle_hcall_dec_abort(core, tm);
794     } else if (tm->TM_STATE == TM_EXEC) {
795         return tm_handle_hcall_ifetch_start(core, tm);
799     if (tm->TM_MODE == TM_ON &&
800             tms->TM_MODE == TM_ON) {
802         return tm_handle_hcall_check_conflicts(core, tm);
804     } else if (tm->TM_MODE == TM_OFF) {
805         TM_DBG(core,EXIT,"we are in TM_OFF\n");
808     return TRANS_HCALL_OK;
813 v3_tm_inc_tnum (struct v3_trans_mem * tm)
/* Bump this core's entry in the global last-transaction-number array (under
 * the global TM lock) and sanity-check it against the core-local t_num.
 * (Declarations of lt/new_ctxt/irqstate and the update of tm->t_num are
 * elided in this excerpt.) */
    /* pointer to the global per-core last_trans array; stable, so reading the
     * pointer outside the lock is fine */
819     lt = tm_global_state->last_trans;
821     // grab global last_trans
822     irqstate = v3_lock_irqsave(tm_global_state->lock);
823     new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
824     v3_unlock_irqrestore(tm_global_state->lock, irqstate);
    /* NOTE(review): this debug print hard-codes lt[0] and lt[1], i.e. it
     * assumes exactly two cores -- confirm for larger configs */
828     TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
829             (int)lt[1], (int)tm->t_num);
831     if (new_ctxt != tm->t_num) {
832         TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
841 tm_set_abort_status (struct guest_info * core,
842                      tm_abrt_cause_t cause,
843                      uint8_t xabort_reason)
/* Build the RTM-style abort status word in guest RAX, mirroring what
 * hardware TSX reports in EAX on abort: XABORT puts its immediate in bits
 * 31:24; conflicts set the conflict and may-succeed-on-retry bits.
 * (break statements and the XABORT-valid bit, if any, are elided.) */
845     core->vm_regs.rax = 0;
848         case TM_ABORT_XABORT:
849             // we put the xabort immediate in eax 31:24
851             core->vm_regs.rax |= (xabort_reason << 24);
853         case TM_ABORT_CONFLICT:
854             // if this was a conflict from another core, it may work
            /* ... on retry, so advertise both CONFLICT and RETRY */
856             core->vm_regs.rax |= (1 << ABORT_CONFLICT) | (1 << ABORT_RETRY);
858         case TM_ABORT_INTERNAL:
            /* internal causes map directly onto a status bit position */
860             core->vm_regs.rax |= (1 << cause);
863             TM_ERR(core, ABORT, "invalid abort cause\n");
869 // xabort_reason is only used for XABORT instruction
 //
 // Abort the current transaction on this core: discard the staging page,
 // flush the VTLB that may still map it, clear the read/write logs, redirect
 // RIP to the registered fallback path (tm->fail_call), publish the abort
 // status in RAX, and garbage-collect stale conflict-log state.
871 v3_handle_trans_abort (struct guest_info * core,
872                        tm_abrt_cause_t cause,
873                        uint8_t xabort_reason)
875     struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
877     // Free the staging page
878     if (v3_free_staging_page(tm) == -1) {
879         TM_ERR(core,ABORT,"problem freeing staging page\n");
883     // Clear the VTLB which still has our staging page in it
884     if (v3_clr_vtlb(core) == -1) {
885         TM_ERR(core,ABORT,"problem clearing vtlb\n");
    /* drop the speculative read/write sets */
890     v3_clear_tm_lists(tm);
892     TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);
894     if (tm->TM_MODE == TM_ON) {
        /* resume the guest at the abort/fallback handler it registered */
895         TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
896         core->rip = tm->fail_call;
905     tm_set_abort_status(core, cause, xabort_reason);
907     // time to garbage collect
908     if (tm_hash_gc(tm) == -1) {
909         TM_ERR(core,GC,"could not gc!\n");
918 tm_hash_fn (addr_t key)
/* Hash function for address-keyed tables: hashes the raw address value. */
920     return v3_hash_long(key, sizeof(void *));
925 tm_eq_fn (addr_t key1, addr_t key2)
/* Key equality for address-keyed tables: plain value comparison. */
927     return (key1 == key2);
932 tm_hash_buf_fn (addr_t key)
/* Hash function for pre-hashed buffer keys. NOTE(review): uses
 * sizeof(addr_t) where tm_hash_fn uses sizeof(void *) -- identical on
 * 64-bit, but confirm the asymmetry is intentional. */
934     return v3_hash_long(key, sizeof(addr_t));
939 tm_eq_buf_fn(addr_t key1, addr_t key2)
/* Key equality for pre-hashed buffer keys: plain value comparison. */
941     return (key1 == key2);
945 /* this checks if the remote access was done on the same
946  * local transaction number as the current one
 *
 * For each *other* core, build the (gva, core, last-trans) tuple key and
 * look it up in that core's access_type table. The Haswell-style conflict
 * matrix applies: our write conflicts with any remote access; our read
 * conflicts only with a remote write.
 * Returns CHECK_IS_CONFLICT / CHECK_NO_CONFLICT / ERR_CHECK_FAIL.
 * (Several parameters and the key/buf declarations are elided.) */
948 tm_check_context (struct v3_vm_info * vm,
955     uint64_t core_id_sub;
956     struct v3_tm_access_type * type = NULL;
958     for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {
959         struct v3_trans_mem * remote_tm;
963         /* skip the core that's doing the checking */
964         if (core_id_sub == core_num) {
968         remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");
        /* NULL-check of remote_tm is elided in this excerpt */
970             PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
971             return ERR_CHECK_FAIL;
        /* key = hash of the (gva, remote core id, remote last-trans) tuple */
974         buf[0] = (void *)gva;
975         buf[1] = (void *)core_id_sub;
976         buf[2] = (void *)curr_lt[core_id_sub];
978         key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);
980         type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);
        /* our write vs any remote access, or our read vs a remote write */
984         if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) ||
985                 (op_type != OP_TYPE_WRITE && type->w)) {
986             return CHECK_IS_CONFLICT;
991     return CHECK_NO_CONFLICT;
995 /* check all the contexts in the list for a conflict
 *
 * Walk the chained-hash context list for one gva; for each chain entry whose
 * recorded transaction number for this core matches our current context,
 * run the per-core conflict check. Short-circuits on failure or conflict.
 * (Several parameters are elided from the visible signature.) */
997 tm_check_all_contexts (struct v3_vm_info * vm,
998                        struct list_head * hash_list,
1004     struct hash_chain * curr = NULL;
1005     struct hash_chain * tmp  = NULL;
1006     uint64_t * curr_lt       = NULL;
1009     list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {
1011         curr_lt = curr->curr_lt;
        /* only contexts from our current transaction number are relevant */
1013         if (curr_lt[core_num] == curr_ctxt) {
1015             ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);
1017             if (ret == ERR_CHECK_FAIL) {
1018                 return ERR_CHECK_FAIL;
1019             } else if (ret == CHECK_IS_CONFLICT) {
1020                 return CHECK_IS_CONFLICT;
1027     return CHECK_NO_CONFLICT;
1031 /* The following access patterns trigger an abort:
1032  * We: Read     | Anyone Else: Write
1033  * We: Write    | Anyone Else: Read, Write
1035  * (pg 8-2 of haswell manual)
1037  * returns ERR_CHECK_FAIL on error
1038  * returns CHECK_IS_CONFLICT if there is a conflict
1039  * returns CHECK_NO_CONFLICT if there isn't
 *
 * Entry point for conflict detection on a single address: for every other
 * core, look up its per-address context chain (addr_ctxt) and delegate to
 * tm_check_all_contexts. (Several parameters are elided.)
1042 tm_check_conflict (struct v3_vm_info * vm,
1050     /* loop over other cores -> core_id */
1051     for (core_id = 0; core_id < vm->num_cores; core_id++) {
1053         struct guest_info * core = NULL;
1054         struct v3_trans_mem * tm = NULL;
1055         struct list_head * hash_list;
1057         /* only check other cores */
1058         if (core_id == core_num) {
1062         core = &(vm->cores[core_id]);
1063         tm = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");
        /* NULL-check of tm is elided in this excerpt */
1066             PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
1067             return ERR_CHECK_FAIL;
1070         /* this core didn't access the address, move on */
1071         if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
1076         /* loop over chained hash for gva, find fields with curr_ctxt -> curr_lt*/
1077         int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);
1079         if (ret == ERR_CHECK_FAIL) {
1080             return ERR_CHECK_FAIL;
1081         } else if (ret == CHECK_IS_CONFLICT) {
1082             return CHECK_IS_CONFLICT;
1088     return CHECK_NO_CONFLICT;
1093 tm_need_to_gc (struct v3_trans_mem * tm,
1094                struct hash_chain * curr,
/* Decide whether one context chain entry is stale enough to collect:
 * collectable only if *no* core could still need it, i.e. every recorded
 * transaction number in the entry is strictly older than that core's
 * current last-trans snapshot. (Remaining parameters, the tmoff fast path,
 * and the return statements are elided in this excerpt.) */
1101     /* if none of the cores are in transactional context,
1102      * we know we can collect this context
1106     for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
1107         /* if *any* of the cores are active in a transaction
1108          * number that is current (listed in this context),
1109          * we know we can't collect this context, as it
1110          * will be needed when that core's transaction ends
1112         if (curr->curr_lt[i] >= lt_copy[i]) {
1124 tm_del_stale_ctxt (struct hash_chain * curr)
/* Unlink a stale context entry from its chain and free its last-trans
 * snapshot. NOTE(review): the free of `curr` itself is not visible in this
 * excerpt -- confirm it happens in an elided line. */
1126         list_del(&(curr->lt_node));
1127         V3_Free(curr->curr_lt);
1133 tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
/* Remove one access-type record from the hash (without freeing the value --
 * the 0 flag) and keep the entry counter in sync. */
1135     v3_htable_remove(tm->access_type, key, 0);
1136     (tm->access_type_entries)--;
1141 tm_collect_context (struct v3_trans_mem * tm,
1142                     struct hashtable_iter * ctxt_iter,
1143                     struct hash_chain * curr,
1144                     uint64_t * begin_time,
1145                     uint64_t * end_time,
/* Collect one stale context: for every core, delete the matching
 * access-type record keyed by (gva, core, recorded last-trans), then delete
 * the context chain entry itself. Bails out if GC has already run too long.
 * (gva parameter, rdtsc of *end_time, and buf[1] assignment are elided.) */
1150     for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
1152         struct v3_tm_access_type * type;
        /* 100M-cycle budget so GC can't stall the VM indefinitely */
1156         if ((*end_time - *begin_time) > 100000000) {
1157             TM_ERR(tm->ginfo,GC,"time threshhold exceeded, exiting!!!\n");
1161         buf[0] = (void *)gva;
1163         buf[2] = (void *)curr->curr_lt[i];
1165         key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);
1167         type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);
1169         if (!type) { // something has gone terribly wrong
1170             TM_ERR(tm->ginfo,GC,"could not find accesstype entry to gc, THIS! IS! WRONG!\n");
1174         /* delete the access type entry */
1175         tm_del_acc_entry(tm, key);
1178     /* delete the stale context */
1179     tm_del_stale_ctxt(curr);
1186 tm_collect_all_contexts (struct v3_trans_mem * tm,
1187                          struct hashtable_iter * ctxt_iter,
/* Examine every context chained under the iterator's current address key,
 * collecting those no longer referenced by any core's active transaction.
 * Advances the iterator (either via iter_remove when the chain empties, or
 * iter_advance otherwise). (tmoff/lt_copy parameters visible in the
 * signature tail below.) */
1190                          uint64_t * begin_time,
1191                          uint64_t * end_time)
1193     struct hash_chain * tmp;
1194     struct hash_chain * curr;
1195     struct list_head * chain_list;
1198     gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
1200     chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);
1202     /* this is a chained hash, so for each address, we will have
1203      * a list of contexts. We now check each context to see
1204      * whether or not it can be collected
1206     list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
1208         uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);
1210         /* not garbage, go on to the next context in the list */
1212             TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
1216         TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);
1218         /* found one, delete corresponding entries in access_type */
1219         if (tm_collect_context(tm, ctxt_iter, curr, begin_time, end_time, gva) == -1) {
1220             TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
1226     /* if context list (hash chain) is now empty, remove the hash entry */
1227     if (list_empty(chain_list)) {
1228         v3_htable_iter_remove(ctxt_iter, 0);
1229         (tm->addr_ctxt_entries)--;
1231         v3_htable_iter_advance(ctxt_iter);
1234     /* give the CPU away NONONO NEVER YIELD WHILE HOLDING A LOCK */
1242 tm_hash_gc (struct v3_trans_mem * tm)
/* Garbage-collect stale conflict-log state: snapshot the global last-trans
 * array under the global lock, then walk the whole addr_ctxt table (with
 * both hash locks held) collecting contexts no active transaction can still
 * reference. Timed with rdtsc for debug reporting. */
1244     addr_t irqstate, irqstate2;
1246     uint64_t begin_time, end_time, tmoff;
1248     struct v3_tm_state * tms = NULL;
1249     struct hashtable_iter * ctxt_iter = NULL;
1251     tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");
    /* (lookup failure, not allocation, despite the message below) */
1253         TM_ERR(tm->ginfo,GC,"could not alloc tms\n");
1257     TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
1258     TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
1259     TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));
    /* tmoff: nonzero when no core is currently transactional */
1261     tmoff = (tms->cores_active == 0);
1263     lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
1265         TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
1269     memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
1271     rdtscll(begin_time);
1273     /* lt_copy holds the last transaction number for each core */
1274     irqstate = v3_lock_irqsave(tm_global_state->lock);
1275     memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
1276     v3_unlock_irqrestore(tm_global_state->lock, irqstate);
1278     /* lock both hashes */
1279     irqstate = v3_lock_irqsave(tm->addr_ctxt_lock);
1280     irqstate2 = v3_lock_irqsave(tm->access_type_lock);
1282     /* loop over hash entries in addr_ctxt */
1283     ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
1285         TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
1290     /* we check each address stored in the hash */
1291     while (ctxt_iter->entry) {
1292         /* NOTE: this call advances the hash iterator */
1293         if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy, &begin_time, &end_time) == -1) {
1300     v3_destroy_htable_iter(ctxt_iter);
    /* NOTE(review): the saved-flag variables appear SWAPPED here --
     * addr_ctxt_lock was saved into irqstate and access_type_lock into
     * irqstate2 above, but they are restored with the opposite variables.
     * Harmless only if both saves captured the same IRQ state; verify. */
1303     v3_unlock_irqrestore(tm->access_type_lock, irqstate);
1304     v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate2);
1309         TM_ERR(tm->ginfo,GC,"garbage collection failed, time spent: %d cycles\n", (int)(end_time - begin_time));
1311         TM_DBG(tm->ginfo,GC,"ended garbage collection succesfuly, time spent: %d cycles\n", (int)(end_time - begin_time));
1314     TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
1315     TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));
1321 /* TODO: break out the for loops in these functions
 *
 * Record an access against an address that already has an addr_ctxt entry:
 * reuse an existing context chain entry whose last-trans snapshot matches
 * lt_copy exactly, or append a new one (which TAKES OWNERSHIP of lt_copy),
 * then create/update the per-core access_type records keyed by
 * (gva, core_id, core_lt). (gva and other parameters, plus the r/w flag
 * assignments on `type`, are elided in this excerpt.) */
1323 tm_update_ctxt_list (struct v3_trans_mem * tm,
1327                      struct list_head * hash_list)
1329     struct hash_chain * curr = NULL;
1330     struct hash_chain * tmp  = NULL;
1331     uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    /* look for a chain entry whose snapshot matches lt_copy in full */
1336     list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {
1340         for (i = 0; i < num_cores; i++) {
1341             if (curr->curr_lt[i] != lt_copy[i]) {
    /* no matching context: append a fresh chain entry owning lt_copy */
1355         struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));
1358             TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
1362         memset(new_l, 0, sizeof(struct hash_chain));
1364         new_l->curr_lt = lt_copy;
1366         list_add_tail(&(new_l->lt_node), hash_list);
1369     for (core_id = 0; core_id < num_cores; core_id++) {
1370         struct v3_tm_access_type * type;
1371         struct v3_ctxt_tuple tup;
1372         tup.gva     = (void*)gva;
1373         tup.core_id = (void*)core_id;
1374         tup.core_lt = (void*)lt_copy[core_id];
1377         key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));
1381         type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);
        /* no record yet for this (gva, core, lt): create one */
1386             type = V3_Malloc(sizeof(struct v3_tm_access_type));
1389                 TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
1401             if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
1402                 TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
1405             (tm->access_type_entries)++;
1413 /* no entry in addr-ctxt yet, create one */
/*
 * First access to gva: allocate a new hash chain list, seed it with the
 * current logical-time snapshot, insert it into tm->addr_ctxt keyed by gva,
 * then create one access_type record per core.
 *
 * Ownership: lt_copy is adopted by the new hash_chain; hash_list and new_l
 * are owned by tm->addr_ctxt on success. Error paths (mostly elided in this
 * listing) appear to unwind via list_del at the bottom.
 */
1415 tm_create_ctxt_key (struct v3_trans_mem * tm,
1420 struct list_head * hash_list = NULL;
1421 struct hash_chain * new_l = NULL;
1422 uint64_t num_cores = tm->ginfo->vm_info->num_cores;
1424 hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
1427 TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
1431 INIT_LIST_HEAD(hash_list);
1433 new_l = V3_Malloc(sizeof(struct hash_chain));
1436 TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
1440 memset(new_l, 0, sizeof(struct hash_chain));
/* the chain link takes ownership of the caller's logical-time snapshot */
1442 new_l->curr_lt = lt_copy;
1444 /* add the context to the hash chain */
1445 list_add_tail(&(new_l->lt_node), hash_list);
1447 if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
1448 TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
/* counter mirrors v3_htable_count(tm->addr_ctxt) */
1452 (tm->addr_ctxt_entries)++;
1455 /* TODO: we need a way to unwind and deallocate for all cores on failure here */
1456 for (core_id = 0; core_id < num_cores; core_id++) {
1457 struct v3_tm_access_type * type = NULL;
1458 struct v3_ctxt_tuple tup;
1459 tup.gva = (void*)gva;
1460 tup.core_id = (void*)core_id;
1461 tup.core_lt = (void*)lt_copy[core_id];
1464 type = V3_Malloc(sizeof(struct v3_tm_access_type));
1467 TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
/* key the access record on the (gva, core, logical-time) tuple */
1477 key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));
1479 if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
1480 TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
1483 (tm->access_type_entries)++;
/* unwind: remove the chain link added above (error path) */
1489 list_del(&(new_l->lt_node));
1499 * called during MIME execution
1500 * record memory access in conflict logs
1501 * this locks the table during insertion
/*
 * Record a guest memory access (gva, read/write) in the conflict-detection
 * tables. Snapshots the global per-core last-transaction vector under
 * tm_global_state->lock, then either creates a fresh context chain for the
 * address (first touch) or updates the existing one.
 *
 * Ownership: lt_copy is handed off to tm_create_ctxt_key /
 * tm_update_ctxt_list on both paths.
 */
1504 tm_record_access (struct v3_trans_mem * tm,
1509 struct list_head * hash_list;
1513 num_cores = tm->ginfo->vm_info->num_cores;
1515 TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
1516 (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));
1517 //PrintDebug(tm->ginfo->vm_info, tm->ginfo,"\tWe think that addr-ctxt.cnt = %d, access-type.cnt = %d\n",(int)tm->addr_ctxt_entries,(int)tm->access_type_entries);
/* one slot per vcore for the logical-time snapshot */
1519 lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);
1521 TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
1525 memset(lt_copy, 0, sizeof(uint64_t)*num_cores);
/* copy the global last_trans vector atomically w.r.t. other cores */
1527 irqstate = v3_lock_irqsave(tm_global_state->lock);
1528 memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
1529 v3_unlock_irqrestore(tm_global_state->lock, irqstate);
1531 if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
1532 /* we haven't created a context list for this address yet, go do it */
1533 return tm_create_ctxt_key(tm, lt_copy, gva, write);
1536 /* we have a context list for this addres already, do we need to create a new context? */
1537 return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
/*
 * Advertise RTM support to the guest by editing the virtual CPUID:
 * raises the max standard/extended leaf so leaf 7 is reachable, then sets
 * the RTM bit (CPUID.07H:EBX bit 11) on both the Intel and AMD leaves.
 */
1545 tm_prepare_cpuid (struct v3_vm_info * vm)
1548 V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");
1550 /* increase max CPUID function to 7 (extended feature flags enumeration) */
1551 v3_cpuid_add_fields(vm,0x0,
1558 /* do the same for AMD */
1559 v3_cpuid_add_fields(vm,0x80000000,
1560 0xffffffff, 0x80000007,
1566 /* enable RTM (CPUID.07H.EBX.RTM = 1) */
1567 v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
1568 v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
/*
 * VM-wide init for the trans_mem extension: allocates the shared
 * v3_tm_state, registers the TM kickback hypercall, initializes the global
 * lock and per-core last-transaction array, publishes the state in
 * tm_global_state, and enables RTM in the guest CPUID.
 *
 * Error paths (partially elided here) unwind the lock and hypercall at the
 * bottom labels.
 */
1573 init_trans_mem (struct v3_vm_info * vm,
1574 v3_cfg_tree_t * cfg,
1577 struct v3_tm_state * tms;
1579 PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");
1581 tms = V3_Malloc(sizeof(struct v3_tm_state));
1583 PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
1587 memset(tms, 0, sizeof(struct v3_tm_state));
1589 if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
1590 PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
1594 v3_lock_init(&(tms->lock));
/* VM starts with transactional mode disabled and no active cores */
1596 tms->TM_MODE = TM_OFF;
1597 tms->cores_active = 0;
/* per-core last-transaction logical-time vector, zeroed */
1599 uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);
1601 PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
1605 memset(lt, 0, sizeof(uint64_t) * vm->num_cores);
1608 for (i = 0; i < vm->num_cores; i++) {
1612 tms->last_trans = lt;
/* publish for access from fault/record paths (file-scope global) */
1615 tm_global_state = tms;
1617 tm_prepare_cpuid(vm);
/* error unwind */
1622 v3_lock_deinit(&(tms->lock));
1623 v3_remove_hypercall(vm, TM_KICKBACK_CALL);
/*
 * Per-core init: allocates and zeroes the core's v3_trans_mem state,
 * initializes the read/write conflict lists, creates the addr_ctxt and
 * access_type hash tables with their locks, and sets the core to
 * TM_NULL / TM_OFF. Error path frees the addr_ctxt table.
 */
1631 init_trans_mem_core (struct guest_info * core,
1635 struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));
1637 TM_DBG(core,INIT, "Trans Mem. Core Init\n");
1640 TM_ERR(core,INIT, "Problem allocating TM state\n");
1644 memset(tm, 0, sizeof(struct v3_trans_mem));
/* transactional read/write sets for this core */
1646 INIT_LIST_HEAD(&tm->trans_r_list);
1647 INIT_LIST_HEAD(&tm->trans_w_list);
/* gva -> context chain table (0 = default initial size) */
1649 tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
1650 if (!(tm->addr_ctxt)) {
1651 TM_ERR(core,INIT,"problem creating addr_ctxt\n");
/* (gva, core, logical-time) -> access-type table */
1655 tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
1656 if (!(tm->access_type)) {
1657 TM_ERR(core,INIT,"problem creating access_type\n");
1661 v3_lock_init(&(tm->addr_ctxt_lock));
1662 v3_lock_init(&(tm->access_type_lock));
/* core starts outside any transaction */
1664 tm->TM_STATE = TM_NULL;
1665 tm->TM_MODE = TM_OFF;
1671 tm->access_type_entries = 0;
1672 tm->addr_ctxt_entries = 0;
1673 tm->dirty_instr_flag = 0;
1675 /* TODO: Cache Model */
1676 //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
1677 //tm->box->init = init_cache;
1678 //tm->box->init(sample_spec, tm->box);
/* error unwind: free the first table if the second failed */
1685 v3_free_htable(tm->addr_ctxt, 0, 0);
/*
 * VM-wide teardown: removes the TM kickback hypercall and destroys the
 * global state lock. (Freeing of tms itself is not visible in this listing.)
 */
1693 deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
1695 struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;
1697 if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
1698 PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
1702 v3_lock_deinit(&(tms->lock));
/*
 * Per-core teardown: clears the read/write sets, warns if the staging page
 * was never freed, walks addr_ctxt deleting every context chain entry, then
 * frees both hash tables and their locks.
 */
1713 deinit_trans_mem_core (struct guest_info * core,
1717 struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
1718 struct hashtable_iter * ctxt_iter = NULL;
1720 v3_clear_tm_lists(tm);
1722 if (tm->staging_page) {
1723 TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
1726 ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
1728 TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
1732 /* delete all context entries for each hashed address */
1733 while (ctxt_iter->entry) {
1734 struct hash_chain * tmp;
1735 struct hash_chain * curr;
1736 struct list_head * chain_list;
1739 gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
1740 chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);
1742 /* delete the context */
1743 list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
1744 tm_del_stale_ctxt(curr);
1747 v3_htable_iter_advance(ctxt_iter);
1750 v3_destroy_htable_iter(ctxt_iter);
1752 /* we've already deleted the values in this one */
1753 v3_free_htable(tm->addr_ctxt, 0, 0);
1755 /* KCH WARNING: we may not want to free access type values here */
1756 v3_free_htable(tm->access_type, 1, 0);
1758 v3_lock_deinit(&(tm->addr_ctxt_lock));
1759 v3_lock_deinit(&(tm->access_type_lock));
/*
 * Extension descriptor wiring the trans_mem lifecycle callbacks into the
 * Palacios extension framework; registered below (the registering init
 * function's header is elided in this listing).
 */
1769 static struct v3_extension_impl trans_mem_impl = {
1770 .name = "trans_mem",
1772 .vm_init = init_trans_mem,
1773 .vm_deinit = deinit_trans_mem,
1774 .core_init = init_trans_mem_core,
1775 .core_deinit = deinit_trans_mem_core,
1780 register_extension(&trans_mem_impl);
1784 * tms->on => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
1785 * tms->off => commit our list, free sp, clear our lists, clr_tm will handle global state, then gc
/*
 * Emulate XEND: validates we are inside a transaction (else inject #UD),
 * commits the buffered write set from the staging page, frees the staging
 * page, flushes the vtlb (which may still map the staging page), clears the
 * per-core lists, advances RIP past the instruction, and garbage collects
 * the conflict tables.
 */
1788 tm_handle_xend (struct guest_info * core,
1789 struct v3_trans_mem * tm)
/* timestamp transaction exit for stats/debug */
1791 rdtscll(tm->exit_time);
1793 // Error checking! make sure that we have gotten here in a legitimate manner
1794 if (tm->TM_MODE != TM_ON) {
1795 TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
1796 v3_free_staging_page(tm);
1798 v3_clear_tm_lists(tm);
/* XEND outside a transaction is #UD per the RTM spec */
1799 v3_raise_exception(core, UD_EXCEPTION);
1803 /* Our transaction finished! */
1804 /* Copy over data from the staging page */
1805 TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");
1807 if (commit_list(core, tm) == -1) {
1808 TM_ERR(core,UD,"error commiting tm list to memory\n");
1812 TM_DBG(core,UD,"Freeing staging page and internal data structures\n");
1814 // Free the staging page
1815 if (v3_free_staging_page(tm) == -1) {
1816 TM_ERR(core,XEND,"couldnt free staging page\n");
1820 // clear vtlb, as it may still contain our staging page
1821 if (v3_clr_vtlb(core) == -1) {
1822 TM_ERR(core,XEND,"couldnt clear vtlb\n");
1827 v3_clear_tm_lists(tm);
1829 /* Set the state and advance the RIP */
1830 TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
1831 core->rip += XEND_INSTR_LEN;
1835 // time to garbage collect
1837 if (tm_hash_gc(tm) == -1) {
1838 TM_ERR(core,XEND,"could not gc!\n");
1847 * tms->on => handle our abort code, handle_trans_abort will clear necessary state
1848 * tms->off => handle our abort code, handle_trans_abort will clear necessary state
/*
 * Emulate XABORT imm8: pulls the 8-bit abort reason from the third
 * instruction byte, injects #UD if we are not in a transaction, restores a
 * dirtied instruction if single-stepping state is active, then delegates
 * the actual abort (register restore, EAX status) to v3_handle_trans_abort.
 */
1851 tm_handle_xabort (struct guest_info * core,
1852 struct v3_trans_mem * tm,
1857 // we must reflect the immediate back into EAX 31:24
/* instr layout: 0xc6 0xf8 imm8 — the immediate is at offset 2 */
1858 reason = *(uint8_t*)(instr+2);
1860 /* TODO: this probably needs to move somewhere else */
1861 rdtscll(tm->exit_time);
1863 // Error checking! make sure that we have gotten here in a legitimate manner
1864 if (tm->TM_MODE != TM_ON) {
1865 TM_DBG(core, UD, "We got here while not in a transactional core!\n");
1866 v3_raise_exception(core, UD_EXCEPTION);
1869 TM_DBG(core,UD,"aborting\n");
/* if we were mid single-step, undo the instruction patch first */
1871 if (tm->TM_STATE != TM_NULL) {
1872 v3_restore_dirty_instr(core);
1876 v3_handle_trans_abort(core, TM_ABORT_XABORT, reason);
1883 * tms->on => we set up our running env, set_tm will clear other vtlb's to start single stepping
1884 * tms->off => we set up our running env, set_tm will not clear anyone elses vtlb
/*
 * Emulate XBEGIN rel32: injects #UD on nested XBEGIN, records entry time
 * and exit count, enables TM mode for the core, computes the fallback
 * address from the rel32 displacement at instr+2, flushes the shadow page
 * tables, and advances RIP past the instruction.
 */
1887 tm_handle_xbegin (struct guest_info * core,
1888 struct v3_trans_mem * tm,
1891 sint32_t rel_addr = 0;
1893 if (tm->TM_MODE == TM_ON) {
/* NOTE(review): real RTM nests transactions; this model treats it as #UD */
1894 TM_ERR(core,UD,"We got here while already in a transactional region!");
1895 v3_raise_exception(core, UD_EXCEPTION);
1898 rdtscll(tm->entry_time);
1899 tm->entry_exits = core->num_exits;
1901 /* set the tm_mode for this core */
1904 TM_DBG(core,UD,"Set the system in TM Mode, save fallback address");
1906 // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
1907 rel_addr = *(sint32_t*)(instr+2);
/* fallback target is relative to the end of the XBEGIN instruction */
1908 tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;
1910 TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);
1912 /* flush the shadow page tables */
1913 TM_DBG(core,UD,"Throwing out the shadow table");
1916 // Increase RIP, ready to go to next instruction
1917 core->rip += XBEGIN_INSTR_LEN;
1924 * tms->on => we set up our running env, set_tm will clear other vtlb's to start single stepping
1925 * tms->off => we set up our running env, set_tm will not clear anyone elses vtlb
/*
 * Emulate XTEST: per the RTM spec, ZF (rflags bit 6) is cleared when
 * executing inside a transaction and set otherwise; then skip the
 * instruction.
 */
1928 tm_handle_xtest (struct guest_info * core,
1929 struct v3_trans_mem * tm)
1931 // if we are in tm mode, set zf to 0, otherwise 1
1932 if (tm->TM_MODE == TM_ON) {
1933 core->ctrl_regs.rflags &= ~(1ULL << 6);
1935 core->ctrl_regs.rflags |= (1ULL << 6);
1938 core->rip += XTEST_INSTR_LEN;
1945 * XBEGIN c7 f8 rel32
/*
 * #UD exit handler: the RTM instructions fault with #UD on hardware without
 * TSX, so we fetch the bytes at RIP and dispatch on the opcode:
 *   c7 f8 rel32    -> XBEGIN
 *   c6 f8 imm8     -> XABORT
 *   0f 01 d5       -> XEND
 *   0f 01 d6       -> XTEST
 * Anything else is a genuine #UD and is reflected back to the guest.
 */
1950 tm_handle_ud (struct guest_info * core)
1952 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
1953 uchar_t instr[INSTR_BUF_SZ];
1954 uint8_t byte1, byte2, byte3;
/* read the faulting instruction bytes from guest memory at RIP */
1956 tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);
1958 byte1 = *(uint8_t *)((addr_t)instr);
1959 byte2 = *(uint8_t *)((addr_t)instr + 1);
1960 byte3 = *(uint8_t *)((addr_t)instr + 2);
1963 if (byte1 == 0xc7 && byte2 == 0xf8) { /* third byte is an immediate */
1965 TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx", byte1, byte2, byte3, (uint64_t)core->rip);
1967 if (tm_handle_xbegin(core, tm, instr) == -1) {
1968 TM_ERR(core, UD, "Problem handling XBEGIN\n");
1972 } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an immediate */
1974 TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);
1976 if (tm_handle_xabort(core, tm, instr) == -1) {
1977 TM_ERR(core, UD, "Problem handling XABORT\n");
1981 } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {
1983 TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);
1985 if (tm_handle_xend(core, tm) == -1) {
1986 TM_ERR(core, UD, "Problem handling XEND\n");
1991 } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) { /* third byte is an immediate */
1993 TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);
1995 if (tm_handle_xtest(core, tm) == -1) {
1996 TM_ERR(core, UD, "Problem handling XTEST\n");
2002 /* oh no, this is still unknown, pass the error back to the guest! */
2003 TM_DBG(core,UD,"Encountered:%x %x %x\n", byte1, byte2, byte3);
2004 v3_raise_exception(core, UD_EXCEPTION);
/*
 * SVM exception-intercept dispatcher for TM. #UD (EXCP6) goes to the RTM
 * decoder (tm_handle_ud). Every other intercepted exception follows one
 * pattern: outside a transaction the exception is simply re-injected into
 * the guest; inside a transaction it aborts the transaction with
 * TM_ABORT_UNSPECIFIED, matching RTM's "exceptions abort the transaction"
 * rule. The exit-code-to-vector mapping relies on SVM_EXIT_EXCP0 + vector.
 */
2012 v3_tm_handle_exception (struct guest_info * info,
2015 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2018 TM_ERR(info,ERR,"TM extension state not found\n");
2022 switch (exit_code) {
2023 /* any of these exceptions should abort current transactions */
2024 case SVM_EXIT_EXCP6:
2025 if (tm_handle_ud(info) == -1) {
2029 case SVM_EXIT_EXCP0:
2030 if (tm->TM_MODE != TM_ON) {
2031 v3_raise_exception(info, DE_EXCEPTION);
2034 TM_DBG(info,EXCP,"aborting due to DE exception\n");
2035 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2038 case SVM_EXIT_EXCP1:
2039 if (tm->TM_MODE != TM_ON) {
2040 v3_raise_exception(info, DB_EXCEPTION);
2043 TM_DBG(info,EXCP,"aborting due to DB exception\n");
2044 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2047 case SVM_EXIT_EXCP3:
2048 if (tm->TM_MODE != TM_ON) {
2049 v3_raise_exception(info, BP_EXCEPTION);
2052 TM_DBG(info,EXCP,"aborting due to BP exception\n");
2053 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2056 case SVM_EXIT_EXCP4:
2057 if (tm->TM_MODE != TM_ON) {
2058 v3_raise_exception(info, OF_EXCEPTION);
2061 TM_DBG(info,EXCP,"aborting due to OF exception\n");
2062 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2065 case SVM_EXIT_EXCP5:
2066 if (tm->TM_MODE != TM_ON) {
2067 v3_raise_exception(info, BR_EXCEPTION);
2070 TM_DBG(info,EXCP,"aborting due to BR exception\n");
2071 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2074 case SVM_EXIT_EXCP7:
2075 if (tm->TM_MODE != TM_ON) {
2076 v3_raise_exception(info, NM_EXCEPTION);
2079 TM_DBG(info,EXCP,"aborting due to NM exception\n");
2080 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2083 case SVM_EXIT_EXCP10:
2084 if (tm->TM_MODE != TM_ON) {
2085 v3_raise_exception(info, TS_EXCEPTION);
2088 TM_DBG(info,EXCP,"aborting due to TS exception\n");
2089 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2092 case SVM_EXIT_EXCP11:
2093 if (tm->TM_MODE != TM_ON) {
2094 v3_raise_exception(info, NP_EXCEPTION);
2097 TM_DBG(info,EXCP,"aborting due to NP exception\n");
2098 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2101 case SVM_EXIT_EXCP12:
2102 if (tm->TM_MODE != TM_ON) {
2103 v3_raise_exception(info, SS_EXCEPTION);
2106 TM_DBG(info,EXCP,"aborting due to SS exception\n");
2107 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2110 case SVM_EXIT_EXCP13:
2111 if (tm->TM_MODE != TM_ON) {
2112 v3_raise_exception(info, GPF_EXCEPTION);
2115 TM_DBG(info,EXCP,"aborting due to GPF exception\n");
2116 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2119 case SVM_EXIT_EXCP16:
2120 if (tm->TM_MODE != TM_ON) {
2121 v3_raise_exception(info, MF_EXCEPTION);
2124 TM_DBG(info,EXCP,"aborting due to MF exception\n");
2125 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2128 case SVM_EXIT_EXCP17:
2129 if (tm->TM_MODE != TM_ON) {
2130 v3_raise_exception(info, AC_EXCEPTION);
2133 TM_DBG(info,EXCP,"aborting due to AC exception\n");
2134 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2137 case SVM_EXIT_EXCP19:
2138 if (tm->TM_MODE != TM_ON) {
2139 v3_raise_exception(info, XF_EXCEPTION);
2142 TM_DBG(info,EXCP,"aborting due to XF exception\n");
2143 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
/* unhandled intercepted vector: just log it (exit_code - 0x40 = vector) */
2147 TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
/*
 * Enable VMCB exception intercepts for every vector that
 * v3_tm_handle_exception handles, so the exceptions trap to the VMM and can
 * abort in-flight transactions. (#PF is handled via the paging path, not
 * here.)
 */
2155 v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
2157 ctrl_area->exceptions.de = 1; // 0 : divide by zero
2158 ctrl_area->exceptions.db = 1; // 1 : debug
2159 ctrl_area->exceptions.bp = 1; // 3 : breakpoint
2160 ctrl_area->exceptions.of = 1; // 4 : overflow
2161 ctrl_area->exceptions.br = 1; // 5 : bound range
2162 ctrl_area->exceptions.ud = 1; // 6 : undefined opcode
2163 ctrl_area->exceptions.nm = 1; // 7 : device not available
2164 ctrl_area->exceptions.ts = 1; // 10 : invalid tss
2165 ctrl_area->exceptions.np = 1; // 11 : segment not present
2166 ctrl_area->exceptions.ss = 1; // 12 : stack
2167 ctrl_area->exceptions.gp = 1; // 13 : general protection
2168 ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
2169 ctrl_area->exceptions.ac = 1; // 17 : alignment check
2170 ctrl_area->exceptions.xf = 1; // 19 : simd floating point
2174 extern void v3_stgi();
2175 extern void v3_clgi();
2177 /* 441-tm: if we are in TM mode, we need to check for any interrupts here,
2178 * and if there are any, need to do some aborting! Make sure not to die here
2179 * if we are already 'aborting', this results in infiloop
/*
 * Pre-entry interrupt check: if a virtual IRQ or event injection is pending
 * while a transaction is active (and we are not already aborting), abort the
 * transaction and sync the new RIP into the VMCB saved state.
 *
 * NOTE(review): the whole check is gated by a hard-coded `0 &&` below, so
 * this path is currently compiled-in but disabled (see the TODO).
 */
2182 v3_tm_check_intr_state (struct guest_info * info,
2183 vmcb_ctrl_t * guest_ctrl,
2184 vmcb_saved_state_t * guest_state)
2187 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2190 TM_ERR(info,INTR,"TM extension state not found\n");
2195 /* TODO: work this in */
2196 if (0 && (tm->TM_MODE == TM_ON) &&
2197 (tm->TM_ABORT != 1)) {
2199 if (guest_ctrl->guest_ctrl.V_IRQ ||
2200 guest_ctrl->EVENTINJ.valid) {
2202 rdtscll(tm->exit_time);
2203 TM_DBG(info,INTR,"%lld exits happened, time delta is %lld",(info->num_exits - tm->entry_exits),(tm->entry_time - tm->exit_time));
2205 // We do indeed have pending interrupts
2207 TM_DBG(info,INTR,"we have a pending interrupt!\n");
2209 v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
2210 // Copy new RIP state into arch dependent structure
2211 guest_state->rip = info->rip;
2212 TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);
/*
 * 64-bit #PF hook: when the VM is in TM mode and the fault came from user
 * mode, route the fault through v3_handle_trans_mem_fault, which may return
 * an alternate (staging) page for the shadow paging code to map via
 * *page_to_use. Also lazily allocates this core's staging page the first
 * time it is needed.
 */
2222 v3_tm_handle_pf_64 (struct guest_info * info,
2223 pf_error_t error_code,
2225 addr_t * page_to_use)
2227 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2228 struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");
2231 TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
2236 TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
/* only user-mode faults participate in transactional buffering */
2240 if ((tms->TM_MODE == TM_ON) &&
2241 (error_code.user == 1)) {
2243 TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);
2245 *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);
2247 if (*page_to_use == ERR_TRANS_FAULT_FAIL){
2248 TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
/* first transactional fault on this core: allocate the staging page */
2252 if ((tm->TM_MODE == TM_ON) &&
2253 (tm->staging_page == NULL)) {
2255 tm->staging_page = V3_AllocPages(1);
2257 if (!(tm->staging_page)) {
2258 TM_ERR(info,MMU,"Problem allocating staging page\n");
2262 TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
/*
 * User-mode TLB-miss hook: if this core is in a transaction and the #PF
 * handler returned an alternate page (> TRANS_FAULT_OK sentinel), redirect
 * the shadow mapping to that page (the staging copy) instead of the real
 * guest frame.
 */
2271 v3_tm_handle_usr_tlb_miss (struct guest_info * info,
2272 pf_error_t error_code,
2276 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2278 /* TLB miss from user */
2279 if ((tm->TM_MODE == TM_ON) &&
2280 (error_code.user == 1)) {
2282 if (page_to_use > TRANS_FAULT_OK) {
2283 TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
2284 *shadow_pa = page_to_use;
/*
 * Read-fault hook: while the VM is transactional and this core is
 * single-stepping (TM_EXEC), map user-mode read accesses read-only in the
 * shadow PTE so a subsequent write re-faults and can be buffered.
 */
2293 v3_tm_handle_read_fault (struct guest_info * info,
2294 pf_error_t error_code,
2295 pte64_t * shadow_pte)
2297 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2298 struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");
2300 // If we are about to read, make it read only
2301 if ((tms->TM_MODE == TM_ON) &&
2302 (tm->TM_STATE == TM_EXEC) &&
2303 (error_code.write == 0) &&
2304 (error_code.user == 1)) {
2306 TM_DBG(info,MMU, "Flagging the page read only\n");
2307 shadow_pte->writable = 0;
2313 v3_tm_decode_rtm_instrs (struct guest_info * info,
2315 struct x86_instr * instr)
2317 uint8_t byte1, byte2, byte3;
2318 struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
2320 if (tm->TM_MODE == TM_ON) {
2322 byte1 = *(uint8_t *)(instr_ptr);
2323 byte2 = *(uint8_t *)(instr_ptr + 1);
2324 byte3 = *(uint8_t *)(instr_ptr + 2);
2326 if (byte1 == 0xc7 &&
2327 byte2 == 0xf8) { /* third byte is an immediate */
2329 TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
2330 instr->instr_length = 6;
2333 } else if (byte1 == 0xc6 &&
2334 byte2 == 0xf8) { /* third byte is an immediate */
2336 TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
2337 instr->instr_length = 3;
2340 } else if (byte1 == 0x0f &&
2344 TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
2345 instr->instr_length = 3;