/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico. You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
 *         Kyle C. Hale <kh@u.northwestern.edu>
 *         Marcel Flores <marcel-flores@u.northwestern.edu>
 *         Zachary Bischof <zbischof@u.northwestern.edu>
 *
 * This is free software. You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/svm.h>
#include <palacios/svm_handler.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vmm_hashtable.h>

#include <extensions/trans_mem.h>
#include <extensions/tm_util.h>
#if !V3_CONFIG_DEBUG_TM_FUNC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* TODO:
 * - save/restore register state on XBEGIN/XABORT
 * - put status codes in RAX
 * - implement proper exceptions for failed XBEGINs, etc.
 */

/* this includes a mov to rax */
static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
static struct v3_tm_state * tm_global_state = NULL;
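
/* For reference, the injected sequence above decodes as (standard x86-64
 * encodings):
 *
 *   48 c7 c0 37 13 00 00     mov rax, 0x1337     ; REX.W + C7 /0 imm32
 *   0f 01 d9                 vmmcall
 *
 * so TM_KICKBACK_CALL is presumably 0x1337: it is loaded into RAX before the
 * VMMCALL so that the hypercall dispatcher routes the exit to
 * tm_handle_hcall().
 */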
static int
tm_translate_rip (struct guest_info * core, addr_t * target)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_gpa_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    } else if (core->mem_mode == VIRTUAL_MEM) {
        v3_gva_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    }

    return 0;
}
static int
tm_read_instr (struct guest_info * core,
               addr_t addr,
               uchar_t * dst,
               uint_t size)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_read_gpa_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    } else {
        v3_read_gva_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    }

    return 0;
}
static int
tm_handle_decode_fail (struct guest_info * core)
{
    addr_t cur_rip = 0;
    uint_t core_num;

    tm_translate_rip(core, &cur_rip);

#ifdef V3_CONFIG_DEBUG_TM_FUNC
    v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
#endif

    /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
    for (core_num = 0; core_num < core->vm_info->num_cores; core_num++) {

        struct v3_trans_mem * remote_tm;

        /* skip the local core */
        if (core_num == core->vcpu_id) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");
        if (!remote_tm) {
            TM_ERR(core,DECODE,"couldn't get remote_tm\n");
            return -1;
        }

        /* skip cores that aren't in a transactional context */
        if (remote_tm->TM_MODE == TM_OFF) {
            continue;
        }

        TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
        remote_tm->TM_ABORT = 1;
    }

    return 0;
}
/* special casing for control-flow instructions
 * returns 1 if we need to jump
 * returns -1 on error
 */
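/* A quick reference for the flag tests below (standard x86 Jcc semantics):
 *
 *   JLE: taken if ZF == 1 or SF != OF
 *   JAE: taken if CF == 0
 *   JMP: always taken
 *   JNZ: taken if ZF == 0
 *   JL : taken if SF != OF
 *   JNS: taken if SF == 0
 *
 * In every case the next-instruction location is computed as
 *
 *   *instr_location = rip + instr_len + (taken ? rel_offset : 0)
 *
 * so the VMMCALL is injected at the instruction that will actually run next.
 */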
static int
tm_handle_ctrl_flow (struct guest_info * core,
                     struct v3_trans_mem * tm,
                     addr_t * instr_location,
                     struct x86_instr * struct_instr)
{
    /* special casing for control flow instructions */
    struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
    int to_jmp = 0;
    sint64_t offset = 0;

    switch (struct_instr->op_type) {

        case V3_OP_JLE:
            TM_DBG(core,DECODE,"!!++ JLE\n");
            to_jmp = (flags->zf || flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JAE:
            TM_DBG(core,DECODE,"!!++ JAE\n");
            to_jmp = (flags->cf == 0);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JMP:
            TM_DBG(core,DECODE,"!!++ JMP\n");
            to_jmp = 1;
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNZ:
            TM_DBG(core,DECODE,"!!++ JNZ\n");
            to_jmp = (flags->zf == 0);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JL:
            TM_DBG(core,DECODE,"!!++ JL\n");
            to_jmp = (flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNS:
            TM_DBG(core,DECODE,"!!++ JNS\n");
            to_jmp = (flags->sf == 0);
            offset = struct_instr->dst_operand.operand;
            tm->offset = offset;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);

            tm->to_branch = to_jmp;
            break;
        default:
            *instr_location = core->rip + tm->cur_instr_len;
            break;
    }

    return to_jmp;
}
/*
 * called inside the #UD and VMMCALL handlers
 * only affects global state if quix86 falls over
 *   -> set the other cores' TM_ABORT to 1, return -2
 */
static int
v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    struct x86_instr struct_instr;
    uchar_t cur_instr[INSTR_BUF_SZ];
    addr_t instr_location;

    // Fetch the current instruction
    tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);

    TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);

    /* Attempt to decode the current instruction to determine its length */
    if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {

        TM_ERR(core,Error,"Could not decode current instruction (at %llx)\n", (uint64_t)core->rip);

        /* this will attempt to abort all the remote cores */
        if (tm_handle_decode_fail(core) == -1) {
            TM_ERR(core,Error,"Could not handle failed decode\n");
            return ERR_STORE_FAIL;
        }

        /* we need to trigger a local abort */
        return ERR_STORE_MUST_ABORT;
    }

    /* we can't currently handle REP prefixes, abort */
    if (struct_instr.op_type != V3_INVALID_OP &&
            (struct_instr.prefixes.repne ||
             struct_instr.prefixes.repnz ||
             struct_instr.prefixes.rep   ||
             struct_instr.prefixes.repe  ||
             struct_instr.prefixes.repz)) {

        TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
        return ERR_STORE_MUST_ABORT;
    }

    tm->cur_instr_len = struct_instr.instr_length;

    /* handle jump instructions */
    tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);

    /* save the 10 bytes after the current instruction; we'll put the vmmcall there */
    tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);

    /* store the next instruction and its length in info */
    memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);

    return 0;
}
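
/* Taken together, v3_store_next_instr() and v3_overwrite_next_instr() form
 * the single-stepping loop this extension runs on: decode the instruction at
 * RIP to find where control goes next, stash the INSTR_INJECT_LEN bytes that
 * live there, overwrite them with the VMMCALL sequence, let the guest execute
 * exactly one instruction, and then take the resulting hypercall exit in
 * tm_handle_hcall(), which restores the stashed bytes and repeats. (A sketch
 * of the intended flow, inferred from the handlers in this file.)
 */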
static int
v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    addr_t ptr;

    // stash rax; the injected mov will clobber it
    tm->clobbered_rax = (core->vm_regs).rax;

    ptr = core->rip;

    /* we can't currently handle instructions that span page boundaries */
    if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
        return -1;
    }

    ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);

    if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
        return -1;
    }

    if (v3_gva_to_hva(core,
                get_addr_linear(core, ptr, &(core->segments.cs)),
                &ptr) == -1) {
        TM_ERR(core,Error,"Calculating next rip hva failed\n");
        return -1;
    }

    TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
            core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len);

    /* Copy the VMMCALL into the beginning of the next instruction (ptr) */
    memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);

    /* KCH: flag that we've dirtied an instruction, and store its guest and host addresses */
    tm->dirty_instr_flag = 1;
    tm->dirty_gva = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
    tm->dirty_hva = ptr;

    return 0;
}
/* restore the instruction bytes we saved off
 *
 * this should only be called if TM_STATE == TM_NULL; additionally, we check
 * whether our dirtied flag is set
 */
static int
v3_restore_dirty_instr (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    /* Restore next instruction, transition to IFETCH state */
    TM_DBG(core,RESTORE,"Restoring next instruction.\n");

    /* check if we've actually done an instruction overwrite */
    if (!(tm->dirty_instr_flag)) {
        TM_DBG(core,RESTORE,"nothing to restore here...\n");
        return 0;
    }

    // Actually restore the instruction
    memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);

    // Put rax back
    (core->vm_regs).rax = tm->clobbered_rax;

    // Scoot rip back up
    TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    core->rip = tm->dirty_gva;

    // clean up the dirty state
    tm->dirty_instr_flag = 0;
    tm->dirty_gva = 0;
    tm->dirty_hva = 0;
    memset(tm->dirty_instr, 0, 15);

    TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);

    return 0;
}
static addr_t
tm_handle_fault_ifetch (struct guest_info * core,
                        struct v3_trans_mem * tm)
{
    int sto;

    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as the faulting address, we must be at an ifetch.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;
    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_DBG(core,EXIT,"aborting for some reason\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,PF,"problem overwriting instruction\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_read (struct guest_info * core,
                      struct v3_trans_mem * tm,
                      addr_t fault_addr,
                      pf_error_t error)
{
    // This page fault was caused by a read to memory in the current instruction for a core in TM mode
    TM_DBG(core,DATA,"Page fault caused by read.\n");
    TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
        TM_ERR(core,PF,"problem adding to list\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF,"problem recording access\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    /* if we have previously written to this address, we need to update our
     * staging page and map it in */
    if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {

        TM_DBG(core,PF,"Saw a read from something in the write list\n");

        /* write the value from the linked list to the staging page */
        if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
            TM_ERR(core,PF, "could not stage entry!\n");
            return ERR_TRANS_FAULT_FAIL;
        }

        /* Hand it the staging page */
        return (addr_t)(tm->staging_page);

    } else {

        // Add it to the read set
        addr_t shadow_addr = 0;

        TM_DBG(core,PF,"Saw a read from a fresh address\n");

        if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
            TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
            return ERR_TRANS_FAULT_FAIL;
        }
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_write (struct guest_info * core,
                       struct v3_trans_mem * tm,
                       addr_t fault_addr,
                       pf_error_t error)
{
    void * data_loc;
    addr_t virt_data_loc;
    addr_t shadow_addr = 0;

    TM_DBG(core,DATA,"Page fault caused by write\n");
    TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not add to list!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
        TM_ERR(core,WRITE,"could not translate gva to hva for transaction write\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    // Copy the existing value to the staging page, populating that field.
    // This avoids errors in optimized code such as ++, where the original
    // value is not read, but simply incremented
    data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));

    if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
        TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
    //memcpy((void*)virt_data_loc, (void*)shadow_addr, sizeof(uint64_t));
    *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;

    return (addr_t)(tm->staging_page);
}
static addr_t
tm_handle_fault_extern_ifetch (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    int sto;

    // the system is in TM state, record the access
    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,Error,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;

    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
        v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,IFETCH,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_extern_access (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    TM_DBG(core,PF_HANDLE,"recording access\n");

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF_HANDLE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_tmoff (struct guest_info * core)
{
    TM_DBG(core,PF_HANDLE, "in the pf handler, but no one is in TM mode anymore (core %d); we should try to eliminate hypercalls\n", core->vcpu_id);

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
/*
 * called from the MMU -- this should mean that at least tms->TM_MODE is on
 *
 * tm->on : ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, write log, store instr, overwrite instr
 * tm->off: ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, store instr, overwrite instr
 *
 * returns ERR_TRANS_FAULT_FAIL on error
 * returns TRANS_FAULT_OK when things are fine
 * returns an addr when we're passing back a staging page
 */
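/* A rough sketch of the per-core state machine driven from here (inferred
 * from the handlers above and the hypercall handlers below):
 *
 *   TM_NULL ---XBEGIN---> TM_IFETCH ---#PF at RIP, inject VMMCALL---> TM_EXEC
 *   TM_EXEC ---hypercall exit: restore bytes, log/commit---> TM_IFETCH ...
 *
 * with aborts or XEND dropping the core back out of TM mode.
 */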
addr_t
v3_handle_trans_mem_fault (struct guest_info * core,
                           addr_t fault_addr,
                           pf_error_t error)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(core,ERROR,"couldn't get core state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (!tms) {
        TM_ERR(core,ERROR,"couldn't get vm trans_mem state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);

    if ((tm->TM_MODE == TM_ON) &&
        ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_ifetch(core, tm);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 0)) {

        return tm_handle_fault_read(core, tm, fault_addr, error);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 1)) {

        return tm_handle_fault_write(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC)) {

        return tm_handle_fault_extern_access(core, tm, fault_addr, error);

    } else {

        return tm_handle_fault_tmoff(core);
    }

    return TRANS_FAULT_OK;
}
static int
tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
{
    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,EXIT,"we are in TM mode but the system is not!\n");
        return TRANS_HCALL_FAIL;
    }

    // we got to an exit when things were off!
    TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    tm->TM_STATE = TM_NULL;

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_dec_abort (struct guest_info * core,
                           struct v3_trans_mem * tm)
{
    // we only ever get here from TM DECODE
    TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");

    tm->TM_ABORT = 0;

    v3_handle_trans_abort(core, TM_ABORT_UNSPECIFIED, 0);

    TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_ifetch_start (struct guest_info * core,
                              struct v3_trans_mem * tm)
{
    tm->TM_STATE = TM_IFETCH;

    TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away the VTLB and go into TM_IFETCH\n");

    // Finally, invalidate the shadow page table
    v3_invalidate_shadow_pts(core);

    return TRANS_HCALL_OK;
}
static int
tm_check_list_conflict (struct guest_info * core,
                        struct v3_trans_mem * tm,
                        struct list_head * access_list,
                        v3_tm_op_t op_type)
{
    struct mem_op * curr = NULL;
    struct mem_op * tmp  = NULL;
    int conflict = 0;

    list_for_each_entry_safe(curr, tmp, access_list, op_node) {

        conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);

        if (conflict == ERR_CHECK_FAIL) {

            TM_ERR(core,EXIT,"error checking for conflicts\n");
            return TRANS_HCALL_FAIL;

        } else if (conflict == CHECK_IS_CONFLICT) {

            TM_DBG(core,EXIT,"we have a conflict, aborting\n");
            v3_handle_trans_abort(core, TM_ABORT_CONFLICT, 0);
            return CHECK_MUST_ABORT;
        }
    }

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_check_conflicts (struct guest_info * core,
                                 struct v3_trans_mem * tm)
{
    int ret;

    TM_DBG(core,EXIT,"still TM_ON\n");
    TM_DBG(core,EXIT,"checking for conflicts\n");

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    tm->TM_STATE = TM_IFETCH;

    return TRANS_HCALL_OK;
}
/* trans mem hypercall handler
 *
 * cases handled below:
 *   - the system is off           -> restore the instruction and leave
 *   - running MIME (tm or tms on) -> restore state, continue single stepping
 *   - decode-failure abort        -> abort (due to quix86)
 *   - tm on for core and system   -> check for conflicts
 */
static int
tm_handle_hcall (struct guest_info * core,
                 unsigned int hcall_id,
                 void * priv_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (tms->TM_MODE == TM_OFF) {
        return tm_handle_hcall_tmoff(core, tm);
    }

    // The previous instruction has finished, copy the staging page back into the linked list!
    if (update_list(tm, &(tm->trans_w_list)) == -1) {
        TM_ERR(core,HCALL,"could not update_list!\n");
        return TRANS_HCALL_FAIL;
    }

    // Done handling the previous instruction: put back the next instruction, reset %rip, and go back to the IFETCH state
    TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    /* check if we were supposed to abort (e.g. after a decode failure) */
    if (tm->TM_ABORT == 1 &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_dec_abort(core, tm);

    } else if (tm->TM_STATE == TM_EXEC) {
        return tm_handle_hcall_ifetch_start(core, tm);
    }

    if (tm->TM_MODE == TM_ON &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_check_conflicts(core, tm);

    } else if (tm->TM_MODE == TM_OFF) {
        TM_DBG(core,EXIT,"we are in TM_OFF\n");
    }

    return TRANS_HCALL_OK;
}
static int
v3_tm_inc_tnum (struct v3_trans_mem * tm)
{
    addr_t irqstate;
    uint64_t new_ctxt;
    uint64_t * lt;

    lt = tm_global_state->last_trans;

    // grab the global last_trans
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    tm->t_num++;

    /* NOTE: this debug print assumes a two-core guest */
    TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
            (int)lt[1], (int)tm->t_num);

    if (new_ctxt != tm->t_num) {
        TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
        return -1;
    }

    return 0;
}
static void
tm_set_abort_status (struct guest_info * core,
                     tm_abrt_cause_t cause,
                     uint8_t xabort_reason)
{
    core->vm_regs.rax = 0;

    switch (cause) {
        case TM_ABORT_XABORT:
            // we put the xabort immediate in eax 31:24
            core->vm_regs.rax |= (xabort_reason << 24);
            break;
        case TM_ABORT_CONFLICT:
            // if this was a conflict from another core, it may work
            // if the guest tries again
            core->vm_regs.rax |= (1 << ABORT_CONFLICT) | (1 << ABORT_RETRY);
            break;
        case TM_ABORT_INTERNAL:
            core->vm_regs.rax |= (1 << cause);
            break;
        case TM_ABORT_UNSPECIFIED:
            // just return 0 in EAX
            break;
        default:
            TM_ERR(core, ABORT, "invalid abort cause\n");
            break;
    }
}
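
/* For reference, the architectural RTM abort status in EAX is laid out as
 * follows (Intel SDM):
 *
 *   bit 0     set if the abort came from XABORT (imm8 lands in bits 31:24)
 *   bit 1     the transaction may succeed on a retry
 *   bit 2     a conflict with another logical processor
 *   bit 3     an internal buffer overflowed
 *   bit 4     a debug breakpoint was hit
 *   bit 5     abort during a nested transaction
 *
 * The ABORT_* bit positions used above presumably mirror this layout.
 */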
// xabort_reason is only used for the XABORT instruction
int
v3_handle_trans_abort (struct guest_info * core,
                       tm_abrt_cause_t cause,
                       uint8_t xabort_reason)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,ABORT,"problem freeing staging page\n");
        return -1;
    }

    // Clear the VTLB, which still has our staging page in it
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,ABORT,"problem clearing vtlb\n");
        return -1;
    }

    // Clear the read/write lists
    v3_clear_tm_lists(tm);

    TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);

    if (tm->TM_MODE == TM_ON) {
        TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
        core->rip = tm->fail_call;

        // clr_tm handles the global state
        v3_clr_tm(tm);

        // bump the transaction number
        v3_tm_inc_tnum(tm);
    }

    tm_set_abort_status(core, cause, xabort_reason);

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,GC,"could not gc!\n");
        return -1;
    }

    return 0;
}
static uint_t
tm_hash_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(void *));
}

static int
tm_eq_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}

static uint_t
tm_hash_buf_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(addr_t));
}

static int
tm_eq_buf_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}
/* this checks if the remote access was done with the same
 * local transaction number as the current one */
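/* Access-type entries are keyed by hashing the tuple
 * (gva, core id, that core's transaction number); conceptually:
 *
 *   void * buf[3] = { (void*)gva, (void*)core, (void*)last_trans[core] };
 *   addr_t key    = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);
 *
 * so a lookup below asks: "did core N touch this gva during its transaction
 * number T?" (an illustration of the scheme used in this function)
 */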
static int
tm_check_context (struct v3_vm_info * vm,
                  addr_t gva,
                  uint64_t core_num,
                  uint64_t curr_ctxt,
                  uint64_t * curr_lt,
                  v3_tm_op_t op_type)
{
    uint64_t core_id_sub;
    struct v3_tm_access_type * type = NULL;

    for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {

        struct v3_trans_mem * remote_tm;
        void * buf[3];
        addr_t key;

        /* skip the core that's doing the checking */
        if (core_id_sub == core_num) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");
        if (!remote_tm) {
            PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
            return ERR_CHECK_FAIL;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)core_id_sub;
        buf[2] = (void *)curr_lt[core_id_sub];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);

        if (type) {
            /* a remote write conflicts with anything we do; a remote
             * read conflicts only if we are writing */
            if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) ||
                 (op_type != OP_TYPE_WRITE && type->w)) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* check all the contexts in the list for a conflict */
static int
tm_check_all_contexts (struct v3_vm_info * vm,
                       struct list_head * hash_list,
                       addr_t gva,
                       v3_tm_op_t op_type,
                       uint64_t core_num,
                       uint64_t curr_ctxt)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t * curr_lt = NULL;
    int ret = 0;

    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        curr_lt = curr->curr_lt;

        if (curr_lt[core_num] == curr_ctxt) {

            ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* The following access patterns trigger an abort:
 *   We: Read  | Anyone Else: Write
 *   We: Write | Anyone Else: Read or Write
 *
 * (pg. 8-2 of the Haswell manual)
 *
 * returns ERR_CHECK_FAIL on error
 * returns CHECK_IS_CONFLICT if there is a conflict
 * returns CHECK_NO_CONFLICT if there isn't
 */
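/* Worked example of the table above: if we wrote gva X during our
 * transaction, any recorded remote read or write of X in an overlapping
 * context is a conflict; if we only read X, we conflict only with a
 * recorded remote write of X. This is exactly the (type->r, type->w)
 * test in tm_check_context() above.
 */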
static int
tm_check_conflict (struct v3_vm_info * vm,
                   addr_t gva,
                   v3_tm_op_t op_type,
                   uint64_t core_num,
                   uint64_t curr_ctxt)
{
    uint64_t core_id;

    /* loop over the other cores -> core_id */
    for (core_id = 0; core_id < vm->num_cores; core_id++) {

        struct guest_info * core = NULL;
        struct v3_trans_mem * tm = NULL;
        struct list_head * hash_list;

        /* only check the other cores */
        if (core_id == core_num) {
            continue;
        }

        core = &(vm->cores[core_id]);
        tm = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");

        if (!tm) {
            PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
            return ERR_CHECK_FAIL;
        }

        /* this core didn't access the address, move on */
        if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
            continue;
        } else {

            /* loop over the chained hash for this gva, find fields with curr_ctxt -> curr_lt */
            int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
static uint64_t
tm_need_to_gc (struct v3_trans_mem * tm,
               struct hash_chain * curr,
               uint64_t * lt_copy,
               uint64_t tmoff)
{
    uint64_t to_gc = 1;
    uint64_t i;

    /* if none of the cores are in a transactional context,
     * we know we can collect this context
     */
    if (!tmoff) {

        for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
            /* if *any* of the cores are active in a transaction
             * number that is current (listed in this context),
             * we know we can't collect this context, as it
             * will be needed when that core's transaction ends
             */
            if (curr->curr_lt[i] >= lt_copy[i]) {
                to_gc = 0;
                break;
            }
        }
    }

    return to_gc;
}
static void
tm_del_stale_ctxt (struct hash_chain * curr)
{
    list_del(&(curr->lt_node));
    V3_Free(curr->curr_lt);
    V3_Free(curr);
}

static void
tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
{
    v3_htable_remove(tm->access_type, key, 0);
    (tm->access_type_entries)--;
}
static int
tm_collect_context (struct v3_trans_mem * tm,
                    struct hashtable_iter * ctxt_iter,
                    struct hash_chain * curr,
                    uint64_t * begin_time,
                    uint64_t * end_time,
                    addr_t gva)
{
    uint64_t i;

    for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {

        struct v3_tm_access_type * type;
        void * buf[3];
        addr_t key;

        rdtscll(*end_time);
        if ((*end_time - *begin_time) > 100000000) {
            TM_ERR(tm->ginfo,GC,"time threshold exceeded, exiting!!!\n");
            return -1;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)i;
        buf[2] = (void *)curr->curr_lt[i];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);

        if (!type) { // something has gone terribly wrong
            TM_ERR(tm->ginfo,GC,"could not find access type entry to gc, THIS! IS! WRONG!\n");
            return -1;
        }

        /* delete the access type entry */
        tm_del_acc_entry(tm, key);
    }

    /* delete the stale context */
    tm_del_stale_ctxt(curr);

    return 0;
}
static int
tm_collect_all_contexts (struct v3_trans_mem * tm,
                         struct hashtable_iter * ctxt_iter,
                         uint64_t tmoff,
                         uint64_t * lt_copy,
                         uint64_t * begin_time,
                         uint64_t * end_time)
{
    struct hash_chain * tmp;
    struct hash_chain * curr;
    struct list_head * chain_list;
    addr_t gva;

    gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);

    chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

    /* this is a chained hash, so for each address we have
     * a list of contexts. We now check each context to see
     * whether or not it can be collected
     */
    list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {

        uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);

        /* not garbage, go on to the next context in the list */
        if (!to_gc) {
            TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
            continue;
        }

        TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);

        /* found one, delete the corresponding entries in access_type */
        if (tm_collect_context(tm, ctxt_iter, curr, begin_time, end_time, gva) == -1) {
            TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
            return -1;
        }
    }

    /* if the context list (hash chain) is now empty, remove the hash entry */
    if (list_empty(chain_list)) {
        v3_htable_iter_remove(ctxt_iter, 0);
        (tm->addr_ctxt_entries)--;
    } else {
        v3_htable_iter_advance(ctxt_iter);
    }

    /* do NOT give the CPU away here -- never yield while holding a lock */

    return 0;
}
static int
tm_hash_gc (struct v3_trans_mem * tm)
{
    addr_t irqstate, irqstate2;
    int ret = 0;
    uint64_t begin_time, end_time, tmoff;
    uint64_t * lt_copy = NULL;
    struct v3_tm_state * tms = NULL;
    struct hashtable_iter * ctxt_iter = NULL;

    tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");
    if (!tms) {
        TM_ERR(tm->ginfo,GC,"could not get global tm state\n");
        return -1;
    }

    TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));

    tmoff = (tms->cores_active == 0);

    lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    if (!lt_copy) {
        TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    rdtscll(begin_time);

    /* lt_copy holds the last transaction number for each core */
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    /* lock both hashes */
    irqstate  = v3_lock_irqsave(tm->addr_ctxt_lock);
    irqstate2 = v3_lock_irqsave(tm->access_type_lock);

    /* loop over the hash entries in addr_ctxt */
    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
        v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
        v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);
        V3_Free(lt_copy);
        return -1;
    }

    /* we check each address stored in the hash */
    while (ctxt_iter->entry) {
        /* NOTE: this call advances the hash iterator */
        if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy, &begin_time, &end_time) == -1) {
            ret = -1;
            break;
        }
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* note: each lock is restored with the irq state saved when it was taken */
    v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
    v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);

    V3_Free(lt_copy);

    rdtscll(end_time);

    if (ret == -1) {
        TM_ERR(tm->ginfo,GC,"garbage collection failed, time spent: %d cycles\n", (int)(end_time - begin_time));
    } else {
        TM_DBG(tm->ginfo,GC,"ended garbage collection successfully, time spent: %d cycles\n", (int)(end_time - begin_time));
    }

    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));

    return ret;
}
/* TODO: break out the for loops in these functions */
static int
tm_update_ctxt_list (struct v3_trans_mem * tm,
                     uint64_t * lt_copy,
                     addr_t gva,
                     uint8_t write,
                     struct list_head * hash_list)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;
    addr_t key;
    uint8_t new_le = 1;

    /* check whether a chain entry with exactly this set of transaction
     * numbers already exists; if so, we don't need a new one */
    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        uint8_t same = 1;
        uint64_t i;

        for (i = 0; i < num_cores; i++) {
            if (curr->curr_lt[i] != lt_copy[i]) {
                same = 0;
                break;
            }
        }

        if (same) {
            new_le = 0;
            break;
        }
    }

    if (new_le) {
        struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));

        if (!new_l) {
            TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
            return -1;
        }

        memset(new_l, 0, sizeof(struct hash_chain));

        new_l->curr_lt = lt_copy;

        list_add_tail(&(new_l->lt_node), hash_list);
    }

    /* now record the access type for each core's current context */
    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type;
        struct v3_ctxt_tuple tup;
        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);

        if (!type) {
            /* no entry for this access yet, create one */
            type = V3_Malloc(sizeof(struct v3_tm_access_type));

            if (!type) {
                TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
                return -1;
            }

            memset(type, 0, sizeof(struct v3_tm_access_type));

            if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
                TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
                return -1;
            }

            (tm->access_type_entries)++;
        }

        /* mark the access */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }
    }

    return 0;
}
/* no entry in addr-ctxt yet, create one */
static int
tm_create_ctxt_key (struct v3_trans_mem * tm,
                    uint64_t * lt_copy,
                    addr_t gva,
                    uint8_t write)
{
    struct list_head * hash_list = NULL;
    struct hash_chain * new_l = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;

    hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));
    if (!hash_list) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
        return -1;
    }

    INIT_LIST_HEAD(hash_list);

    new_l = V3_Malloc(sizeof(struct hash_chain));
    if (!new_l) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
        V3_Free(hash_list);
        return -1;
    }

    memset(new_l, 0, sizeof(struct hash_chain));

    new_l->curr_lt = lt_copy;

    /* add the context to the hash chain */
    list_add_tail(&(new_l->lt_node), hash_list);

    if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
        TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
        goto out_err;
    }

    (tm->addr_ctxt_entries)++;

    /* TODO: we need a way to unwind and deallocate for all cores on failure here */
    for (core_id = 0; core_id < num_cores; core_id++) {

        struct v3_tm_access_type * type = NULL;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        type = V3_Malloc(sizeof(struct v3_tm_access_type));
        if (!type) {
            TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
            goto out_err;
        }

        memset(type, 0, sizeof(struct v3_tm_access_type));

        /* record the access type */
        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
            TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
            goto out_err;
        }

        (tm->access_type_entries)++;
    }

    return 0;

out_err:
    list_del(&(new_l->lt_node));
    V3_Free(new_l);
    V3_Free(hash_list);
    return -1;
}
/*
 * called during MIME execution
 * records this memory access in the conflict logs
 * this locks the tables during insertion
 */
static int
tm_record_access (struct v3_trans_mem * tm,
                  uint8_t write,
                  addr_t gva)
{
    uint64_t * lt_copy = NULL;
    struct list_head * hash_list;
    addr_t irqstate;
    uint64_t num_cores;

    num_cores = tm->ginfo->vm_info->num_cores;

    TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
            (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));
    //PrintDebug(tm->ginfo->vm_info, tm->ginfo,"\tWe think that addr-ctxt.cnt = %d, access-type.cnt = %d\n",(int)tm->addr_ctxt_entries,(int)tm->access_type_entries);

    lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);
    if (!lt_copy) {
        TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*num_cores);

    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
        /* we haven't created a context list for this address yet, go do it */
        return tm_create_ctxt_key(tm, lt_copy, gva, write);
    }

    /* we already have a context list for this address; do we need to create a new context? */
    return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
}
static void
tm_prepare_cpuid (struct v3_vm_info * vm)
{
    V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");

    /* increase max CPUID function to 7 (extended feature flags enumeration) */
    v3_cpuid_add_fields(vm,0x0,
            0xf, 0x7,
            0, 0,
            0, 0,
            0, 0);

    /* do the same for AMD */
    v3_cpuid_add_fields(vm,0x80000000,
            0xffffffff, 0x80000007,
            0, 0,
            0, 0,
            0, 0);

    /* enable RTM (CPUID.07H.EBX.RTM = 1) */
    v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
    v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
}
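
/* With the bits above exposed, a guest can detect RTM support in the usual
 * way, e.g. (illustrative guest-side snippet, not part of this module):
 *
 *   unsigned int eax = 0x7, ebx, ecx = 0, edx;
 *   __asm__ __volatile__ ("cpuid"
 *                         : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
 *                         : "a"(eax), "c"(ecx));
 *   int has_rtm = (ebx >> 11) & 1;  // CPUID.(EAX=07H,ECX=0):EBX.RTM[bit 11]
 */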
static int
init_trans_mem (struct v3_vm_info * vm,
                v3_cfg_tree_t * cfg,
                void ** priv_data)
{
    struct v3_tm_state * tms;
    int i;

    PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");

    tms = V3_Malloc(sizeof(struct v3_tm_state));
    if (!tms) {
        PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
        return -1;
    }

    memset(tms, 0, sizeof(struct v3_tm_state));

    if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
        PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
        V3_Free(tms);
        return -1;
    }

    v3_lock_init(&(tms->lock));

    tms->TM_MODE      = TM_OFF;
    tms->cores_active = 0;

    uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);
    if (!lt) {
        PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
        goto out_err;
    }

    memset(lt, 0, sizeof(uint64_t) * vm->num_cores);

    for (i = 0; i < vm->num_cores; i++) {
        lt[i] = 0;
    }

    tms->last_trans = lt;

    *priv_data = tms;
    tm_global_state = tms;

    tm_prepare_cpuid(vm);

    return 0;

out_err:
    v3_lock_deinit(&(tms->lock));
    v3_remove_hypercall(vm, TM_KICKBACK_CALL);
    V3_Free(tms);
    return -1;
}
static int
init_trans_mem_core (struct guest_info * core,
                     void * priv_data,
                     void ** core_data)
{
    struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));

    TM_DBG(core,INIT, "Trans Mem. Core Init\n");

    if (!tm) {
        TM_ERR(core,INIT, "Problem allocating TM state\n");
        return -1;
    }

    memset(tm, 0, sizeof(struct v3_trans_mem));

    INIT_LIST_HEAD(&tm->trans_r_list);
    INIT_LIST_HEAD(&tm->trans_w_list);

    tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
    if (!(tm->addr_ctxt)) {
        TM_ERR(core,INIT,"problem creating addr_ctxt\n");
        goto out_err;
    }

    tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
    if (!(tm->access_type)) {
        TM_ERR(core,INIT,"problem creating access_type\n");
        goto out_err1;
    }

    v3_lock_init(&(tm->addr_ctxt_lock));
    v3_lock_init(&(tm->access_type_lock));

    tm->TM_STATE = TM_NULL;
    tm->TM_MODE  = TM_OFF;
    tm->TM_ABORT = 0;

    tm->ginfo = core;
    tm->t_num = 0;

    tm->access_type_entries = 0;
    tm->addr_ctxt_entries   = 0;
    tm->dirty_instr_flag    = 0;

    /* TODO: Cache Model */
    //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
    //tm->box->init = init_cache;
    //tm->box->init(sample_spec, tm->box);

    *core_data = tm;

    return 0;

out_err1:
    v3_free_htable(tm->addr_ctxt, 0, 0);
out_err:
    V3_Free(tm);
    return -1;
}
static int
deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
{
    struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;

    if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
        PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
        return -1;
    }

    v3_lock_deinit(&(tms->lock));

    if (tms->last_trans) {
        V3_Free(tms->last_trans);
    }

    V3_Free(tms);

    return 0;
}
static int
deinit_trans_mem_core (struct guest_info * core,
                       void * priv_data,
                       void * core_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
    struct hashtable_iter * ctxt_iter = NULL;

    v3_clear_tm_lists(tm);

    if (tm->staging_page) {
        TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
    }

    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
        return -1;
    }

    /* delete all the context entries for each hashed address */
    while (ctxt_iter->entry) {

        struct hash_chain * tmp;
        struct hash_chain * curr;
        struct list_head * chain_list;
        addr_t gva;

        gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
        chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

        /* delete the contexts */
        list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
            tm_del_stale_ctxt(curr);
        }

        v3_htable_iter_advance(ctxt_iter);
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* we've already deleted the values in this one */
    v3_free_htable(tm->addr_ctxt, 0, 0);

    /* KCH WARNING: we may not want to free the access type values here */
    v3_free_htable(tm->access_type, 1, 0);

    v3_lock_deinit(&(tm->addr_ctxt_lock));
    v3_lock_deinit(&(tm->access_type_lock));

    V3_Free(tm);

    return 0;
}
static struct v3_extension_impl trans_mem_impl = {
    .name = "trans_mem",
    .vm_init = init_trans_mem,
    .vm_deinit = deinit_trans_mem,
    .core_init = init_trans_mem_core,
    .core_deinit = deinit_trans_mem_core,
};

register_extension(&trans_mem_impl);
/*
 * tms->on  => commit our list, free the staging page, clear our lists; clr_tm will handle the global state, then gc
 * tms->off => commit our list, free the staging page, clear our lists; clr_tm will handle the global state, then gc
 */
static int
tm_handle_xend (struct guest_info * core,
                struct v3_trans_mem * tm)
{
    rdtscll(tm->exit_time);

    /* XEND should raise a #GP when RTM mode is not on */
    if (tm->TM_MODE != TM_ON) {
        TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
        v3_free_staging_page(tm);
        v3_clr_vtlb(core);
        v3_clear_tm_lists(tm);
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* Our transaction finished! */
    /* Copy over data from the staging page */
    TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");

    if (commit_list(core, tm) == -1) {
        TM_ERR(core,UD,"error committing tm list to memory\n");
        return -1;
    }

    TM_DBG(core,UD,"Freeing staging page and internal data structures\n");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,XEND,"couldn't free staging page\n");
        return -1;
    }

    // clear the VTLB, as it may still contain our staging page
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,XEND,"couldn't clear vtlb\n");
        return -1;
    }

    // clear the read/write lists
    v3_clear_tm_lists(tm);

    /* Set the state and advance the RIP */
    TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
    core->rip += XEND_INSTR_LEN;

    // clr_tm handles the global state; bump the transaction number
    v3_clr_tm(tm);
    v3_tm_inc_tnum(tm);

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,XEND,"could not gc!\n");
        return -1;
    }

    return 0;
}
/*
 * tms->on  => handle our abort code; handle_trans_abort will clear the necessary state
 * tms->off => handle our abort code; handle_trans_abort will clear the necessary state
 */
static int
tm_handle_xabort (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    uint8_t reason;

    // we must reflect the immediate back into EAX 31:24
    reason = *(uint8_t*)(instr+2);

    /* TODO: this probably needs to move somewhere else */
    rdtscll(tm->exit_time);

    // Error checking! Make sure that we got here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_DBG(core, UD, "We got here while not in a transactional core!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    TM_DBG(core,UD,"aborting\n");

    if (tm->TM_STATE != TM_NULL) {
        v3_restore_dirty_instr(core);
    }

    // Handle the abort
    v3_handle_trans_abort(core, TM_ABORT_XABORT, reason);

    return 0;
}
/*
 * tms->on  => we set up our running env; set_tm will clear the other cores' VTLBs to start single stepping
 * tms->off => we set up our running env; set_tm will not clear anyone else's VTLB
 */
static int
tm_handle_xbegin (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    sint32_t rel_addr = 0;
    uint8_t out_of_bounds = 0;
    uint8_t in_compat_no_long = 0;

    if (tm->TM_MODE == TM_ON) {
        /* TODO: this is actually an indication of nesting, we'll fix this later */
        TM_ERR(core,UD,"We don't support nested transactions yet!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
    rel_addr = *(sint32_t*)(instr+2);

    /* raise a #GP if we're trying to set a fail call outside of the code segment */
    in_compat_no_long = (core->cpu_mode == LONG_32_COMPAT) || ((struct efer_64*)&(core->ctrl_regs.efer))->lma == 0;
    out_of_bounds     = (core->rip + rel_addr > core->segments.cs.base + core->segments.cs.limit ||
                         core->rip + rel_addr < core->segments.cs.base);

    if (in_compat_no_long && out_of_bounds) {
        v3_raise_exception(core, GPF_EXCEPTION);
        return 0;
    }

    /* TODO: also raise a #GP if we're in long mode and the fail call isn't canonical */

    /* TODO: put this elsewhere */
    rdtscll(tm->entry_time);
    tm->entry_exits = core->num_exits;

    /* set the tm_mode for this core */
    v3_set_tm(tm);

    TM_DBG(core,UD,"Set the system in TM Mode, save fallback address");

    // Save the fail call address
    tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;

    TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);

    /* flush the shadow page tables */
    TM_DBG(core,UD,"Throwing out the shadow table");
    v3_invalidate_shadow_pts(core);

    // Increase RIP, ready to go to the next instruction
    core->rip += XBEGIN_INSTR_LEN;

    return 0;
}
/*
 * tm->on  => clear ZF: the guest is executing transactionally
 * tm->off => set ZF
 */
static int
tm_handle_xtest (struct guest_info * core,
                 struct v3_trans_mem * tm)
{
    struct rflags * rf = (struct rflags*)&(core->ctrl_regs.rflags);

    // if we are in TM mode, set ZF to 0; otherwise set it to 1
    if (tm->TM_MODE == TM_ON) {
        rf->zf = 0;
    } else {
        rf->zf = 1;
    }

    core->rip += XTEST_INSTR_LEN;

    return 0;
}
/* instruction encodings (Intel RTM):
 *
 *   XBEGIN  c7 f8 rel32
 *   XABORT  c6 f8 imm8
 *   XEND    0f 01 d5
 *   XTEST   0f 01 d6
 */
static int
tm_handle_ud (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    uchar_t instr[INSTR_BUF_SZ];
    uint8_t byte1, byte2, byte3;

    tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);

    byte1 = *(uint8_t *)((addr_t)instr);
    byte2 = *(uint8_t *)((addr_t)instr + 1);
    byte3 = *(uint8_t *)((addr_t)instr + 2);

    if (byte1 == 0xc7 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xbegin(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XBEGIN\n");
            return -1;
        }

    } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an immediate */

        TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xabort(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XABORT\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {

        TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xend(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XEND\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) {

        TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xtest(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XTEST\n");
            return -1;
        }

    } else {

        /* oh no, this is still unknown; pass the error back to the guest! */
        TM_DBG(core,UD,"Encountered: %x %x %x\n", byte1, byte2, byte3);
        v3_raise_exception(core, UD_EXCEPTION);
    }

    return 0;
}
int
v3_tm_handle_exception (struct guest_info * info,
                        addr_t exit_code)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,ERR,"TM extension state not found\n");
        return -1;
    }

    switch (exit_code) {
        /* any of these exceptions should abort current transactions */
        case SVM_EXIT_EXCP6:
            if (tm_handle_ud(info) == -1) {
                return -1;
            }
            break;
        case SVM_EXIT_EXCP0:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DE_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DE exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP1:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DB_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DB exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP3:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP4:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, OF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to OF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP5:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BR_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BR exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP7:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NM_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NM exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP10:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, TS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to TS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP11:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NP exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP12:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, SS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to SS exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP13:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, GPF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to GPF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP16:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, MF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to MF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP17:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, AC_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to AC exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        case SVM_EXIT_EXCP19:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, XF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to XF exception\n");
                v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);
            }
            break;
        default:
            TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
            break;
    }

    return 0;
}
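
/* Note on the "- 0x40" above: AMD SVM reports intercepted exceptions with
 * exit codes VMEXIT_EXCP0 + vector (0x40-0x5f), so SVM_EXIT_EXCP0 is
 * presumably 0x40 and subtracting it recovers the exception vector.
 */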
void
v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
{
    ctrl_area->exceptions.de = 1; // 0  : divide by zero
    ctrl_area->exceptions.db = 1; // 1  : debug
    ctrl_area->exceptions.bp = 1; // 3  : breakpoint
    ctrl_area->exceptions.of = 1; // 4  : overflow
    ctrl_area->exceptions.br = 1; // 5  : bound range
    ctrl_area->exceptions.ud = 1; // 6  : undefined opcode
    ctrl_area->exceptions.nm = 1; // 7  : device not available
    ctrl_area->exceptions.ts = 1; // 10 : invalid TSS
    ctrl_area->exceptions.np = 1; // 11 : segment not present
    ctrl_area->exceptions.ss = 1; // 12 : stack fault
    ctrl_area->exceptions.gp = 1; // 13 : general protection
    ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
    ctrl_area->exceptions.ac = 1; // 17 : alignment check
    ctrl_area->exceptions.xf = 1; // 19 : SIMD floating point
}
extern void v3_stgi();
extern void v3_clgi();
/* 441-tm: if we are in TM mode, we need to check for any interrupts here,
 * and if there are any, we need to do some aborting! Make sure not to die here
 * if we are already 'aborting' -- that results in an infinite loop
 */
void
v3_tm_check_intr_state (struct guest_info * info,
                        vmcb_ctrl_t * guest_ctrl,
                        vmcb_saved_state_t * guest_state)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,INTR,"TM extension state not found\n");
        return;
    }

    if ((tm->TM_MODE == TM_ON) &&
        (tm->TM_ABORT != 1)) {

        if (guest_ctrl->guest_ctrl.V_IRQ ||
            guest_ctrl->EVENTINJ.valid) {

            // We do indeed have pending interrupts
            TM_DBG(info,INTR,"we have a pending interrupt\n");

            v3_handle_trans_abort(info, TM_ABORT_UNSPECIFIED, 0);

            // Copy the new RIP state into the arch-dependent structure
            guest_state->rip = info->rip;

            //TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);
        }
    }
}
int
v3_tm_handle_pf_64 (struct guest_info * info,
                    pf_error_t error_code,
                    addr_t fault_addr,
                    addr_t * page_to_use)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
        return -1;
    }

    if (!tms) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
        return -1;
    }

    if ((tms->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);

        *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);

        if (*page_to_use == ERR_TRANS_FAULT_FAIL) {
            TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
            return -1;
        }

        if ((tm->TM_MODE == TM_ON) &&
            (tm->staging_page == NULL)) {

            tm->staging_page = V3_AllocPages(1);

            if (!(tm->staging_page)) {
                TM_ERR(info,MMU,"Problem allocating staging page\n");
                return -1;
            }

            TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
        }
    }

    return 0;
}
void
v3_tm_handle_usr_tlb_miss (struct guest_info * info,
                           pf_error_t error_code,
                           addr_t page_to_use,
                           addr_t * shadow_pa)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    /* TLB miss from user */
    if ((tm->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        if (page_to_use > TRANS_FAULT_OK) {
            TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
            *shadow_pa = page_to_use;
        }
    }
}
void
v3_tm_handle_read_fault (struct guest_info * info,
                         pf_error_t error_code,
                         pte64_t * shadow_pte)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    // If we are about to read, make the page read-only
    if ((tms->TM_MODE == TM_ON) &&
        (tm->TM_STATE == TM_EXEC) &&
        (error_code.write == 0) &&
        (error_code.user == 1)) {

        TM_DBG(info,MMU, "Flagging the page read only\n");
        shadow_pte->writable = 0;
    }
}
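
/* Making the PTE read-only here is what keeps the write path honest: a
 * subsequent store to the same page still faults, which routes transactional
 * writes through tm_handle_fault_write() and onto the staging page.
 */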
int
v3_tm_decode_rtm_instrs (struct guest_info * info,
                         addr_t instr_ptr,
                         struct x86_instr * instr)
{
    uint8_t byte1, byte2, byte3;
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (tm->TM_MODE == TM_ON) {

        byte1 = *(uint8_t *)(instr_ptr);
        byte2 = *(uint8_t *)(instr_ptr + 1);
        byte3 = *(uint8_t *)(instr_ptr + 2);

        if (byte1 == 0xc7 &&
            byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 6;
            return 0;

        } else if (byte1 == 0xc6 &&
                   byte2 == 0xf8) { /* third byte is an immediate */

            TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 3;
            return 0;

        } else if (byte1 == 0x0f &&
                   byte2 == 0x01 &&
                   byte3 == 0xd5) {

            TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
            instr->instr_length = 3;