/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
 * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
 *         Kyle C. Hale <kh@u.northwestern.edu>
 *         Marcel Flores <marcel-flores@u.northwestern.edu>
 *         Zachary Bischof <zbischof@u.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */
#include <palacios/vmm_mem.h>
#include <palacios/vmm.h>
#include <palacios/vmcb.h>
#include <palacios/vmm_decoder.h>
#include <palacios/vm_guest_mem.h>
#include <palacios/vmm_ctrl_regs.h>
#include <palacios/vmm_paging.h>
#include <palacios/vmm_direct_paging.h>
#include <palacios/svm.h>
#include <palacios/svm_handler.h>
#include <palacios/vmm_excp.h>
#include <palacios/vmm_extensions.h>
#include <palacios/vmm_sprintf.h>
#include <palacios/vmm_hashtable.h>

#include <extensions/trans_mem.h>
#include <extensions/tm_util.h>
#if !V3_CONFIG_DEBUG_TM_FUNC
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
/* TODO:
 * - save/restore register state on XBEGIN/XABORT
 * - put status codes in RAX
 * - implement proper exceptions for failed XBEGINs etc.
 */
/* these bytes encode a mov to %rax followed by a VMMCALL */
static const char * vmmcall_bytes = "\x48\xc7\xc0\x37\x13\x00\x00\x0f\x01\xd9";
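/* For reference, these ten bytes disassemble as follows (0x1337 is
 * assumed here to be the TM hypercall id, i.e. TM_KICKBACK_CALL):
 *
 *   48 c7 c0 37 13 00 00    mov  $0x1337, %rax    ; hypercall number
 *   0f 01 d9                vmmcall               ; trap into Palacios
 *
 * hence the 10 bytes (INSTR_INJECT_LEN) saved and restored around each
 * instruction injection below.
 */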
static struct v3_tm_state * tm_global_state = NULL;
static void
tm_translate_rip (struct guest_info * core, addr_t * target)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_gpa_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    } else if (core->mem_mode == VIRTUAL_MEM) {
        v3_gva_to_hva(core,
                get_addr_linear(core, core->rip, &(core->segments.cs)),
                target);
    }
}
static void
tm_read_instr (struct guest_info * core,
               addr_t addr,
               uchar_t * dst,
               uint_t size)
{
    if (core->mem_mode == PHYSICAL_MEM) {
        v3_read_gpa_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    } else {
        v3_read_gva_memory(core,
                get_addr_linear(core, addr, &(core->segments.cs)),
                size,
                dst);
    }
}
static int
tm_handle_decode_fail (struct guest_info * core)
{
    addr_t cur_rip;
    uint_t core_num;

    tm_translate_rip(core, &cur_rip);

#ifdef V3_CONFIG_DEBUG_TM_FUNC
    v3_dump_mem((uint8_t *)cur_rip, INSTR_BUF_SZ);
#endif

    /* If we can't decode an instruction, we treat it as a catastrophic event, aborting *everyone* */
    for (core_num = 0; core_num < core->vm_info->num_cores; core_num++) {
        struct v3_trans_mem * remote_tm;

        /* skip the local core */
        if (core_num == core->vcpu_id) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(core->vm_info->cores[core_num]), "trans_mem");

        if (!remote_tm) {
            TM_ERR(core,DECODE,"couldn't get remote_tm\n");
            return -1;
        }

        /* skip cores that aren't in a transactional context */
        if (remote_tm->TM_MODE == TM_OFF) {
            continue;
        }

        TM_DBG(core,DECODE,"setting abort for core %d due to decoding error\n", core_num);
        remote_tm->TM_ABORT = 1;
    }

    return 0;
}
/* special casing for control-flow instructions
 * returns 1 if we need to jump
 * returns -1 on error
 */
static int
tm_handle_ctrl_flow (struct guest_info * core,
                     struct v3_trans_mem * tm,
                     addr_t * instr_location,
                     struct x86_instr * struct_instr)
{
    struct rflags * flags = (struct rflags *)&(core->ctrl_regs.rflags);
    addr_t offset = 0;
    int to_jmp = 0;
    switch (struct_instr->op_type) {

        case V3_OP_JLE:
            TM_DBG(core,DECODE, "!!++ JLE\n");
            to_jmp = (flags->zf || flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JAE:
            TM_DBG(core,DECODE,"!!++ JAE\n");
            to_jmp = (flags->cf == 0);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JMP:
            TM_DBG(core,DECODE,"!!++ JMP\n");
            to_jmp = 1;
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNZ:
            TM_DBG(core,DECODE,"!!++ JNZ\n");
            to_jmp = (flags->zf == 0);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JL:
            TM_DBG(core,DECODE,"!!++ JL\n");
            to_jmp = (flags->sf != flags->of);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        case V3_OP_JNS:
            TM_DBG(core,DECODE,"!!++ JNS\n");
            to_jmp = (flags->sf == 0);
            offset = struct_instr->dst_operand.operand;
            *instr_location = core->rip + tm->cur_instr_len + (to_jmp ? offset : 0);
            tm->offset    = offset;
            tm->to_branch = to_jmp;
            break;
        default:
            *instr_location = core->rip + tm->cur_instr_len;
            break;
    }

    return to_jmp;
}
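/* Worked example (hypothetical values): a 2-byte "jnz +0x10" at
 * rip = 0x401000 gives cur_instr_len = 2 and offset = 0x10, so
 *
 *   not taken: *instr_location = 0x401000 + 2        = 0x401002
 *   taken:     *instr_location = 0x401000 + 2 + 0x10 = 0x401012
 *
 * i.e. the hypercall is always injected at whichever instruction will
 * actually execute next.
 */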
/* entry points:
 *
 * called inside the #UD and VMMCALL handlers
 * only affects global state in the case of a quix86 fall-over
 *  -> set other cores' TM_ABORT to 1, return -2
 */
static int
v3_store_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    struct x86_instr struct_instr;
    uchar_t cur_instr[INSTR_BUF_SZ];
    addr_t instr_location;

    // Fetch the current instruction
    tm_read_instr(core, core->rip, cur_instr, INSTR_BUF_SZ);

    TM_DBG(core,STORE,"storing next instruction, current rip: %llx\n", (uint64_t)core->rip);

    /* Attempt to decode the current instruction to determine its length */
    if (v3_decode(core, (addr_t)cur_instr, &struct_instr) == ERR_DECODE_FAIL) {

        TM_ERR(core,Error,"Could not decode current instruction (at %llx)\n", (uint64_t)core->rip);

        /* this will attempt to abort all the remote cores */
        if (tm_handle_decode_fail(core) == -1) {
            TM_ERR(core,Error,"Could not handle failed decode\n");
            return ERR_STORE_FAIL;
        }

        /* we need to trigger a local abort */
        return ERR_STORE_MUST_ABORT;
    }

    /* we can't currently handle REP prefixes, abort */
    if (struct_instr.op_type != V3_INVALID_OP &&
            (struct_instr.prefixes.repne ||
             struct_instr.prefixes.repnz ||
             struct_instr.prefixes.rep   ||
             struct_instr.prefixes.repe  ||
             struct_instr.prefixes.repz)) {

        TM_ERR(core,DECODE,"Encountered REP prefix, aborting\n");
        return ERR_STORE_MUST_ABORT;
    }

    tm->cur_instr_len = struct_instr.instr_length;

    /* handle jump instructions */
    tm_handle_ctrl_flow(core, tm, &instr_location, &struct_instr);

    /* save the next 10 bytes after the current instruction; we'll put the vmmcall there */
    tm_read_instr(core, instr_location, cur_instr, INSTR_INJECT_LEN);

    /* store the next instruction and its length in info */
    memcpy(tm->dirty_instr, cur_instr, INSTR_INJECT_LEN);

    return 0;
}
static int
v3_overwrite_next_instr (struct guest_info * core, struct v3_trans_mem * tm)
{
    addr_t ptr;

    // save rax
    tm->clobbered_rax = (core->vm_regs).rax;

    ptr = core->rip;

    /* we can't currently handle instructions that span page boundaries */
    if ((ptr + tm->cur_instr_len) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"emulated instr straddling page boundary\n");
        return -1;
    }

    ptr = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);

    if ((ptr + INSTR_INJECT_LEN) % PAGE_SIZE_4KB < (ptr % PAGE_SIZE_4KB)) {
        TM_ERR(core,OVERWRITE,"injected instr straddling page boundary\n");
        return -1;
    }

    if (v3_gva_to_hva(core,
                get_addr_linear(core, ptr, &(core->segments.cs)),
                &ptr) == -1) {

        TM_ERR(core,Error,"Calculating next rip hva failed\n");
        return -1;
    }

    TM_DBG(core,REPLACE,"Replacing next instruction at addr %llx with vmm hyper call, len=%d\n",
            core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0), (int)tm->cur_instr_len );

    /* Copy the VMM call over the beginning of the next instruction (ptr) */
    memcpy((char*)ptr, vmmcall_bytes, INSTR_INJECT_LEN);

    /* KCH: flag that we've dirtied an instruction, and store its host address */
    tm->dirty_instr_flag = 1;
    tm->dirty_gva        = core->rip + tm->cur_instr_len + (tm->to_branch ? tm->offset : 0);
    tm->dirty_hva        = ptr;
    tm->to_branch        = 0;

    return 0;
}
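/* A sketch of what this sets up (illustrative addresses): with a 3-byte
 * instruction at rip = 0x400000,
 *
 *   before:  0x400000: <instr>   0x400003: <original guest bytes>
 *   after:   0x400000: <instr>   0x400003: mov $0x1337,%rax; vmmcall
 *
 * the guest executes exactly one instruction, traps into the hypercall
 * handler, and v3_restore_dirty_instr() later copies the clobbered
 * bytes back out of tm->dirty_instr.
 */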
/* restore the next instruction (lazy restore):
 * this should only be called when TM_STATE == TM_NULL; additionally, we
 * check whether our dirty-instruction flag is set
 */
int
v3_restore_dirty_instr (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    /* Restore next instruction, transition to IFETCH state */
    TM_DBG(core,RESTORE,"Restoring next instruction.\n");

    /* check if we've actually done an instruction overwrite */
    if (!(tm->dirty_instr_flag)) {
        TM_DBG(core,RESTORE,"nothing to restore here...\n");
        return 0;
    }

    // Actually restore the instruction
    memcpy((char*)tm->dirty_hva, tm->dirty_instr, INSTR_INJECT_LEN);

    // Put rax back
    (core->vm_regs).rax = tm->clobbered_rax;

    // Scoot rip back up
    TM_DBG(core,RESTORE,"RIP in vmmcall: %llx\n", core->rip);
    core->rip = tm->dirty_gva;

    // Clean up
    tm->dirty_instr_flag = 0;
    tm->dirty_gva = 0;
    tm->dirty_hva = 0;
    memset(tm->dirty_instr, 0, 15);

    TM_DBG(core,RESTORE,"RIP after scooting it back up: %llx\n", core->rip);

    return 0;
}
static addr_t
tm_handle_fault_ifetch (struct guest_info * core,
                        struct v3_trans_mem * tm)
{
    int sto;

    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: rip is the same as the faulting address, we must be at an ifetch.\n");

    sto = v3_store_next_instr(core, tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,EXIT,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;
    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_DBG(core,EXIT,"aborting (decode failed or unsupported instruction)\n");
        v3_handle_trans_abort(core);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,PF,"problem overwriting instruction\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_read (struct guest_info * core,
                      struct v3_trans_mem * tm,
                      addr_t fault_addr,
                      pf_error_t error)
{
    // This page fault was caused by a read in the current instruction on a core in TM mode
    TM_DBG(core,DATA,"Page fault caused by read.\n");
    TM_DBG(core,PF,"Adding %p to read list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_r_list), fault_addr) == -1) {
        TM_ERR(core,PF,"problem adding to list\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF,"problem recording access\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    /* if we have previously written to this address, we need to update our
     * staging page and map it in */
    if (list_contains_guest_addr(&(tm->trans_w_list), fault_addr)) {

        TM_DBG(core,PF,"Saw a read from something in the write list\n");

        /* write the value from the linked list to the staging page */
        if (stage_entry(tm, &(tm->trans_w_list), fault_addr) == -1) {
            TM_ERR(core,PF, "could not stage entry!\n");
            return ERR_TRANS_FAULT_FAIL;
        }

        /* Hand it the staging page */
        return (addr_t)(tm->staging_page);

    } else {

        // Add it to the read set
        addr_t shadow_addr = 0;

        TM_DBG(core,PF,"Saw a read from a fresh address\n");

        if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
            TM_ERR(core,PF,"Could not translate gva to hva for transaction read\n");
            return ERR_TRANS_FAULT_FAIL;
        }
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_write (struct guest_info * core,
                       struct v3_trans_mem * tm,
                       addr_t fault_addr,
                       pf_error_t error)
{
    void * data_loc;
    addr_t virt_data_loc;
    addr_t shadow_addr = 0;

    TM_DBG(core,DATA,"Page fault caused by write\n");
    TM_DBG(core,PF,"Adding %p to write list and hash\n", (void*)fault_addr);

    if (add_mem_op_to_list(&(tm->trans_w_list), fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not add to list!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,WRITE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (v3_gva_to_hva(core, (uint64_t)fault_addr, &shadow_addr) == -1) {
        TM_ERR(core,WRITE,"could not translate gva to hva for transaction write\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    // Copy the existing value to the staging page, populating that field.
    // This avoids errors in optimized code such as ++, where the original
    // value is not read separately, but simply incremented
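    // (e.g., an optimized "incl (%rax)" never issues a separate load, so
    // the staging page must already hold the current value before the
    // increment is replayed against it)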
    data_loc = (void*)((addr_t)(tm->staging_page) + (shadow_addr % PAGE_SIZE_4KB));

    if (v3_hpa_to_hva((addr_t)(data_loc), &virt_data_loc) == -1) {
        TM_ERR(core,WRITE,"Could not convert address on staging page to virt addr\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,WRITE,"\tValue being copied (core %d): %p\n", core->vcpu_id, *((void**)(virt_data_loc)));
    //memcpy((void*)virt_data_loc, (void*)shadow_addr, sizeof(uint64_t));
    *(uint64_t*)virt_data_loc = *(uint64_t*)shadow_addr;

    return (addr_t)(tm->staging_page);
}
static addr_t
tm_handle_fault_extern_ifetch (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    int sto;

    // the system is in TM mode, but this core isn't; record the access
    TM_DBG(core,IFETCH,"Page fault caused by IFETCH: we are not in TM, recording.\n");

    sto = v3_store_next_instr(core,tm);

    if (sto == ERR_STORE_FAIL) {
        TM_ERR(core,Error,"Could not store next instruction in transaction\n");
        return ERR_TRANS_FAULT_FAIL;

    } else if (sto == ERR_STORE_MUST_ABORT) {
        TM_ERR(core,IFETCH,"decode failed, going out of single stepping\n");
        v3_handle_trans_abort(core);
        return TRANS_FAULT_OK;
    }

    if (v3_overwrite_next_instr(core, tm) == -1) {
        TM_ERR(core,IFETCH,"could not overwrite next instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    tm->TM_STATE = TM_EXEC;

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,IFETCH,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_extern_access (struct guest_info * core,
                               struct v3_trans_mem * tm,
                               addr_t fault_addr,
                               pf_error_t error)
{
    TM_DBG(core,PF_HANDLE,"recording access\n");

    if (tm_record_access(tm, error.write, fault_addr) == -1) {
        TM_ERR(core,PF_HANDLE,"could not record access!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
static addr_t
tm_handle_fault_tmoff (struct guest_info * core)
{
    TM_DBG(core,PF_HANDLE, "in the pf handler, but no one is in tm mode anymore (core %d); we should try to eliminate hypercalls\n", core->vcpu_id);

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,PF_HANDLE,"could not restore dirty instr!\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    return TRANS_FAULT_OK;
}
/* entry points:
 *
 * called from the MMU - should mean that at least tms->TM_MODE is on
 *
 * tm->on : ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, write log, store instr, overwrite instr
 * tm->off: ifetch -> store instr, overwrite instr
 *          r/w    -> record hash, store instr, overwrite instr
 *
 * returns ERR_TRANS_FAULT_FAIL on error
 * returns TRANS_FAULT_OK when things are fine
 * returns an addr when we're passing back a staging page
 */
addr_t
v3_handle_trans_mem_fault (struct guest_info * core,
                           addr_t fault_addr,
                           pf_error_t error)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(core,ERROR,"couldn't get core state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    if (!tms) {
        TM_ERR(core,ERROR,"couldn't get vm trans_mem state\n");
        return ERR_TRANS_FAULT_FAIL;
    }

    TM_DBG(core,PF,"PF handler core->mode : %d, system->mode : %d\n", tm->TM_MODE, tms->TM_MODE);

    if ((tm->TM_MODE == TM_ON) &&
        ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_ifetch(core, tm);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 0)) {

        return tm_handle_fault_read(core, tm, fault_addr, error);

    } else if ((tm->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC) &&
               (error.write == 1)) {

        return tm_handle_fault_write(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               ((void *)fault_addr == (void *)(core->rip))) {

        return tm_handle_fault_extern_ifetch(core, tm, fault_addr, error);

    } else if ((tms->TM_MODE == TM_ON) &&
               (tm->TM_STATE == TM_EXEC)) {

        return tm_handle_fault_extern_access(core, tm, fault_addr, error);

    } else {

        return tm_handle_fault_tmoff(core);
    }

    return TRANS_FAULT_OK;
}
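/* Note on the return convention: callers treat any value greater than
 * TRANS_FAULT_OK as the address of a staging page to map in place of
 * the faulting page (see v3_tm_handle_usr_tlb_miss below).
 */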
static int
tm_handle_hcall_tmoff (struct guest_info * core, struct v3_trans_mem * tm)
{
    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,EXIT,"we are in tm mode but the system is not!\n");
        return TRANS_HCALL_FAIL;
    }

    // we got to an exit when things were off!
    TM_DBG(core,EXIT,"system is off, restore the instruction and go away\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    tm->TM_STATE = TM_NULL;

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_dec_abort (struct guest_info * core,
                           struct v3_trans_mem * tm)
{
    // we only ever get here from TM DECODE
    TM_DBG(core,EXIT,"we are in ABORT, call the abort handler\n");
    tm->TM_ABORT = 0;

    v3_handle_trans_abort(core);

    TM_DBG(core,EXIT,"RIP after abort: %p\n", ((void*)(core->rip)));

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_ifetch_start (struct guest_info * core,
                              struct v3_trans_mem * tm)
{
    tm->TM_STATE = TM_IFETCH;

    TM_DBG(core,EXIT,"VMEXIT after TM_EXEC, blast away the VTLB and go into TM_IFETCH\n");

    // Finally, invalidate the shadow page tables
    v3_invalidate_shadow_pts(core);

    return TRANS_HCALL_OK;
}
static int
tm_check_list_conflict (struct guest_info * core,
                        struct v3_trans_mem * tm,
                        struct list_head * access_list,
                        v3_tm_op_t op_type)
{
    struct mem_op * curr = NULL;
    struct mem_op * tmp  = NULL;
    int conflict = 0;

    list_for_each_entry_safe(curr, tmp, access_list, op_node) {

        conflict = tm_check_conflict(tm->ginfo->vm_info, curr->guest_addr, op_type, core->vcpu_id, tm->t_num);

        if (conflict == ERR_CHECK_FAIL) {

            TM_ERR(core,EXIT,"error checking for conflicts\n");
            return TRANS_HCALL_FAIL;

        } else if (conflict == CHECK_IS_CONFLICT) {

            TM_DBG(core,EXIT,"we have a conflict, aborting\n");
            v3_handle_trans_abort(core);
            return CHECK_MUST_ABORT;
        }
    }

    return TRANS_HCALL_OK;
}
static int
tm_handle_hcall_check_conflicts (struct guest_info * core,
                                 struct v3_trans_mem * tm)
{
    int ret;

    TM_DBG(core,EXIT,"still TM_ON\n");
    TM_DBG(core,EXIT,"checking for conflicts\n");

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_w_list), OP_TYPE_WRITE)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    if ((ret = tm_check_list_conflict(core, tm, &(tm->trans_r_list), OP_TYPE_READ)) == TRANS_HCALL_FAIL) {
        return TRANS_HCALL_FAIL;
    } else if (ret == CHECK_MUST_ABORT) {
        return TRANS_HCALL_OK;
    }

    tm->TM_STATE = TM_IFETCH;

    return TRANS_HCALL_OK;
}
/* trans mem hypercall handler
 * entry points:
 *
 * running MIME (tm or tms on):
 *   restore state, check for conflicts,
 *   abort (due to quix86)
 */
static int
tm_handle_hcall (struct guest_info * core,
                 unsigned int hcall_id,
                 void * priv_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(core->vm_info, "trans_mem");

    if (tms->TM_MODE == TM_OFF) {
        return tm_handle_hcall_tmoff(core, tm);
    }

    // The previous instruction has finished, copy the staging page back into the linked list!
    if (update_list(tm, &(tm->trans_w_list)) == -1) {
        TM_ERR(core,HCALL,"could not update_list!\n");
        return TRANS_HCALL_FAIL;
    }

    // Done handling the previous instruction; put back the next instruction, reset %rip, and go back to the IFETCH state
    TM_DBG(core,EXIT,"saw VMEXIT, need to restore previous state and proceed\n");

    if (v3_restore_dirty_instr(core) == -1) {
        TM_ERR(core,HCALL,"could not restore dirty instr!\n");
        return TRANS_HCALL_FAIL;
    }

    /* check if another core has asked us to abort */
    if (tm->TM_ABORT == 1 &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_dec_abort(core, tm);

    } else if (tm->TM_STATE == TM_EXEC) {
        return tm_handle_hcall_ifetch_start(core, tm);
    }

    if (tm->TM_MODE == TM_ON &&
        tms->TM_MODE == TM_ON) {

        return tm_handle_hcall_check_conflicts(core, tm);

    } else if (tm->TM_MODE == TM_OFF) {
        TM_DBG(core,EXIT,"we are in TM_OFF\n");
    }

    return TRANS_HCALL_OK;
}
int
v3_tm_inc_tnum (struct v3_trans_mem * tm)
{
    addr_t irqstate;
    uint64_t new_ctxt;
    uint64_t * lt;

    lt = tm_global_state->last_trans;

    // grab the global last_trans
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    new_ctxt = ++(lt[tm->ginfo->vcpu_id]);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    tm->t_num++;

    TM_DBG(tm->ginfo,INC TNUM,"global state is |%d|%d|, my tnum is %d\n", (int)lt[0],
                                                        (int)lt[1], (int)tm->t_num);

    if (new_ctxt != tm->t_num) {
        TM_ERR(tm->ginfo,TM_INC_TNUM,"misaligned global and local context value\n");
        return -1;
    }

    return 0;
}
int
v3_handle_trans_abort (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,ABORT,"problem freeing staging page\n");
        return -1;
    }

    // Clear the VTLB, which still has our staging page in it
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,ABORT,"problem clearing vtlb\n");
        return -1;
    }

    // Free the operation lists
    v3_clear_tm_lists(tm);

    TM_DBG(core,ABORT -- handler,"TM_MODE: %d | RIP: %llx | XABORT RIP: %llx\n", tm->TM_MODE, (uint64_t)core->rip, (uint64_t)tm->fail_call);

    if (tm->TM_MODE == TM_ON) {
        TM_DBG(core,ABORT,"Setting RIP to %llx\n", (uint64_t)tm->fail_call);
        core->rip = tm->fail_call;

        // turn TM off; clr_tm handles the global state
        v3_clr_tm(tm);

        // bump the transaction number
        v3_tm_inc_tnum(tm);
    }

    // time to garbage collect
    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,GC,"could not gc!\n");
        return -1;
    }

    return 0;
}
static uint_t
tm_hash_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(void *));
}

static int
tm_eq_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}

static uint_t
tm_hash_buf_fn (addr_t key)
{
    return v3_hash_long(key, sizeof(addr_t));
}

static int
tm_eq_buf_fn (addr_t key1, addr_t key2)
{
    return (key1 == key2);
}
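/* A sketch of the two conflict-tracking tables built on these hash
 * functions (structure names as used below):
 *
 *   addr_ctxt   : gva -> chained list of struct hash_chain contexts,
 *                 each holding a curr_lt[] snapshot of the per-core
 *                 transaction numbers at the time of an access
 *   access_type : hash of a (gva, core_id, core_lt) tuple ->
 *                 struct v3_tm_access_type { r, w }, recording how that
 *                 core touched that address in that transaction
 */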
/* this checks whether the remote access was done on the same
 * local transaction number as the current one */
static int
tm_check_context (struct v3_vm_info * vm,
                  addr_t gva,
                  uint64_t core_num,
                  uint64_t curr_ctxt,
                  uint64_t * curr_lt,
                  v3_tm_op_t op_type)
{
    uint64_t core_id_sub;
    struct v3_tm_access_type * type = NULL;

    for (core_id_sub = 0; core_id_sub < vm->num_cores; core_id_sub++) {
        struct v3_trans_mem * remote_tm;
        void * buf[3];
        addr_t key;

        /* skip the core that's doing the checking */
        if (core_id_sub == core_num) {
            continue;
        }

        remote_tm = v3_get_ext_core_state(&(vm->cores[core_id_sub]), "trans_mem");

        if (!remote_tm) {
            PrintError(vm, VCORE_NONE, "Could not get ext core state for core %llu\n", core_id_sub);
            return ERR_CHECK_FAIL;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)core_id_sub;
        buf[2] = (void *)curr_lt[core_id_sub];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(remote_tm->access_type, key);

        if (type) {
            /* conflict if we write and they accessed at all, or
             * if we read and they wrote */
            if ( (op_type == OP_TYPE_WRITE && (type->w || type->r)) ||
                 (op_type != OP_TYPE_WRITE && type->w)) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
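/* The condition above encodes the usual read/write conflict matrix:
 *
 *                    remote read    remote write
 *   local write       conflict       conflict
 *   local read          ok           conflict
 */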
/* check all the contexts in the list for a conflict */
static int
tm_check_all_contexts (struct v3_vm_info * vm,
                       struct list_head * hash_list,
                       addr_t gva,
                       v3_tm_op_t op_type,
                       uint64_t core_num,
                       uint64_t curr_ctxt)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t * curr_lt = NULL;
    int ret = 0;

    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {

        curr_lt = curr->curr_lt;

        if (curr_lt[core_num] == curr_ctxt) {

            ret = tm_check_context(vm, gva, core_num, curr_ctxt, curr_lt, op_type);

            if (ret == ERR_CHECK_FAIL) {
                return ERR_CHECK_FAIL;
            } else if (ret == CHECK_IS_CONFLICT) {
                return CHECK_IS_CONFLICT;
            }
        }
    }

    return CHECK_NO_CONFLICT;
}
/* The following access patterns trigger an abort:
 * We: Read  | Anyone Else: Write
 * We: Write | Anyone Else: Read, Write
 *
 * (pg 8-2 of the Haswell manual)
 *
 * returns ERR_CHECK_FAIL on error
 * returns CHECK_IS_CONFLICT if there is a conflict
 * returns CHECK_NO_CONFLICT if there isn't
 */
int
tm_check_conflict (struct v3_vm_info * vm,
                   addr_t gva,
                   v3_tm_op_t op_type,
                   uint64_t core_num,
                   uint64_t curr_ctxt)
{
    uint64_t core_id;

    /* loop over the other cores -> core_id */
    for (core_id = 0; core_id < vm->num_cores; core_id++) {

        struct guest_info * core = NULL;
        struct v3_trans_mem * tm = NULL;
        struct list_head * hash_list;

        /* only check other cores */
        if (core_id == core_num) {
            continue;
        }

        core = &(vm->cores[core_id]);
        tm = (struct v3_trans_mem*)v3_get_ext_core_state(core, "trans_mem");

        if (!tm) {
            PrintError(vm, VCORE_NONE, "+++ TM ERROR +++ Couldn't get core state for core %llu\n", core_id);
            return ERR_CHECK_FAIL;
        }

        /* this core didn't access the address, move on */
        if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
            continue;
        }

        /* loop over the chained hash for gva, find fields with curr_ctxt -> curr_lt */
        int ret = tm_check_all_contexts(vm, hash_list, gva, op_type, core_num, curr_ctxt);

        if (ret == ERR_CHECK_FAIL) {
            return ERR_CHECK_FAIL;
        } else if (ret == CHECK_IS_CONFLICT) {
            return CHECK_IS_CONFLICT;
        }
    }

    return CHECK_NO_CONFLICT;
}
static uint64_t
tm_need_to_gc (struct v3_trans_mem * tm,
               struct hash_chain * curr,
               uint64_t * lt_copy,
               uint64_t tmoff)
{
    uint64_t to_gc = 1;
    uint64_t i;

    /* if none of the cores are in a transactional context,
     * we know we can collect this context
     */
    if (!tmoff) {

        for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {
            /* if *any* of the cores are active in a transaction
             * number that is current (listed in this context),
             * we know we can't collect this context, as it
             * will be needed when that core's transaction ends
             */
            if (curr->curr_lt[i] >= lt_copy[i]) {
                to_gc = 0;
                break;
            }
        }
    }

    return to_gc;
}
static void
tm_del_stale_ctxt (struct hash_chain * curr)
{
    list_del(&(curr->lt_node));
    V3_Free(curr->curr_lt);
    V3_Free(curr);
}
static void
tm_del_acc_entry (struct v3_trans_mem * tm, addr_t key)
{
    v3_htable_remove(tm->access_type, key, 0);
    (tm->access_type_entries)--;
}
static int
tm_collect_context (struct v3_trans_mem * tm,
                    struct hashtable_iter * ctxt_iter,
                    struct hash_chain * curr,
                    uint64_t * begin_time,
                    uint64_t * end_time,
                    addr_t gva)
{
    uint64_t i;

    for (i = 0; i < tm->ginfo->vm_info->num_cores; i++) {

        void * buf[3];
        struct v3_tm_access_type * type;
        addr_t key;

        rdtscll(*end_time);
        if ((*end_time - *begin_time) > 100000000) {
            TM_ERR(tm->ginfo,GC,"time threshold exceeded, exiting!!!\n");
            return -1;
        }

        buf[0] = (void *)gva;
        buf[1] = (void *)i;
        buf[2] = (void *)curr->curr_lt[i];

        key = v3_hash_buffer((uchar_t*)buf, sizeof(void*)*3);

        type = (struct v3_tm_access_type *)v3_htable_search(tm->access_type, key);

        if (!type) { // something has gone terribly wrong
            TM_ERR(tm->ginfo,GC,"could not find accesstype entry to gc, THIS! IS! WRONG!\n");
            return -1;
        }

        /* delete the access type entry */
        tm_del_acc_entry(tm, key);
    }

    /* delete the stale context */
    tm_del_stale_ctxt(curr);

    return 0;
}
static int
tm_collect_all_contexts (struct v3_trans_mem * tm,
                         struct hashtable_iter * ctxt_iter,
                         uint64_t tmoff,
                         uint64_t * lt_copy,
                         uint64_t * begin_time,
                         uint64_t * end_time)
{
    struct hash_chain * tmp;
    struct hash_chain * curr;
    struct list_head * chain_list;
    addr_t gva;

    gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);

    chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

    /* this is a chained hash, so for each address we will have
     * a list of contexts. We now check each context to see
     * whether or not it can be collected
     */
    list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {

        uint64_t to_gc = tm_need_to_gc(tm, curr, lt_copy, tmoff);

        /* not garbage, go on to the next context in the list */
        if (!to_gc) {
            TM_DBG(tm->ginfo,GC,"not garbage collecting entries for address %llx\n", (uint64_t)gva);
            continue;
        }

        TM_DBG(tm->ginfo,GC,"garbage collecting entries for address %llx\n", (uint64_t)gva);

        /* found one, delete the corresponding entries in access_type */
        if (tm_collect_context(tm, ctxt_iter, curr, begin_time, end_time, gva) == -1) {
            TM_ERR(tm->ginfo,GC,"ERROR collecting context\n");
            return -1;
        }
    }

    /* if the context list (hash chain) is now empty, remove the hash entry */
    if (list_empty(chain_list)) {
        v3_htable_iter_remove(ctxt_iter, 0);
        (tm->addr_ctxt_entries)--;
    } else {
        v3_htable_iter_advance(ctxt_iter);
    }

    /* do NOT give the CPU away here: never yield while holding a lock */

    return 0;
}
int
tm_hash_gc (struct v3_trans_mem * tm)
{
    addr_t irqstate, irqstate2;
    int rc = 0;
    uint64_t begin_time = 0, end_time = 0, tmoff;
    uint64_t * lt_copy = NULL;
    struct v3_tm_state * tms = NULL;
    struct hashtable_iter * ctxt_iter = NULL;

    tms = (struct v3_tm_state *)v3_get_extension_state(tm->ginfo->vm_info, "trans_mem");

    if (!tms) {
        TM_ERR(tm->ginfo,GC,"could not get the global tm state\n");
        return -1;
    }

    TM_DBG(tm->ginfo,GC,"beginning garbage collection\n");
    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (pre)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (pre)\n", (int)v3_htable_count(tm->access_type));

    tmoff = (tms->cores_active == 0);

    lt_copy = V3_Malloc(sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    if (!lt_copy) {
        TM_ERR(tm->ginfo,GC,"Could not allocate space for lt_copy\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));

    rdtscll(begin_time);

    /* lt_copy holds the last transaction number for each core */
    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*(tm->ginfo->vm_info->num_cores));
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    /* lock both hashes */
    irqstate  = v3_lock_irqsave(tm->addr_ctxt_lock);
    irqstate2 = v3_lock_irqsave(tm->access_type_lock);

    /* loop over the hash entries in addr_ctxt */
    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);

    if (!ctxt_iter) {
        TM_ERR(tm->ginfo,GC,"could not create htable iterator\n");
        rc = -1;
    } else {

        /* we check each address stored in the hash */
        while (ctxt_iter->entry) {
            /* NOTE: this call advances the hash iterator */
            if (tm_collect_all_contexts(tm, ctxt_iter, tmoff, lt_copy, &begin_time, &end_time) == -1) {
                rc = -1;
                break;
            }
        }

        v3_destroy_htable_iter(ctxt_iter);
    }

    /* unlock in reverse order, restoring the matching irq state */
    v3_unlock_irqrestore(tm->access_type_lock, irqstate2);
    v3_unlock_irqrestore(tm->addr_ctxt_lock, irqstate);

    V3_Free(lt_copy);

    rdtscll(end_time);

    if (rc == -1) {
        TM_ERR(tm->ginfo,GC,"garbage collection failed, time spent: %d cycles\n", (int)(end_time - begin_time));
    } else {
        TM_DBG(tm->ginfo,GC,"ended garbage collection successfully, time spent: %d cycles\n", (int)(end_time - begin_time));
    }

    TM_DBG(tm->ginfo,GC,"\t %d entries in addr_ctxt (post)\n", (int)v3_htable_count(tm->addr_ctxt));
    TM_DBG(tm->ginfo,GC,"\t %d entries in access_type (post)\n", (int)v3_htable_count(tm->access_type));

    return rc;
}
/* TODO: break out the for loops in these functions */
static int
tm_update_ctxt_list (struct v3_trans_mem * tm,
                     uint64_t * lt_copy,
                     addr_t gva,
                     uint8_t write,
                     struct list_head * hash_list)
{
    struct hash_chain * curr = NULL;
    struct hash_chain * tmp  = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;
    uint_t new_le = 1;

    /* check whether we already have a context matching lt_copy */
    list_for_each_entry_safe(curr, tmp, hash_list, lt_node) {
        uint_t i;
        uint8_t same = 1;

        for (i = 0; i < num_cores; i++) {
            if (curr->curr_lt[i] != lt_copy[i]) {
                same = 0;
                break;
            }
        }

        if (same) {
            new_le = 0;
            break;
        }
    }

    if (new_le) {
        struct hash_chain * new_l = V3_Malloc(sizeof(struct hash_chain));

        if (!new_l) {
            TM_ERR(tm->ginfo,HASH,"Could not allocate new list\n");
            return -1;
        }

        memset(new_l, 0, sizeof(struct hash_chain));

        new_l->curr_lt = lt_copy;

        list_add_tail(&(new_l->lt_node), hash_list);
    }

    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        type = (struct v3_tm_access_type *)HTABLE_SEARCH(tm->access_type, key);

        if (!type) {
            /* no access type entry for this tuple yet, create one */
            type = V3_Malloc(sizeof(struct v3_tm_access_type));

            if (!type) {
                TM_ERR(tm->ginfo,HASH,"could not allocate type access struct\n");
                return -1;
            }

            memset(type, 0, sizeof(struct v3_tm_access_type));

            if (write) {
                type->w = 1;
            } else {
                type->r = 1;
            }

            if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
                TM_ERR(tm->ginfo,HASH,"problem inserting new mem access in htable\n");
                return -1;
            }
            (tm->access_type_entries)++;
        } else {
            /* update the existing access type entry */
            if (write) {
                type->w = 1;
            } else {
                type->r = 1;
            }
        }
    }

    return 0;
}
/* no entry in addr-ctxt yet, create one */
static int
tm_create_ctxt_key (struct v3_trans_mem * tm,
                    uint64_t * lt_copy,
                    addr_t gva,
                    uint8_t write)
{
    struct list_head * hash_list = NULL;
    struct hash_chain * new_l = NULL;
    uint64_t num_cores = tm->ginfo->vm_info->num_cores;
    uint64_t core_id;

    hash_list = (struct list_head *)V3_Malloc(sizeof(struct list_head));

    if (!hash_list) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_list\n");
        return -1;
    }

    INIT_LIST_HEAD(hash_list);

    new_l = V3_Malloc(sizeof(struct hash_chain));

    if (!new_l) {
        TM_ERR(tm->ginfo,HASH,"Problem allocating hash_chain\n");
        goto out_err;
    }

    memset(new_l, 0, sizeof(struct hash_chain));

    new_l->curr_lt = lt_copy;

    /* add the context to the hash chain */
    list_add_tail(&(new_l->lt_node), hash_list);

    if (!(HTABLE_INSERT(tm->addr_ctxt, gva, hash_list))) {
        TM_ERR(tm->ginfo,HASH CHAIN,"problem inserting new chain into hash\n");
        goto out_err1;
    }

    (tm->addr_ctxt_entries)++;

    /* TODO: we need a way to unwind and deallocate for all cores on failure here */
    for (core_id = 0; core_id < num_cores; core_id++) {
        struct v3_tm_access_type * type = NULL;
        struct v3_ctxt_tuple tup;
        addr_t key;

        tup.gva     = (void*)gva;
        tup.core_id = (void*)core_id;
        tup.core_lt = (void*)lt_copy[core_id];

        type = V3_Malloc(sizeof(struct v3_tm_access_type));

        if (!type) {
            TM_ERR(tm->ginfo,HASH,"could not allocate access type struct\n");
            goto out_err1;
        }

        memset(type, 0, sizeof(struct v3_tm_access_type));

        if (write) {
            type->w = 1;
        } else {
            type->r = 1;
        }

        key = v3_hash_buffer((uchar_t*)&tup, sizeof(struct v3_ctxt_tuple));

        if (HTABLE_INSERT(tm->access_type, key, type) == 0) {
            TM_ERR(tm->ginfo,HASH,"TM: problem inserting new mem access in htable\n");
            goto out_err1;
        }

        (tm->access_type_entries)++;
    }

    return 0;

out_err1:
    list_del(&(new_l->lt_node));
    V3_Free(new_l);
out_err:
    V3_Free(hash_list);
    return -1;
}
/* entry points:
 *
 * called during MIME execution
 * record the memory access in the conflict logs
 *   this locks the table during insertion
 */
int
tm_record_access (struct v3_trans_mem * tm,
                  uint8_t write,
                  addr_t gva)
{
    uint64_t * lt_copy;
    struct list_head * hash_list;
    addr_t irqstate;
    uint64_t num_cores;

    num_cores = tm->ginfo->vm_info->num_cores;

    TM_DBG(tm->ginfo,REC,"recording addr %llx, addr-ctxt.cnt = %d, access-type.cnt = %d\n", (uint64_t)gva,
                                        (int)v3_htable_count(tm->addr_ctxt), (int)v3_htable_count(tm->access_type));
    //PrintDebug(tm->ginfo->vm_info, tm->ginfo,"\tWe think that addr-ctxt.cnt = %d, access-type.cnt = %d\n",(int)tm->addr_ctxt_entries,(int)tm->access_type_entries);

    lt_copy = V3_Malloc(sizeof(uint64_t)*num_cores);

    if (!lt_copy) {
        TM_ERR(tm->ginfo,REC,"Allocating array failed\n");
        return -1;
    }

    memset(lt_copy, 0, sizeof(uint64_t)*num_cores);

    irqstate = v3_lock_irqsave(tm_global_state->lock);
    memcpy(lt_copy, tm_global_state->last_trans, sizeof(uint64_t)*num_cores);
    v3_unlock_irqrestore(tm_global_state->lock, irqstate);

    if (!(hash_list = (struct list_head *)HTABLE_SEARCH(tm->addr_ctxt, gva))) {
        /* we haven't created a context list for this address yet, go do it */
        return tm_create_ctxt_key(tm, lt_copy, gva, write);
    }

    /* we have a context list for this address already; do we need to create a new context? */
    return tm_update_ctxt_list(tm, lt_copy, gva, write, hash_list);
}
static void
tm_prepare_cpuid (struct v3_vm_info * vm)
{
    V3_Print(vm, VCORE_NONE, "TM INIT | enabling RTM cap in CPUID\n");

    /* increase the max CPUID function to 7 (extended feature flags enumeration) */
    v3_cpuid_add_fields(vm, 0x0,
            0xf, 7,
            0, 0,
            0, 0,
            0, 0);

    /* do the same for AMD */
    v3_cpuid_add_fields(vm, 0x80000000,
            0xffffffff, 0x80000007,
            0, 0,
            0, 0,
            0, 0);

    /* enable RTM (CPUID.07H.EBX.RTM = 1) */
    v3_cpuid_add_fields(vm, 0x07, 0, 0, (1<<11), 0, 0, 0, 0, 0);
    v3_cpuid_add_fields(vm, 0x80000007, 0, 0, (1<<11), 0, 0, 0, 0, 0);
}
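/* A guest would then discover RTM support in the usual way; an
 * illustrative (guest-side, not Palacios) sketch:
 *
 *   uint32_t eax = 7, ebx, ecx = 0, edx;
 *   asm volatile ("cpuid"
 *                 : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
 *   int has_rtm = (ebx >> 11) & 1;  // CPUID.(EAX=07H,ECX=0):EBX.RTM[bit 11]
 */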
static int
init_trans_mem (struct v3_vm_info * vm,
                v3_cfg_tree_t * cfg,
                void ** priv_data)
{
    struct v3_tm_state * tms;
    int i;

    PrintDebug(vm, VCORE_NONE, "Trans Mem. Init\n");

    tms = V3_Malloc(sizeof(struct v3_tm_state));

    if (!tms) {
        PrintError(vm, VCORE_NONE, "Problem allocating v3_tm_state\n");
        return -1;
    }

    memset(tms, 0, sizeof(struct v3_tm_state));

    if (v3_register_hypercall(vm, TM_KICKBACK_CALL, tm_handle_hcall, NULL) == -1) {
        PrintError(vm, VCORE_NONE, "TM could not register hypercall\n");
        goto out_err;
    }

    v3_lock_init(&(tms->lock));

    tms->TM_MODE      = TM_OFF;
    tms->cores_active = 0;

    uint64_t * lt = V3_Malloc(sizeof(uint64_t) * vm->num_cores);

    if (!lt) {
        PrintError(vm, VCORE_NONE, "Problem allocating last_trans array\n");
        goto out_err1;
    }

    memset(lt, 0, sizeof(uint64_t) * vm->num_cores);

    for (i = 0; i < vm->num_cores; i++) {
        lt[i] = 0;
    }

    tms->last_trans = lt;

    *priv_data = tms;
    tm_global_state = tms;

    tm_prepare_cpuid(vm);

    return 0;

out_err1:
    v3_lock_deinit(&(tms->lock));
    v3_remove_hypercall(vm, TM_KICKBACK_CALL);
out_err:
    V3_Free(tms);
    return -1;
}
static int
init_trans_mem_core (struct guest_info * core,
                     void * priv_data,
                     void ** core_data)
{
    struct v3_trans_mem * tm = V3_Malloc(sizeof(struct v3_trans_mem));

    TM_DBG(core,INIT, "Trans Mem. Core Init\n");

    if (!tm) {
        TM_ERR(core,INIT, "Problem allocating TM state\n");
        return -1;
    }

    memset(tm, 0, sizeof(struct v3_trans_mem));

    INIT_LIST_HEAD(&tm->trans_r_list);
    INIT_LIST_HEAD(&tm->trans_w_list);

    tm->addr_ctxt = v3_create_htable(0, tm_hash_fn, tm_eq_fn);
    if (!(tm->addr_ctxt)) {
        TM_ERR(core,INIT,"problem creating addr_ctxt\n");
        goto out_err;
    }

    tm->access_type = v3_create_htable(0, tm_hash_buf_fn, tm_eq_buf_fn);
    if (!(tm->access_type)) {
        TM_ERR(core,INIT,"problem creating access_type\n");
        goto out_err1;
    }

    v3_lock_init(&(tm->addr_ctxt_lock));
    v3_lock_init(&(tm->access_type_lock));

    tm->TM_STATE = TM_NULL;
    tm->TM_MODE  = TM_OFF;
    tm->TM_ABORT = 0;
    tm->ginfo    = core;
    tm->t_num    = 0;
    tm->to_branch = 0;
    tm->offset   = 0;
    tm->access_type_entries = 0;
    tm->addr_ctxt_entries = 0;
    tm->dirty_instr_flag = 0;

    /* TODO: Cache Model */
    //tm->box = (struct cache_box *)V3_Malloc(sizeof(struct cache_box *));
    //tm->box->init = init_cache;
    //tm->box->init(sample_spec, tm->box);

    *core_data = tm;

    return 0;

out_err1:
    v3_free_htable(tm->addr_ctxt, 0, 0);
out_err:
    V3_Free(tm);
    return -1;
}
static int
deinit_trans_mem (struct v3_vm_info * vm, void * priv_data)
{
    struct v3_tm_state * tms = (struct v3_tm_state *)priv_data;

    if (v3_remove_hypercall(vm, TM_KICKBACK_CALL) == -1) {
        PrintError(vm, VCORE_NONE, "Problem removing TM hypercall\n");
        return -1;
    }

    v3_lock_deinit(&(tms->lock));

    if (tms) {
        V3_Free(tms);
    }

    return 0;
}
static int
deinit_trans_mem_core (struct guest_info * core,
                       void * priv_data,
                       void * core_data)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)core_data;
    struct hashtable_iter * ctxt_iter = NULL;

    v3_clear_tm_lists(tm);

    if (tm->staging_page) {
        TM_ERR(core,DEINIT CORE,"WARNING: staging page not freed!\n");
    }

    ctxt_iter = v3_create_htable_iter(tm->addr_ctxt);
    if (!ctxt_iter) {
        TM_DBG(core,DEINIT_CORE,"could not create htable iterator\n");
        return -1;
    }

    /* delete all the context entries for each hashed address */
    while (ctxt_iter->entry) {
        struct hash_chain * tmp;
        struct hash_chain * curr;
        struct list_head * chain_list;
        addr_t gva;

        gva = (addr_t)v3_htable_get_iter_key(ctxt_iter);
        chain_list = (struct list_head *)v3_htable_get_iter_value(ctxt_iter);

        /* delete the context */
        list_for_each_entry_safe(curr, tmp, chain_list, lt_node) {
            tm_del_stale_ctxt(curr);
        }

        v3_htable_iter_advance(ctxt_iter);
    }

    v3_destroy_htable_iter(ctxt_iter);

    /* we've already deleted the values in this one */
    v3_free_htable(tm->addr_ctxt, 0, 0);

    /* KCH WARNING: we may not want to free access type values here */
    v3_free_htable(tm->access_type, 1, 0);

    v3_lock_deinit(&(tm->addr_ctxt_lock));
    v3_lock_deinit(&(tm->access_type_lock));

    V3_Free(tm);

    return 0;
}
static struct v3_extension_impl trans_mem_impl = {
    .name = "trans_mem",
    .vm_init = init_trans_mem,
    .vm_deinit = deinit_trans_mem,
    .core_init = init_trans_mem_core,
    .core_deinit = deinit_trans_mem_core,
};

register_extension(&trans_mem_impl);
/* entry conditions:
 * tms->on  => commit our list, free sp, clear our lists, clr_tm will handle the global state, then gc
 * tms->off => commit our list, free sp, clear our lists, clr_tm will handle the global state, then gc
 */
static int
tm_handle_xend (struct guest_info * core,
                struct v3_trans_mem * tm)
{
    rdtscll(tm->exit_time);

    // Error checking! Make sure that we got here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_ERR(core, UD, "Encountered XEND while not in a transactional region\n");
        v3_free_staging_page(tm);
        v3_clr_vtlb(core);
        v3_clear_tm_lists(tm);
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    /* Our transaction finished! */
    /* Copy over data from the staging page */
    TM_DBG(core, UD,"Copying data from our staging page back into 'real' memory\n");

    if (commit_list(core, tm) == -1) {
        TM_ERR(core,UD,"error committing tm list to memory\n");
        return -1;
    }

    TM_DBG(core,UD,"Freeing staging page and internal data structures\n");

    // Free the staging page
    if (v3_free_staging_page(tm) == -1) {
        TM_ERR(core,XEND,"couldn't free staging page\n");
        return -1;
    }

    // Clear the vtlb, as it may still contain our staging page
    if (v3_clr_vtlb(core) == -1) {
        TM_ERR(core,XEND,"couldn't clear vtlb\n");
        return -1;
    }

    // Clear the lists of operations
    v3_clear_tm_lists(tm);

    /* Set the state and advance the RIP */
    TM_DBG(core,XEND,"advancing rip to %llx\n", core->rip + XEND_INSTR_LEN);
    core->rip += XEND_INSTR_LEN;

    // turn TM off; clr_tm handles the global state
    v3_clr_tm(tm);

    // time to garbage collect
    v3_tm_inc_tnum(tm);

    if (tm_hash_gc(tm) == -1) {
        TM_ERR(core,XEND,"could not gc!\n");
        return -1;
    }

    return 0;
}
/* entry conditions:
 * tms->on  => handle our abort code, handle_trans_abort will clear the necessary state
 * tms->off => handle our abort code, handle_trans_abort will clear the necessary state
 */
static int
tm_handle_xabort (struct guest_info * core,
                  struct v3_trans_mem * tm)
{
    /* TODO: this probably needs to move somewhere else */
    rdtscll(tm->exit_time);

    // Error checking! Make sure that we got here in a legitimate manner
    if (tm->TM_MODE != TM_ON) {
        TM_DBG(core, UD, "We got here while not in a transactional region!\n");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    TM_DBG(core,UD,"aborting\n");

    if (tm->TM_STATE != TM_NULL) {
        v3_restore_dirty_instr(core);
    }

    /* abort the transaction, setting rip to the fallback address */
    v3_handle_trans_abort(core);

    return 0;
}
/* entry conditions:
 * tms->on  => we set up our running env, set_tm will clear other vtlbs to start single stepping
 * tms->off => we set up our running env, set_tm will not clear anyone else's vtlb
 */
static int
tm_handle_xbegin (struct guest_info * core,
                  struct v3_trans_mem * tm,
                  uchar_t * instr)
{
    sint32_t rel_addr = 0;

    if (tm->TM_MODE == TM_ON) {
        TM_ERR(core,UD,"We got here while already in a transactional region!");
        v3_raise_exception(core, UD_EXCEPTION);
        return 0;
    }

    rdtscll(tm->entry_time);
    tm->entry_exits = core->num_exits;

    /* set the tm_mode for this core */
    v3_set_tm(tm);

    TM_DBG(core,UD,"Set the system in TM Mode, saving fallback address");

    // Save the fail_call address (first 2 bytes = opcode, last 4 = fail call addr)
    rel_addr = *(sint32_t*)(instr+2);
    tm->fail_call = core->rip + XBEGIN_INSTR_LEN + rel_addr;

    TM_DBG(core,UD,"we set fail_call to %llx, rip is %llx, rel_addr is %x", (uint64_t)tm->fail_call,(uint64_t)core->rip,rel_addr);

    /* flush the shadow page tables */
    TM_DBG(core,UD,"Throwing out the shadow table");
    v3_invalidate_shadow_pts(core);

    // Increase RIP, ready to go to the next instruction
    core->rip += XBEGIN_INSTR_LEN;

    return 0;
}
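/* Worked example (hypothetical bytes): "c7 f8 f6 ff ff ff" is XBEGIN
 * with rel_addr = -10 (0xfffffff6), so at rip = 0x401000:
 *
 *   fail_call = 0x401000 + XBEGIN_INSTR_LEN (6) + (-10) = 0x400ffc
 *
 * i.e. the fallback target is computed relative to the end of the
 * XBEGIN instruction, as hardware RTM specifies.
 */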
/* XTEST: set ZF to 0 if we are inside a transaction, 1 otherwise */
static int
tm_handle_xtest (struct guest_info * core,
                 struct v3_trans_mem * tm)
{
    // if we are in TM mode, clear ZF (bit 6 of RFLAGS), otherwise set it
    if (tm->TM_MODE == TM_ON) {
        core->ctrl_regs.rflags &= ~(1ULL << 6);
    } else {
        core->ctrl_regs.rflags |= (1ULL << 6);
    }

    core->rip += XTEST_INSTR_LEN;

    return 0;
}
/* instructions (Haswell RTM):
 * XBEGIN  c7 f8 rel32
 * XABORT  c6 f8 imm8
 * XEND    0f 01 d5
 * XTEST   0f 01 d6
 */
static int
tm_handle_ud (struct guest_info * core)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(core, "trans_mem");
    uchar_t instr[INSTR_BUF_SZ];
    uint8_t byte1, byte2, byte3;

    tm_read_instr(core, (addr_t)core->rip, instr, INSTR_BUF_SZ);

    byte1 = *(uint8_t *)((addr_t)instr);
    byte2 = *(uint8_t *)((addr_t)instr + 1);
    byte3 = *(uint8_t *)((addr_t)instr + 2);

    if (byte1 == 0xc7 && byte2 == 0xf8) { /* bytes 3-6 are a rel32 immediate */

        TM_DBG(core,UD,"Encountered Haswell-specific XBEGIN %x %x %d at %llx", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xbegin(core, tm, instr) == -1) {
            TM_ERR(core, UD, "Problem handling XBEGIN\n");
            return -1;
        }

    } else if (byte1 == 0xc6 && byte2 == 0xf8) { /* third byte is an imm8 */

        TM_DBG(core, UD, "Encountered Haswell-specific XABORT %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xabort(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XABORT\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd5) {

        TM_DBG(core, UD, "Encountered Haswell-specific XEND %x %x %d at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xend(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XEND\n");
            return -1;
        }

    } else if (byte1 == 0x0f && byte2 == 0x01 && byte3 == 0xd6) {

        TM_DBG(core,UD,"Encountered Haswell-specific XTEST %x %x %x at %llx\n", byte1, byte2, byte3, (uint64_t)core->rip);

        if (tm_handle_xtest(core, tm) == -1) {
            TM_ERR(core, UD, "Problem handling XTEST\n");
            return -1;
        }

    } else {

        /* oh no, this is still unknown, pass the exception back to the guest! */
        TM_DBG(core,UD,"Encountered: %x %x %x\n", byte1, byte2, byte3);
        v3_raise_exception(core, UD_EXCEPTION);
    }

    return 0;
}
void
v3_tm_handle_exception (struct guest_info * info,
                        addr_t exit_code)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,ERR,"TM extension state not found\n");
        return;
    }

    switch (exit_code) {
        /* any of these exceptions should abort current transactions */
        case SVM_EXIT_EXCP6:
            if (tm_handle_ud(info) == -1) {
                TM_ERR(info,EXCP,"problem handling #UD\n");
            }
            break;
        case SVM_EXIT_EXCP0:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DE_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DE exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP1:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, DB_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to DB exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP3:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BP exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP4:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, OF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to OF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP5:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, BR_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to BR exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP7:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NM_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NM exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP10:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, TS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to TS exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP11:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, NP_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to NP exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP12:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, SS_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to SS exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP13:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, GPF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to GPF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP16:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, MF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to MF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP17:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, AC_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to AC exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        case SVM_EXIT_EXCP19:
            if (tm->TM_MODE != TM_ON) {
                v3_raise_exception(info, XF_EXCEPTION);
            } else {
                TM_DBG(info,EXCP,"aborting due to XF exception\n");
                v3_handle_trans_abort(info);
            }
            break;
        default:
            TM_DBG(info,EXCP,"exception # %d\n", (int)exit_code - 0x40);
            break;
    }
}
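/* Note: the SVM exception exit codes are the exception vector plus a
 * fixed base (SVM_EXIT_EXCP0 == 0x40), which is why the default case
 * above can recover the vector as exit_code - 0x40.
 */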
void
v3_tm_set_excp_intercepts (vmcb_ctrl_t * ctrl_area)
{
    ctrl_area->exceptions.de = 1; // 0  : divide by zero
    ctrl_area->exceptions.db = 1; // 1  : debug
    ctrl_area->exceptions.bp = 1; // 3  : breakpoint
    ctrl_area->exceptions.of = 1; // 4  : overflow
    ctrl_area->exceptions.br = 1; // 5  : bound range
    ctrl_area->exceptions.ud = 1; // 6  : undefined opcode
    ctrl_area->exceptions.nm = 1; // 7  : device not available
    ctrl_area->exceptions.ts = 1; // 10 : invalid TSS
    ctrl_area->exceptions.np = 1; // 11 : segment not present
    ctrl_area->exceptions.ss = 1; // 12 : stack fault
    ctrl_area->exceptions.gp = 1; // 13 : general protection
    ctrl_area->exceptions.mf = 1; // 16 : x87 exception pending
    ctrl_area->exceptions.ac = 1; // 17 : alignment check
    ctrl_area->exceptions.xf = 1; // 19 : SIMD floating point
}
extern void v3_stgi();
extern void v3_clgi();

/* 441-tm: if we are in TM mode, we need to check for any interrupts here,
 * and if there are any, we need to do some aborting! Make sure not to die
 * here if we are already 'aborting'; that results in an infinite loop
 */
void
v3_tm_check_intr_state (struct guest_info * info,
                        vmcb_ctrl_t * guest_ctrl,
                        vmcb_saved_state_t * guest_state)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (!tm) {
        TM_ERR(info,INTR,"TM extension state not found\n");
        return;
    }

    /* TODO: work this in */
    if (0 && (tm->TM_MODE == TM_ON) &&
             (tm->TM_ABORT != 1)) {

        if (guest_ctrl->guest_ctrl.V_IRQ ||
            guest_ctrl->EVENTINJ.valid) {

            rdtscll(tm->exit_time);
            TM_DBG(info,INTR,"%lld exits happened, time delta is %lld",
                    (info->num_exits - tm->entry_exits),
                    (tm->exit_time - tm->entry_time));

            // We do indeed have pending interrupts, so abort
            v3_stgi();
            TM_DBG(info,INTR,"we have a pending interrupt!\n");

            v3_handle_trans_abort(info);

            // Copy the new RIP state into the arch-dependent structure
            guest_state->rip = info->rip;
            TM_DBG(info,INTR,"currently guest state rip is %llx\n",(uint64_t)guest_state->rip);
            v3_clgi();
        }
    }
}
void
v3_tm_handle_pf_64 (struct guest_info * info,
                    pf_error_t error_code,
                    addr_t fault_addr,
                    addr_t * page_to_use)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    if (!tm) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm core state\n");
        return;
    }

    if (!tms) {
        TM_ERR(info,HANDLE_PF, "couldn't get tm global state\n");
        return;
    }

    if ((tms->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        TM_DBG(info,PF,"Core reporting in, got a #PF (tms->mode is %d)\n", tms->TM_MODE);

        *page_to_use = v3_handle_trans_mem_fault(info, fault_addr, error_code);

        if (*page_to_use == ERR_TRANS_FAULT_FAIL) {
            TM_ERR(info,HANDLE_PF, "could not handle transaction page fault\n");
            return;
        }

        if ((tm->TM_MODE == TM_ON) &&
            (tm->staging_page == NULL)) {

            tm->staging_page = V3_AllocPages(1);

            if (!(tm->staging_page)) {
                TM_ERR(info,MMU,"Problem allocating staging page\n");
                return;
            }

            TM_DBG(info,MMU,"Created staging page at %p\n", (void *)tm->staging_page);
        }
    }
}
void
v3_tm_handle_usr_tlb_miss (struct guest_info * info,
                           pf_error_t error_code,
                           addr_t page_to_use,
                           addr_t * shadow_pa)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    /* TLB miss from user */
    if ((tm->TM_MODE == TM_ON) &&
        (error_code.user == 1)) {

        if (page_to_use > TRANS_FAULT_OK) {
            TM_DBG(info,MMU, "Using alternate page at: %llx\n", (uint64_t)page_to_use);
            *shadow_pa = page_to_use;
        }
    }
}
void
v3_tm_handle_read_fault (struct guest_info * info,
                         pf_error_t error_code,
                         pte64_t * shadow_pte)
{
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");
    struct v3_tm_state * tms = (struct v3_tm_state *)v3_get_extension_state(info->vm_info, "trans_mem");

    // if we are about to read, make the page read only
    if ((tms->TM_MODE == TM_ON) &&
        (tm->TM_STATE == TM_EXEC) &&
        (error_code.write == 0) &&
        (error_code.user == 1)) {

        TM_DBG(info,MMU, "Flagging the page read only\n");
        shadow_pte->writable = 0;
    }
}
void
v3_tm_decode_rtm_instrs (struct guest_info * info,
                         addr_t instr_ptr,
                         struct x86_instr * instr)
{
    uint8_t byte1, byte2, byte3;
    struct v3_trans_mem * tm = (struct v3_trans_mem *)v3_get_ext_core_state(info, "trans_mem");

    if (tm->TM_MODE == TM_ON) {

        byte1 = *(uint8_t *)(instr_ptr);
        byte2 = *(uint8_t *)(instr_ptr + 1);
        byte3 = *(uint8_t *)(instr_ptr + 2);

        if (byte1 == 0xc7 &&
            byte2 == 0xf8) { /* bytes 3-6 are a rel32 immediate */

            TM_DBG(info, DECODE,"Decoding XBEGIN %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 6;

        } else if (byte1 == 0xc6 &&
                   byte2 == 0xf8) { /* third byte is an imm8 */

            TM_DBG(info, DECODE, "Decoding XABORT %x %x %d\n", byte1, byte2, byte3);
            instr->instr_length = 3;

        } else if (byte1 == 0x0f &&
                   byte2 == 0x01 &&
                   byte3 == 0xd5) {

            TM_DBG(info, DECODE, "Decoding XEND %x %x %x\n", byte1, byte2, byte3);
            instr->instr_length = 3;