palacios/include/extensions/trans_mem.h

   1 /*
   2  * This file is part of the Palacios Virtual Machine Monitor developed
   3  * by the V3VEE Project with funding from the United States National
   4  * Science Foundation and the Department of Energy.
   5  *
   6  * The V3VEE Project is a joint project between Northwestern University
   7  * and the University of New Mexico.  You can find out more at
   8  * http://www.v3vee.org
   9  *
  10  * Copyright (c) 2012, NWU EECS 441 Transactional Memory Team
  11  * Copyright (c) 2012, The V3VEE Project <http://www.v3vee.org>
  12  * All rights reserved.
  13  *
  14  * Author: Maciek Swiech <dotpyfe@u.northwestern.edu>
  15  *          Marcel Flores <marcel-flores@u.northwestern.edu>
  16  *          Zachary Bischof <zbischof@u.northwestern.edu>
  17  *          Kyle C. Hale <kh@u.northwestern.edu>
  18  *
  19  * This is free software.  You are permitted to use,
  20  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
  21  *
  22
  23 RTM Implementation Wishlist (roughly in order of priority)
  24 Kyle Hale, Maciek Swiech 2014
  25
  26 From Intel Architecture Instruction Set Extensions Programming Reference, Section 8.3, p.8-6
  27 link: http://software.intel.com/sites/default/files/m/9/2/3/41604
  28
  29 - architectural registers need to be saved / restored
  30 - exceptions that misuse of TSX instructions can raise
  31 - abort on interrupts, asynchronous events
  32 - abort on CPUID, PAUSE
  33 - abort on non-writeback memory ops, including ifetches to uncacheable mem
  34 - RTM-debugger support
  35 - RTM nesting
  36 - parameterized cache model, for generating hardware configuration-based aborts
  37
  38 - to be able to model specific implementations, add options (runtime or compiletime) to abort on:
  39     * x87/mmx state changes (also fxrstor, fxsave),
  40     * cli, sti, popfd, popfq, clts
  41     * mov to segment regs, pop segment regs, lds, les, lfs, lgs, lss, swapgs, wrfsbase, wrgsbase, lgdt, sgdt, lidt, sidt, lldt, sldt, ltr,
  42       str, far call, far jmp, far ret, far iret, mov to DRx, mov to cr0-4, cr8, lmsw
  43     * sysenter, syscall, sysexit, sysret
  44     * clflush, invd, wbinvd, invlpg, invpcid
  45     * memory instructions with temporal hints (e.g. movntdqa)
  46     * xsave, xsaveopt, xrstor
  47     * interrupts: INTn, INTO
  48     * IO: in, ins, rep ins, out, outs, rep outs, and variants
  49     * VMX instructions
  50     * smx: getsec
  51     * ud2, rsm, rdmsr, wrmsr, hlt, monitor, mwait, xsetbv, vzeroupper, maskmovq, v/maskmovdqu
  52
  53  *
  54  *
  55  * We claim that we can have a single, shared "cache"-like box
  56  * that handles all writes and reads when TM is on on any core.  The
  57  * idea is that if TM is on on any core, we redirect reads/writes
  58  * that we get to the box, and it records them internally for
  59  * future playback, and tells us whether an abort condition has
  60  * occurred or not:
  61  *
  62  * error = handle_start_tx(boxstate,vcorenum);
  63  * error = handle_abort(boxstate,vcorenum);
  64  * error = handle_commit(boxstate,vcorenum);
  65  *
  66  * should_abort = handle_write(boxstate, vcorenum, physaddr, data, datalen);
  67  * should_abort = handle_read(boxstate, vcorenum,physaddr, *data, datalen);
  68  *
  69  * One implementation:
  70  *
  71  * struct rec {
  72  *    enum {READ,WRITE,BEGIN,ABORT,END} op;
  73  *    addr_t vcorenum,
  74  *           physaddr,
  75  *           datalen ;
  76  *    struct rec *next;
  77  * }
  78  *
  79  * struct cache_model {
  80  *    void *init(xml spec);  // make a cache, return ptr to state
  81  *    int write(void *priv, physaddr, datalen, int (*change_cb(int core,
  82  *                             physaddrstart, len));
  83  *    // similar for read
  84  *
  85  * // Idea is that we pass writes to cache model, it calls us back to say which
  86  * lines on which cores have changed
  87  * }
  88  *
  89  *
  90  * struct boxstate {
  91  *    struct cache_model *model; //
  92  *    lock_t     global_lock; // any handle_* func acquires this first
  93  *    uint64_t   loglen;
  94  *    uint64_t   numtransactionsactive;
  95  *    struct rec *first;
  96  * }
  97  *
  98  * int handle_write(box,vcore,physaddr,data,datalen) {
  99  *
 100  */
 101
 102 #ifndef __TRANS_MEM_H__
 103 #define __TRANS_MEM_H__
 104
 105 #include <palacios/vmm_lock.h>
 106 #include <palacios/vmcb.h>
 107 #include <palacios/vmm_paging.h>
 108
 109 #define MAX_CORES 32 /* NOTE(review): presumably the largest core count the TM extension supports; no use is visible in this header */
 110
 111 #define TM_KICKBACK_CALL 0x1337 /* NOTE(review): looks like a hypercall id (compare TRANS_HCALL_*); confirm where it is registered */
 112
 113 #define HTABLE_SEARCH(h, k) ({ addr_t ret; v3_lock(h##_lock); ret = v3_htable_search((h), (k)); v3_unlock(h##_lock); ret; })
 114 #define HTABLE_INSERT(h, k, v) ({ addr_t ret; v3_lock(h##_lock); ret = v3_htable_insert((h), (k), (addr_t)(v)); v3_unlock(h##_lock); ret; })
 115
 116 #define INSTR_INJECT_LEN 10 /* NOTE(review): presumably the byte length of the instruction sequence injected into the guest; confirm */
 117 #define INSTR_BUF_SZ  15 /* same size as dirty_instr[] in struct v3_trans_mem; 15 bytes is the maximum x86 instruction length */
 118 #define ERR_STORE_MUST_ABORT -2 /* store failed and the transaction has to abort (per the name; producer not visible here) */
 119 #define ERR_STORE_FAIL -1 /* shares the value -1 with ERR_DECODE_FAIL and TRANS_HCALL_FAIL */
 120 #define ERR_DECODE_FAIL -1
 121 #define ERR_TRANS_FAULT_FAIL 0 /* note: failure is 0 and success is 1 for the fault path, the opposite of TRANS_HCALL_* */
 122 #define TRANS_FAULT_OK 1
 123 #define TRANS_HCALL_FAIL -1
 124 #define TRANS_HCALL_OK 0
 125
 126 /* conflict checking codes (NOTE(review): presumably the return values of tm_check_conflict(); confirm) */
 127 #define ERR_CHECK_FAIL -1 /* the check itself failed */
 128 #define CHECK_MUST_ABORT -2 /* same numeric value as ERR_STORE_MUST_ABORT */
 129 #define CHECK_IS_CONFLICT 1
 130 #define CHECK_NO_CONFLICT 0
 131
 132 /* RTM instruction handling: encoded instruction lengths in bytes (encodings per the Intel SDM) */
 133 #define XBEGIN_INSTR_LEN 0x6 /* C7 F8 + rel32 (the rel16 form with an operand-size prefix is not covered) */
 134 #define XEND_INSTR_LEN   0x3 /* 0F 01 D5 */
 135 #define XABORT_INSTR_LEN 0x3 /* C6 F8 + imm8 */
 136 #define XTEST_INSTR_LEN  0x3 /* 0F 01 D6 */
 137
 138 /* abort status definitions (these are bit indices, not masks; they match the RTM abort status bits the Intel SDM defines for EAX) */
 139 #define ABORT_XABORT     0x0 // xabort instr
 140 #define ABORT_RETRY      0x1 // may succeed on retry (must be clear if bit 0 set)
 141 #define ABORT_CONFLICT   0x2 // another process accessed memory in the transaction
 142 #define ABORT_OFLOW      0x3 // internal buffer overflowed
 143 #define ABORT_BKPT       0x4 // debug breakpoint was hit
 144 #define ABORT_NESTED     0x5 // abort occurred during nested transaction (not currently used)
 145
 146
 147 typedef enum tm_abrt_cause { /* abort reason; type of the `cause` argument of v3_handle_trans_abort() */
 148     TM_ABORT_XABORT,      /* 0 */
 149     TM_ABORT_CONFLICT,    /* 1 -- note: not equal to the ABORT_CONFLICT bit index (2) */
 150     TM_ABORT_INTERNAL,    /* 2 */
 151     TM_ABORT_BKPT,        /* 3 */
 152     TM_ABORT_UNSPECIFIED, /* 4 */
 153 } tm_abrt_cause_t; /* values are sequential enumerators, distinct from the ABORT_* status bit indices */
 154
 155 struct v3_tm_access_type { /* two one-bit access flags; NOTE(review): presumably the value stored in the access_type hashtable; confirm */
 156     uint8_t r : 1; /* NOTE(review): presumably set when the address was read */
 157     uint8_t w : 1; /* NOTE(review): presumably set when the address was written */
 158 } __attribute__((packed));
 159
 160 struct v3_ctxt_tuple { /* packed triple of pointer-sized values; NOTE(review): presumably hashed as a flat buffer to key access_type ("addr:corenum:t_num"); confirm */
 161     void * gva;     /* guest virtual address, carried as a pointer-sized value */
 162     void * core_id; /* NOTE(review): presumably a core number stored in a pointer-sized slot, not a dereferenceable pointer */
 163     void * core_lt; /* NOTE(review): "lt" presumably means last transaction (cf. last_trans in struct v3_tm_state); confirm */
 164 } __attribute__((packed));
 165
 166 /* 441-tm: Are we currently in a transaction (used as the TM_MODE field of both struct v3_trans_mem and struct v3_tm_state) */
 167 enum TM_MODE_E {
 168     TM_OFF = 0, /* not in a transaction */
 169     TM_ON = 1,  /* in a transaction */
 170 };
 171
 172 /* 441-tm: Current state of the transaction state machine (stored in the TM_STATE field of struct v3_trans_mem) */
 173 enum TM_STATE_E {
 174     TM_NULL = 0,   /* NOTE(review): presumably the idle state; confirm */
 175     TM_IFETCH = 1, /* NOTE(review): presumably waiting on the next instruction fetch; confirm */
 176     TM_EXEC = 2    /* NOTE(review): presumably executing the staged instruction; confirm */
 177 //    TM_ABORT = 3   (disabled state; struct v3_trans_mem has a separate uint64_t field named TM_ABORT)
 178 };
 179
 180 typedef enum v3_tm_op { /* kind of memory access; type of the `op_type` argument of tm_check_conflict() */
 181     OP_TYPE_READ,  /* 0 */
 182     OP_TYPE_WRITE  /* 1 */
 183 } v3_tm_op_t;
 184
 185 struct v3_trans_mem { /* transactional-memory bookkeeping; NOTE(review): presumably one instance per guest core (see ginfo); confirm */
 186     /* current transaction */
 187     uint64_t t_num; /* advanced via v3_tm_inc_tnum() */
 188
 189     /* 441-tm: linked list to store core's reads and writes */
 190     struct list_head trans_r_list; /* embedded by value, so the full struct list_head definition must already be visible; NOTE(review): presumably pulled in by the includes above */
 191     struct list_head trans_w_list;
 192
 193     /* 441-tm: hash tables of addresses */
 194     struct hashtable * addr_ctxt;       // records the core transaction context at time of address use
 195     v3_lock_t addr_ctxt_lock; /* the <table>_lock name is what HTABLE_SEARCH/HTABLE_INSERT paste together for addr_ctxt */
 196     uint64_t addr_ctxt_entries;
 197
 198     struct hashtable * access_type;     // hashes addr:corenum:t_num for each address use
 199     v3_lock_t access_type_lock; /* lock looked up by HTABLE_SEARCH/HTABLE_INSERT for access_type */
 200     uint64_t access_type_entries;
 201
 202     /* 441-tm: let's remember things about the next instruction */
 203     uint8_t dirty_instr_flag;
 204     addr_t  dirty_hva; /* NOTE(review): presumably the host virtual address of the overwritten instruction bytes; confirm */
 205     addr_t  dirty_gva; /* NOTE(review): presumably the guest virtual address of the same bytes; confirm */
 206     uchar_t dirty_instr[15]; /* NOTE(review): the literal 15 duplicates INSTR_BUF_SZ defined earlier in this header; consider using the named constant */
 207     int     cur_instr_len;
 208
 209     enum TM_MODE_E TM_MODE;
 210     enum TM_STATE_E TM_STATE;
 211     uint64_t TM_ABORT; /* NOTE(review): presumably built from the ABORT_* bit indices; confirm */
 212
 213     struct shadow_page_data * staging_page;
 214
 215     /* 441-tm: Remember the failsafe addr */
 216     addr_t  fail_call;
 217
 218     /* 441-tm: Save the rax we are about to ruin */
 219     v3_reg_t clobbered_rax;
 220
 221     // branching instrs
 222     int to_branch;
 223     addr_t offset;
 224
 225     // timing info
 226     uint64_t entry_time;
 227     uint64_t exit_time;
 228     uint64_t entry_exits;
 229
 230     // cache box
 231     struct cache_box * box;
 232
 233     struct guest_info * ginfo; /* back-pointer to the owning core's guest_info */
 234
 235 };
 236
 237
 238 struct v3_tm_state { /* TM state with its own lock; NOTE(review): presumably the VM-wide counterpart of struct v3_trans_mem; confirm */
 239     v3_lock_t lock; /* NOTE(review): presumably guards the fields below; confirm */
 240     enum TM_MODE_E TM_MODE;
 241     uint64_t cores_active; /* NOTE(review): presumably the number of cores currently in a transaction; confirm */
 242
 243     uint64_t  * last_trans; /* NOTE(review): presumably an array indexed by core number; allocation and length are not visible here */
 244 };
 245
 246 struct hash_chain { /* list node carrying a uint64_t pointer; NOTE(review): presumably the value chained under each addr_ctxt entry; confirm */
 247     uint64_t * curr_lt; /* NOTE(review): "lt" presumably means last transaction (cf. last_trans in struct v3_tm_state) */
 248
 249     struct list_head lt_node; /* linkage into the containing list */
 250 };
 251
 252 // called from #PF handler, stages entries, catches reads / writes
 253 addr_t v3_handle_trans_mem_fault(struct guest_info *core,
 254                                  addr_t fault_addr,
 255                                  pf_error_t error); // NOTE(review): returns addr_t; presumably ERR_TRANS_FAULT_FAIL (0) or TRANS_FAULT_OK (1) -- confirm
 256
 257 // restores instruction after core->rip (NOTE(review): presumably from the dirty_instr buffer in struct v3_trans_mem; return convention not visible here)
 258 int v3_restore_dirty_instr(struct guest_info *core);
 259
 260 // restores instruction after core->rip (NOTE(review): comment is identical to the one on v3_restore_dirty_instr; confirm how the abort variant differs)
 261 int v3_restore_abort_instr(struct guest_info *core);
 262
 263 // handles abort cleanup, called from INT/EXCP or XABORT
 264 int v3_handle_trans_abort(struct guest_info *core,
 265                          tm_abrt_cause_t cause, // why the transaction is being aborted
 266                          uint8_t xabort_reason); // NOTE(review): presumably the imm8 operand of XABORT, meaningful only when cause is TM_ABORT_XABORT; confirm
 267
 268 // record a memory access in hashes (NOTE(review): presumably the addr_ctxt and access_type tables of tm; confirm)
 269 int tm_record_access (struct v3_trans_mem * tm,
 270                       uint8_t write, // NOTE(review): presumably non-zero for a write, zero for a read; confirm
 271                       addr_t gva); // guest virtual address that was accessed
 272
 273 // garbage collect hash recordings (NOTE(review): which entries are eligible and the return convention are not visible in this header)
 274 int tm_hash_gc (struct v3_trans_mem * tm);
 275
 276 // check address for conflicts (NOTE(review): presumably returns one of the "conflict checking codes" defined above; confirm)
 277 int tm_check_conflict(struct   v3_vm_info * vm_info,
 278                       addr_t   gva, // guest virtual address being accessed
 279                       v3_tm_op_t op_type, // OP_TYPE_READ or OP_TYPE_WRITE
 280                       uint64_t core_num, // NOTE(review): presumably the core performing the access; confirm
 281                       uint64_t curr_ctxt); // NOTE(review): presumably the caller's current transaction number; confirm
 282
 283 // increment transaction number (the t_num field of tm; return convention not visible here)
 284 int v3_tm_inc_tnum(struct v3_trans_mem * tm);
 285
 286
 287 /* exception-related functions */
 288 int v3_tm_handle_exception(struct guest_info * info, addr_t exit_code); // NOTE(review): exit_code is presumably the SVM exit code, given the vmcb.h dependency; confirm
 289
 290 void v3_tm_set_excp_intercepts(vmcb_ctrl_t * ctrl_area); // NOTE(review): presumably enables exception intercepts in the given VMCB control area; which ones is not visible here
 291
 292 void v3_tm_check_intr_state(struct guest_info * info, // NOTE(review): behaviour not visible here; presumably inspects pending interrupt state while a transaction is active
 293         vmcb_ctrl_t * guest_ctrl,
 294         vmcb_saved_state_t * guest_state);
 295
 296
 297 /* paging-related functions */
 298 int v3_tm_handle_pf_64 (struct guest_info * info,
 299                         pf_error_t error_code,
 300                         addr_t fault_addr,
 301                         addr_t * page_to_use); // non-const pointer, so presumably an output parameter; NOTE(review): confirm
 302
 303 void v3_tm_handle_usr_tlb_miss(struct guest_info * info,
 304                                pf_error_t error_code,
 305                                addr_t page_to_use, // NOTE(review): presumably the value produced by v3_tm_handle_pf_64(); confirm
 306                                addr_t * shadow_pa); // non-const pointer, so presumably an output parameter; NOTE(review): confirm
 307
 308 void v3_tm_handle_read_fault(struct guest_info * info,
 309                              pf_error_t error_code,
 310                              pte64_t * shadow_pte); // non-const pointer to a 64-bit shadow PTE; NOTE(review): presumably updated in place; confirm
 311
 312 #include <palacios/vmm_decoder.h>
 313
 314 /* decoding-related functions (NOTE(review): struct x86_instr is presumably why vmm_decoder.h is included immediately above) */
 315 int v3_tm_decode_rtm_instrs(struct guest_info * info,
 316                             addr_t instr_ptr, // address of the instruction bytes to decode
 317                             struct x86_instr * instr); // NOTE(review): presumably filled in with the decoded instruction; return convention not visible here
 318
 319
 320 #endif