From: Jack Lange Date: Tue, 8 Mar 2011 00:23:23 +0000 (-0600) Subject: decoder updates X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?a=commitdiff_plain;h=d3a58bbcc5cc9105e894771b1b2c39f76c86a70e;p=palacios.git decoder updates --- diff --git a/palacios/include/palacios/vmm_decoder.h b/palacios/include/palacios/vmm_decoder.h index 41817f7..a6bee03 100644 --- a/palacios/include/palacios/vmm_decoder.h +++ b/palacios/include/palacios/vmm_decoder.h @@ -67,7 +67,7 @@ struct x86_prefixes { struct x86_instr { struct x86_prefixes prefixes; - uint_t instr_length; + uint8_t instr_length; v3_op_type_t op_type; uint_t num_operands; struct x86_operand dst_operand; @@ -75,7 +75,7 @@ struct x86_instr { struct x86_operand third_operand; addr_t str_op_length; addr_t is_str_op; - void * decoder_data; + // void * decoder_data; }; diff --git a/palacios/include/palacios/vmm_instr_decoder.h b/palacios/include/palacios/vmm_instr_decoder.h index b9dc7fd..2958f4f 100644 --- a/palacios/include/palacios/vmm_instr_decoder.h +++ b/palacios/include/palacios/vmm_instr_decoder.h @@ -23,11 +23,6 @@ /* .... Giant fucking switch tables */ - - - - - typedef enum { INVALID_INSTR, LMSW, @@ -140,16 +135,31 @@ typedef enum { } op_form_t; +static int get_addr_width(struct guest_info * info, struct x86_instr * instr, + op_form_t form) { + switch (v3_get_vm_cpu_mode(info)) { + case REAL: + return (instr->prefixes.addr_size) ? 4 : 2; + case PROTECTED: + case PROTECTED_PAE: + return (instr->prefixes.addr_size) ? 
2 : 4; + case LONG_32_COMPAT: + case LONG: + default: + PrintError("Unsupported CPU mode: %d\n", info->cpu_mode); + return -1; + } +} -static int get_operand_width(struct guest_info * info, struct x86_instr * instr, op_form_t form) { +static int get_operand_width(struct guest_info * info, struct x86_instr * instr, + op_form_t form) { switch (form) { case CLTS: case HLT: return 0; - case MOV_MEM2_8: case MOV_2MEM_8: case MOV_MEM2AL_8: @@ -200,7 +210,6 @@ static int get_operand_width(struct guest_info * info, struct x86_instr * instr, case SETO: return 1; - case LMSW: case SMSW: return 2; @@ -256,7 +265,6 @@ static int get_operand_width(struct guest_info * info, struct x86_instr * instr, return -1; } - case INVLPG: switch (v3_get_vm_cpu_mode(info)) { case REAL: @@ -272,7 +280,6 @@ static int get_operand_width(struct guest_info * info, struct x86_instr * instr, return -1; } - case PUSHF: case POPF: switch (v3_get_vm_cpu_mode(info)) { @@ -304,7 +311,6 @@ static int get_operand_width(struct guest_info * info, struct x86_instr * instr, return -1; } - case MOV_SR2: case MOV_2SR: default: @@ -672,7 +678,9 @@ static int decode_rm_operand(struct guest_info * core, -static inline op_form_t op_code_to_form_0f(uint8_t * instr) { +static inline op_form_t op_code_to_form_0f(uint8_t * instr, int * length) { + *length += 1; + switch (instr[1]) { case 0x01: { struct modrm_byte * modrm = (struct modrm_byte *)&(instr[2]); @@ -751,7 +759,9 @@ static inline op_form_t op_code_to_form_0f(uint8_t * instr) { } -static op_form_t op_code_to_form(uint8_t * instr) { +static op_form_t op_code_to_form(uint8_t * instr, int * length) { + *length += 1; + switch (instr[0]) { case 0x00: return ADD_2MEM_8; @@ -773,7 +783,7 @@ static op_form_t op_code_to_form(uint8_t * instr) { case 0x0f: - return op_code_to_form_0f(instr); + return op_code_to_form_0f(instr, length); case 0x10: return ADC_2MEM_8; diff --git a/palacios/src/palacios/vmm_v3dec.c b/palacios/src/palacios/vmm_v3dec.c index 55f1df2..60acddb 
100644 --- a/palacios/src/palacios/vmm_v3dec.c +++ b/palacios/src/palacios/vmm_v3dec.c @@ -21,6 +21,34 @@ #include +/* Disgusting mask hack... + I can't think right now, so we'll do it this way... +*/ +static const ullong_t mask_1 = 0x00000000000000ffLL; +static const ullong_t mask_2 = 0x000000000000ffffLL; +static const ullong_t mask_4 = 0x00000000ffffffffLL; +static const ullong_t mask_8 = 0xffffffffffffffffLL; + + +#define MASK(val, length) ({ \ + ullong_t mask = 0x0LL; \ + switch (length) { \ + case 1: \ + mask = mask_1; \ + break; \ + case 2: \ + mask = mask_2; \ + break; \ + case 4: \ + mask = mask_4; \ + break; \ + case 8: \ + mask = mask_8; \ + break; \ + } \ + val & mask; \ + }) + static v3_op_type_t op_form_to_type(op_form_t form); static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct x86_instr * instr, op_form_t form); @@ -30,7 +58,6 @@ int v3_disasm(struct guest_info * info, void *instr_ptr, addr_t * rip, int mark) } - int v3_init_decoder(struct guest_info * core) { return 0; } @@ -45,39 +72,56 @@ int v3_encode(struct guest_info * info, struct x86_instr * instr, uint8_t * inst return 0; } + int v3_decode(struct guest_info * core, addr_t instr_ptr, struct x86_instr * instr) { - op_form_t form; + op_form_t form = INVALID_INSTR; + int ret = 0; + int length = 0; memset(instr, 0, sizeof(struct x86_instr)); // scan for prefixes - instr_ptr += v3_get_prefixes((uint8_t *)instr_ptr, &(instr->prefixes)); + length = v3_get_prefixes((uint8_t *)instr_ptr, &(instr->prefixes)); // check for REX prefix - form = op_code_to_form((uint8_t *)instr_ptr); - instr->op_type = op_form_to_type(form); + form = op_code_to_form((uint8_t *)(instr_ptr + length), &length); - parse_operands(core, (uint8_t *)instr_ptr, instr, form); + if (form == INVALID_INSTR) { + PrintError("Could not find instruction form (%x)\n", *(uint32_t *)(instr_ptr + length)); + return -1; + } + instr->op_type = op_form_to_type(form); - return 0; -} + ret = parse_operands(core, 
(uint8_t *)(instr_ptr + length), instr, form); + + if (ret == -1) { + PrintError("Could not parse instruction operands\n"); + return -1; + } + length += ret; + + instr->instr_length += length; + return 0; +} -static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct x86_instr * instr, op_form_t form) { +static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, + struct x86_instr * instr, op_form_t form) { // get operational mode of the guest for operand width - int operand_width = get_operand_width(core, instr, form); + uint8_t operand_width = get_operand_width(core, instr, form); + uint8_t addr_width = get_addr_width(core, instr, form);; int ret = 0; - + uint8_t * instr_start = instr_ptr; - switch (form) { + switch (form) { case ADC_IMM2_8: case ADD_IMM2_8: case AND_IMM2_8: @@ -92,7 +136,8 @@ static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct case SUB_IMM2: case XOR_IMM2: case MOV_IMM2:{ - uint8_t reg_code = 0;; + uint8_t reg_code = 0; + instr->dst_operand.size = operand_width; ret = decode_rm_operand(core, instr_ptr, &(instr->dst_operand), &reg_code); @@ -118,6 +163,10 @@ static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct return -1; } + instr_ptr += operand_width; + + instr->num_operands = 2; + break; } case ADC_2MEM_8: @@ -151,9 +200,10 @@ static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct instr->src_operand.size = operand_width; decode_gpr(&(core->vm_regs), reg_code, &(instr->src_operand)); + + instr->num_operands = 2; break; } - case ADC_MEM2_8: case ADD_MEM2_8: case AND_MEM2_8: @@ -184,10 +234,10 @@ static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct instr->dst_operand.type = REG_OPERAND; decode_gpr(&(core->vm_regs), reg_code, &(instr->dst_operand)); + instr->num_operands = 2; + break; } - - case ADC_IMM2SX_8: case ADD_IMM2SX_8: case AND_IMM2SX_8: @@ -210,18 +260,46 @@ static int parse_operands(struct guest_info 
* core, uint8_t * instr_ptr, struct instr->src_operand.size = operand_width; instr->src_operand.operand = *(sint8_t *)instr_ptr; // sign extend. + instr_ptr += 1; + + instr->num_operands = 2; + break; } + case MOVS: + case MOVS_8: { + instr->is_str_op = 1; + + if (instr->prefixes.rep == 1) { + instr->str_op_length = MASK(core->vm_regs.rcx, operand_width); + } else { + instr->str_op_length = 1; + } + + // Source: DS:(E)SI + // Source: ES:(E)DI + instr->src_operand.type = MEM_OPERAND; + instr->src_operand.size = operand_width; + instr->src_operand.operand = core->segments.ds.base + MASK(core->vm_regs.rsi, addr_width); + + instr->src_operand.type = MEM_OPERAND; + instr->src_operand.size = operand_width; + instr->src_operand.operand = core->segments.es.base + MASK(core->vm_regs.rdi, addr_width); + + instr->num_operands = 2; + + break; + } default: PrintError("Invalid Instruction form: %d\n", form); return -1; - } - return 0; + return (instr_ptr - instr_start); } + static v3_op_type_t op_form_to_type(op_form_t form) { switch (form) { case LMSW: diff --git a/palacios/src/palacios/vmm_xed.c b/palacios/src/palacios/vmm_xed.c index e23ace0..a3b39d5 100644 --- a/palacios/src/palacios/vmm_xed.c +++ b/palacios/src/palacios/vmm_xed.c @@ -236,7 +236,8 @@ static int decode_string_op(struct guest_info * info, addr_t reg_addr = 0; uint_t reg_length = 0; - xed_reg_to_v3_reg(info, xed_decoded_inst_get_reg(xed_instr, XED_OPERAND_REG1), &reg_addr, &reg_length); + xed_reg_to_v3_reg(info, xed_decoded_inst_get_reg(xed_instr, XED_OPERAND_REG1), + &reg_addr, &reg_length); instr->str_op_length = MASK(*(addr_t *)reg_addr, reg_length); } else { instr->str_op_length = 1; @@ -674,7 +675,8 @@ static int get_memory_operand(struct guest_info * info, xed_decoded_inst_t * xe } -static int xed_reg_to_v3_reg(struct guest_info * info, xed_reg_enum_t xed_reg, addr_t * v3_reg, uint_t * reg_len) { +static int xed_reg_to_v3_reg(struct guest_info * info, xed_reg_enum_t xed_reg, + addr_t * v3_reg, uint_t * reg_len) { 
PrintDebug("Xed Register: %s\n", xed_reg_enum_t2str(xed_reg));