#include <palacios/vmm_decoder.h>
#include <palacios/vmm_instr_decoder.h>
+#ifndef V3_CONFIG_DEBUG_DECODER
+#undef PrintDebug
+#define PrintDebug(fmt, args...)
+#endif
+
+
+#define MASK(val, length) ({ \
+ uint64_t mask = 0x0LL; \
+ switch (length) { \
+ case 1: \
+ mask = 0x00000000000000ffLL; \
+ break; \
+ case 2: \
+ mask = 0x000000000000ffffLL; \
+ break; \
+ case 4: \
+ mask = 0x00000000ffffffffLL; \
+ break; \
+ case 8: \
+ mask = 0xffffffffffffffffLL; \
+ break; \
+ } \
+ val & mask; \
+ })
static v3_op_type_t op_form_to_type(op_form_t form);
static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct x86_instr * instr, op_form_t form);
int v3_disasm(struct guest_info * info, void *instr_ptr, addr_t * rip, int mark) {
- return 0;
+ return -1;
}
-
int v3_init_decoder(struct guest_info * core) {
return 0;
}
int v3_encode(struct guest_info * info, struct x86_instr * instr, uint8_t * instr_buf) {
- return 0;
+ return -1;
}
+
int v3_decode(struct guest_info * core, addr_t instr_ptr, struct x86_instr * instr) {
- op_form_t form;
+ op_form_t form = INVALID_INSTR;
+ int ret = 0;
+ int length = 0;
+
+
+ PrintDebug(core->vm_info, core, "Decoding Instruction at %p\n", (void *)instr_ptr);
memset(instr, 0, sizeof(struct x86_instr));
// scan for prefixes
- instr_ptr += v3_get_prefixes((uint8_t *)instr_ptr, &(instr->prefixes));
+ length = v3_get_prefixes((uint8_t *)instr_ptr, &(instr->prefixes));
- // check for REX prefix
+ // REX prefix
+ if (v3_get_vm_cpu_mode(core) == LONG) {
+ uint8_t prefix = *(uint8_t *)(instr_ptr + length);
+ if ((prefix & 0xf0) == 0x40) {
+ instr->prefixes.rex = 1;
- form = op_code_to_form((uint8_t *)instr_ptr);
- instr->op_type = op_form_to_type(form);
+ instr->prefixes.rex_rm = (prefix & 0x01);
+ instr->prefixes.rex_sib_idx = ((prefix & 0x02) >> 1);
+ instr->prefixes.rex_reg = ((prefix & 0x04) >> 2);
+ instr->prefixes.rex_op_size = ((prefix & 0x08) >> 3);
- parse_operands(core, (uint8_t *)instr_ptr, instr, form);
+ length += 1;
+ }
+ }
- return 0;
-}
+ form = op_code_to_form((uint8_t *)(instr_ptr + length), &length);
+ PrintDebug(core->vm_info, core, "\t decoded as (%s)\n", op_form_to_str(form));
+ if (form == INVALID_INSTR) {
+ PrintError(core->vm_info, core, "Could not find instruction form (%x)\n", *(uint32_t *)(instr_ptr + length));
+ return -1;
+ }
+ instr->op_type = op_form_to_type(form);
+
+ ret = parse_operands(core, (uint8_t *)(instr_ptr + length), instr, form);
+
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Could not parse instruction operands\n");
+ return -1;
+ }
+ length += ret;
+
+ instr->instr_length += length;
+
+#ifdef V3_CONFIG_DEBUG_DECODER
+ V3_Print(core->vm_info, core, "Decoding Instr at %p\n", (void *)core->rip);
+ v3_print_instr(instr);
+ V3_Print(core->vm_info, core, "CS DB FLag=%x\n", core->segments.cs.db);
+#endif
+
+ return 0;
+}
-static int parse_operands(struct guest_info * core, uint8_t * instr_ptr, struct x86_instr * instr, op_form_t form) {
+static int parse_operands(struct guest_info * core, uint8_t * instr_ptr,
+ struct x86_instr * instr, op_form_t form) {
// get operational mode of the guest for operand width
- int operand_width = get_operand_width(core, instr, form);
+ uint8_t operand_width = get_operand_width(core, instr, form);
+ uint8_t addr_width = get_addr_width(core, instr);
int ret = 0;
-
+ uint8_t * instr_start = instr_ptr;
- switch (form) {
+ PrintDebug(core->vm_info, core, "\tOperand width=%d, Addr width=%d\n", operand_width, addr_width);
+
+ switch (form) {
case ADC_IMM2_8:
case ADD_IMM2_8:
case AND_IMM2_8:
case AND_IMM2:
case OR_IMM2:
case SUB_IMM2:
- case XOR_IMM2:
- case MOV_IMM2:{
- uint8_t reg_code = 0;;
- instr->dst_operand.size = operand_width;
+ case XOR_IMM2:
+ case MOV_IMM2: {
+ uint8_t reg_code = 0;
- ret = decode_rm_operand(core, instr_ptr, &(instr->dst_operand), ®_code);
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->dst_operand), ®_code);
if (ret == -1) {
- PrintError("Error decoding operand\n");
+ PrintError(core->vm_info, core, "Error decoding operand\n");
return -1;
}
instr->src_operand.type = IMM_OPERAND;
instr->src_operand.size = operand_width;
+
if (operand_width == 1) {
instr->src_operand.operand = *(uint8_t *)instr_ptr;
} else if (operand_width == 2) {
instr->src_operand.operand = *(uint16_t *)instr_ptr;
} else if (operand_width == 4) {
instr->src_operand.operand = *(uint32_t *)instr_ptr;
+ } else if (operand_width == 8) {
+ instr->src_operand.operand = *(sint32_t *)instr_ptr; // This is a special case for sign extended 64bit ops
} else {
- PrintError("Illegal operand width (%d)\n", operand_width);
+ PrintError(core->vm_info, core, "Illegal operand width (%d)\n", operand_width);
return -1;
}
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr_ptr += operand_width;
+
+ instr->num_operands = 2;
+
break;
}
case ADC_2MEM_8:
case MOV_2MEM: {
uint8_t reg_code = 0;
- instr->dst_operand.size = operand_width;
-
- ret = decode_rm_operand(core, instr_ptr, &(instr->dst_operand), ®_code);
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->dst_operand), ®_code);
if (ret == -1) {
- PrintError("Error decoding operand\n");
+ PrintError(core->vm_info, core, "Error decoding operand\n");
return -1;
}
instr->src_operand.type = REG_OPERAND;
instr->src_operand.size = operand_width;
- decode_gpr(&(core->vm_regs), reg_code, &(instr->src_operand));
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+
+ decode_gpr(core, reg_code, &(instr->src_operand));
+
+ instr->num_operands = 2;
break;
}
-
case ADC_MEM2_8:
case ADD_MEM2_8:
case AND_MEM2_8:
case XOR_MEM2:
case MOV_MEM2: {
uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->src_operand), ®_code);
+
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Error decoding operand\n");
+ return -1;
+ }
+
+ instr_ptr += ret;
+
+ instr->dst_operand.size = operand_width;
+ instr->dst_operand.type = REG_OPERAND;
+ decode_gpr(core, reg_code, &(instr->dst_operand));
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr->num_operands = 2;
+
+ break;
+ }
+ case MOV_MEM2AL_8:
+ case MOV_MEM2AX: {
+
+ /* Use AX for destination operand */
+ instr->dst_operand.size = operand_width;
+ instr->dst_operand.type = REG_OPERAND;
+ instr->dst_operand.operand = (addr_t)&(core->vm_regs.rax);
+ instr->dst_operand.write = 1;
+
+ /* Get the correct offset -- (seg + offset) */
+ struct v3_segment * src_reg = get_instr_segment(core, instr);
+ addr_t offset = 0;
+
+ if (addr_width == 2) {
+ offset = *(uint16_t *)instr_ptr;
+ } else if (addr_width == 4) {
+ offset = *(uint32_t *)instr_ptr;
+ } else if (addr_width == 8) {
+ offset = *(uint64_t *)instr_ptr;
+ } else {
+ PrintError(core->vm_info, core, "illegal address width for %s (width=%d)\n",
+ op_form_to_str(form), addr_width);
+ return -1;
+ }
+
+ instr->src_operand.operand = ADDR_MASK(get_addr_linear(core, offset, src_reg),
+ get_addr_width(core, instr));
+
+ instr->src_operand.read = 1;
+ instr->src_operand.type = MEM_OPERAND;
+ instr->src_operand.size = addr_width;
+
+ instr_ptr += addr_width;
+ instr->num_operands = 2;
+
+ break;
+ }
+ case MOV_AL2MEM_8:
+ case MOV_AX2MEM: {
+
+ /* Use AX for src operand */
instr->src_operand.size = operand_width;
+ instr->src_operand.type = REG_OPERAND;
+ instr->src_operand.operand = (addr_t)&(core->vm_regs.rax);
+ instr->src_operand.write = 1;
+
+ /* Get the correct offset -- (seg + offset) */
+ struct v3_segment * dst_reg = get_instr_segment(core, instr);
+ addr_t offset = 0;
+
+ if (addr_width == 2) {
+ offset = *(uint16_t *)instr_ptr;
+ } else if (addr_width == 4) {
+ offset = *(uint32_t *)instr_ptr;
+ } else if (addr_width == 8) {
+ offset = *(uint64_t *)instr_ptr;
+ } else {
+ PrintError(core->vm_info, core, "illegal address width for %s (width=%d)\n",
+ op_form_to_str(form), addr_width);
+ return -1;
+ }
- ret = decode_rm_operand(core, instr_ptr, &(instr->src_operand), ®_code);
+ instr->dst_operand.operand = ADDR_MASK(get_addr_linear(core, offset, dst_reg),
+ get_addr_width(core, instr));
+
+ instr->dst_operand.read = 1;
+ instr->dst_operand.type = MEM_OPERAND;
+ instr->dst_operand.size = addr_width;
+
+ instr_ptr += addr_width;
+ instr->num_operands = 2;
+
+ break;
+ }
+ case MOVSX_8:
+ case MOVZX_8: {
+ uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->src_operand), ®_code);
+ instr->src_operand.size = 1;
if (ret == -1) {
- PrintError("Error decoding operand\n");
+ PrintError(core->vm_info, core, "Error decoding operand\n");
return -1;
}
instr->dst_operand.size = operand_width;
instr->dst_operand.type = REG_OPERAND;
- decode_gpr(&(core->vm_regs), reg_code, &(instr->dst_operand));
+ decode_gpr(core, reg_code, &(instr->dst_operand));
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr->num_operands = 2;
break;
}
+ case MOVSX:
+ case MOVZX: {
+ uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->src_operand), ®_code);
+ instr->src_operand.size = 2;
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Error decoding operand\n");
+ return -1;
+ }
+
+ instr_ptr += ret;
+
+ instr->dst_operand.size = operand_width;
+ instr->dst_operand.type = REG_OPERAND;
+ decode_gpr(core, reg_code, &(instr->dst_operand));
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+ instr->num_operands = 2;
+
+ break;
+ }
case ADC_IMM2SX_8:
case ADD_IMM2SX_8:
case AND_IMM2SX_8:
case SUB_IMM2SX_8:
case XOR_IMM2SX_8: {
uint8_t reg_code = 0;
- instr->src_operand.size = operand_width;
- ret = decode_rm_operand(core, instr_ptr, &(instr->src_operand), ®_code);
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->dst_operand), ®_code);
if (ret == -1) {
- PrintError("Error decoding operand\n");
+ PrintError(core->vm_info, core, "Error decoding operand\n");
return -1;
}
instr->src_operand.type = IMM_OPERAND;
instr->src_operand.size = operand_width;
- instr->src_operand.operand = *(sint8_t *)instr_ptr; // sign extend.
+ instr->src_operand.operand = (addr_t)MASK((sint64_t)*(sint8_t *)instr_ptr, operand_width); // sign extend.
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+ instr_ptr += 1;
+ instr->num_operands = 2;
+
+ break;
}
+ case MOVS:
+ case MOVS_8: {
+ instr->is_str_op = 1;
+
+ if (instr->prefixes.rep == 1) {
+ instr->str_op_length = MASK(core->vm_regs.rcx, addr_width);
+ } else {
+ instr->str_op_length = 1;
+ }
+
+ // Source: DS:(E)SI
+ // Destination: ES:(E)DI
+
+ instr->src_operand.type = MEM_OPERAND;
+ instr->src_operand.size = operand_width;
+ instr->src_operand.operand = get_addr_linear(core, MASK(core->vm_regs.rsi, addr_width), &(core->segments.ds));
+
+ instr->dst_operand.type = MEM_OPERAND;
+ instr->dst_operand.size = operand_width;
+ instr->dst_operand.operand = get_addr_linear(core, MASK(core->vm_regs.rdi, addr_width), &(core->segments.es));
+
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr->num_operands = 2;
+
+ break;
+ }
+ case MOV_2CR: {
+ uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->src_operand),
+ ®_code);
+
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Error decoding operand for (%s)\n", op_form_to_str(form));
+ return -1;
+ }
+
+ instr_ptr += ret;
+
+ instr->dst_operand.type = REG_OPERAND;
+ instr->dst_operand.size = operand_width;
+ decode_cr(core, reg_code, &(instr->dst_operand));
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr->num_operands = 2;
+ break;
+ }
+ case MOV_CR2: {
+ uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->dst_operand),
+ ®_code);
+
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Error decoding operand for (%s)\n", op_form_to_str(form));
+ return -1;
+ }
+
+ instr_ptr += ret;
+
+ instr->src_operand.type = REG_OPERAND;
+ instr->src_operand.size = operand_width;
+ decode_cr(core, reg_code, &(instr->src_operand));
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr->num_operands = 2;
+ break;
+ }
+ case STOS:
+ case STOS_8: {
+ instr->is_str_op = 1;
+
+ if (instr->prefixes.rep == 1) {
+ instr->str_op_length = MASK(core->vm_regs.rcx, addr_width);
+ } else {
+ instr->str_op_length = 1;
+ }
+
+ instr->src_operand.size = operand_width;
+ instr->src_operand.type = REG_OPERAND;
+ instr->src_operand.operand = (addr_t)&(core->vm_regs.rax);
+
+ instr->dst_operand.type = MEM_OPERAND;
+ instr->dst_operand.size = operand_width;
+ instr->dst_operand.operand = get_addr_linear(core, MASK(core->vm_regs.rdi, addr_width), &(core->segments.es));
+
+ instr->src_operand.read = 1;
+ instr->dst_operand.write = 1;
+
+ instr->num_operands = 2;
+
+ break;
+ }
+ case INT: {
+ instr->dst_operand.type = IMM_OPERAND;
+ instr->dst_operand.size = operand_width;
+ instr->dst_operand.operand = *(uint8_t *)instr_ptr;
+ instr_ptr += operand_width;
+ instr->num_operands = 1;
+
+ break;
+ }
+ case INVLPG: {
+ uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->dst_operand), ®_code);
+
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Error decoding operand for (%s)\n", op_form_to_str(form));
+ return -1;
+ }
+
+ instr_ptr += ret;
+
+ instr->num_operands = 1;
+ break;
+ }
+ case LMSW:
+ case SMSW: {
+ uint8_t reg_code = 0;
+
+ ret = decode_rm_operand(core, instr_ptr, form, instr, &(instr->dst_operand), ®_code);
+
+ if (ret == -1) {
+ PrintError(core->vm_info, core, "Error decoding operand for (%s)\n", op_form_to_str(form));
+ return -1;
+ }
+
+ instr_ptr += ret;
+
+ instr->dst_operand.read = 1;
+
+ instr->num_operands = 1;
+ break;
+ }
+ case CLTS: {
+ // no operands.
+ break;
+ }
default:
- PrintError("Invalid Instruction form: %d\n", form);
+ PrintError(core->vm_info, core, "Invalid Instruction form: %s\n", op_form_to_str(form));
return -1;
-
}
- return 0;
+ return (instr_ptr - instr_start);
}
+
static v3_op_type_t op_form_to_type(op_form_t form) {
switch (form) {
case LMSW:
case INVLPG:
return V3_OP_INVLPG;
+ case INT:
+ return V3_OP_INT;
+
case MOV_CR2:
return V3_OP_MOVCR2;
case MOV_2CR:
return V3_OP_MOV2CR;
-
case MOV_MEM2_8:
case MOV_MEM2:
case MOV_2MEM_8:
return V3_OP_MOVZX;
-
case ADC_2MEM_8:
case ADC_2MEM:
case ADC_MEM2_8: