+++ /dev/null
-/*BEGIN_LEGAL
-Intel Open Source License
-
-Copyright (c) 2002-2007 Intel Corporation
-All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer. Redistributions
-in binary form must reproduce the above copyright notice, this list of
-conditions and the following disclaimer in the documentation and/or
-other materials provided with the distribution. Neither the name of
-the Intel Corporation nor the names of its contributors may be used to
-endorse or promote products derived from this software without
-specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
-ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-END_LEGAL */
-/// @file xed-enc-lang.cpp
-/// @author Mark Charney <mark.charney@intel.com>
-
-// This is an example of how to use the encoder from scratch in the context
-// of parsing a string from the command line.
-
-
-#include <iostream>
-#include <iomanip>
-#include <vector>
-#include <sstream>
-#include <cassert>
-extern "C" {
-#include "xed-interface.h"
-#include "xed-portability.h"
-#include "xed-examples-util.h"
-}
-#include "xed-enc-lang.H"
-
-
-
-
-using namespace std;
-static char xed_enc_lang_toupper(char c) {
- if (c >= 'a' && c <= 'z')
- return c-'a'+'A';
- return c;
-}
-
-static string upcase(string s) {
- string t = "";
- xed_uint_t len = static_cast<xed_uint_t>(s.size());
- xed_uint_t i;
- for(i=0 ; i < len ; i++ )
- t = t + xed_enc_lang_toupper(s[i]);
- return t;
-}
-
-unsigned int
-xed_split_args(const string& sep,
- const string& input,
- vector<string>& output_array)
-{
- // returns the number of args
- // rip off the separator characters and split the src string based on separators.
-
- // find the string between last_pos and pos. pos is after last_pos
- string::size_type last_pos = input.find_first_not_of(sep, 0);
- string::size_type pos = input.find_first_of(sep, last_pos);
- if (CLIENT_VERBOSE3)
- printf("input %s\tlast_pos " XED_FMT_U " pos " XED_FMT_U "\n",
- input.c_str() , STATIC_CAST(xed_uint_t,last_pos), STATIC_CAST(xed_uint_t,pos));
- int i=0;
- while( pos != string::npos && last_pos != string::npos )
- {
- string a = input.substr(last_pos, pos-last_pos);
- output_array.push_back(a);
- if (CLIENT_VERBOSE3)
- printf("\t\tlast_pos " XED_FMT_U " pos " XED_FMT_U " i %d\n",
- STATIC_CAST(xed_uint_t,last_pos),
- STATIC_CAST(xed_uint_t,pos),
- i);
- last_pos = input.find_first_not_of(sep, pos);
- pos = input.find_first_of(sep, last_pos);
- i++;
- }
- if (last_pos != string::npos && pos == string::npos)
- {
- if (CLIENT_VERBOSE3)
- printf("\t\tGetting last substring at " XED_FMT_U "\n", STATIC_CAST(xed_uint_t,last_pos));
- string a = input.substr(last_pos); // get the rest of the string
- output_array.push_back(a);
- i++;
- }
- if (CLIENT_VERBOSE3)
- printf("\t returning %d\n",i);
- return i;
-}
-
-vector<string>
-tokenize(const string& s,
- const string& delimiter) {
- vector<string> v;
- (void) xed_split_args(delimiter, s, v);
- return v;
-}
-
-
-void slash_split(const string& src,
- string& first, // output
- string& second) //output
-{
- string::size_type p = src.find("/");
- if (p == string::npos) {
- first = src;
- second = "";
- }
- else {
- first = src.substr(0,p);
- second = src.substr(p+1);
- }
-}
-
-class immed_parser_t {
- public:
- xed_bool_t valid;
- string immed;
- unsigned int width_bits;
- xed_uint64_t immed_val;
- string tok0;
-
- immed_parser_t(const string& s,
- const string& arg_tok0) //CONS
- : valid(false),
- tok0(arg_tok0)
- {
- vector<string> vs = tokenize(s,"(),");
- if (vs.size() == 2) {
- if (vs[0] == tok0) {
- string immed_str = vs[1];
- immed_val = convert_ascii_hex_to_int(immed_str.c_str());
- width_bits = static_cast<unsigned int>(immed_str.size()*4); // nibbles to bits
- valid = true;
- }
- }
- }
-
- void
- print(ostream& o) const {
- o << tok0
- << "(" ;
- if (valid)
- o << hex << immed_val << dec;
- else
- o << "???";
- o << ")";
- }
-
-};
-
-ostream& operator<<(ostream& o, const immed_parser_t& x)
-{
- x.print(o);
- return o;
-}
-
-
-class seg_parser_t
-{
- public:
- xed_bool_t valid;
- xed_reg_enum_t segment_reg;
- string segment;
-
- seg_parser_t(const string& s) // CONS
- : valid(false),
- segment_reg(XED_REG_INVALID)
- {
- vector<string> vs = tokenize(s,"(),");
- xed_uint_t ntokens = static_cast<xed_uint_t>(vs.size());
- cerr << "ntokens " << ntokens << ": " ;
- for(unsigned int i=0;i<ntokens;i++) cerr << '[' << vs[i] << "] ";
- cerr << endl;
- if (ntokens == 2 && vs[0] == "SEG") {
- segment = vs[1];
- segment_reg = str2xed_reg_enum_t(segment.c_str());
- if (segment_reg != XED_REG_INVALID && xed_reg_class(segment_reg) == XED_REG_CLASS_SR) {
- valid=true;
- }
- }
- }
-
-};
-
-class mem_bis_parser_t
-{
- // parse: MEM[length]([segment:]base,index,scale[,displacement])
- // parse: AGEN(base,index,scale[,displacement])
- // The displacement is optional
- // The length of the memop is usually optional
- // but required for x87 ops, for example.
- public:
- xed_bool_t valid;
- xed_bool_t mem;
-
- xed_bool_t agen;
- xed_bool_t disp_valid;
- string segment;
- string base;
- string index;
- string scale;
- string disp; //displacement
- xed_reg_enum_t segment_reg;
- xed_reg_enum_t base_reg;
- xed_reg_enum_t index_reg;
- xed_uint8_t scale_val;
-
-
- xed_int64_t disp_val;
- unsigned int disp_width_bits;
-
- unsigned int mem_len;
-
- void
- print(ostream& o) const {
- if (agen)
- o << "AGEN";
- if (mem)
- o << "MEM";
- if (mem_len)
- o << setw(1) << mem_len;
- o << "(";
- if (segment_reg != XED_REG_INVALID)
- o << segment_reg << ":";
- o << base_reg;
- o << "," << index_reg
- << ","
- << (unsigned int) scale_val;
- if (disp_valid)
- o << "," << disp;
- o << ")";
-
- }
-
- mem_bis_parser_t(const string& s) //CONS
- : valid(false),
- disp_valid(false),
- base("INVALID"),
- index("INVALID"),
- scale("1"),
- segment_reg(XED_REG_INVALID),
- base_reg(XED_REG_INVALID),
- index_reg(XED_REG_INVALID),
- disp_val(0),
- disp_width_bits(0),
- mem_len(0)
- {
-
- mem = false;
- agen = false;
- vector<string> vs = tokenize(s,"(),");
- xed_uint_t ntokens = static_cast<xed_uint_t>(vs.size());
- if (ntokens >= 2 && ntokens <= 5) {
- if (vs[0] == "AGEN") {
- agen = true;
- }
- else if (vs[0].substr(0,3) == "MEM") {
- mem = true;
- if (vs[0].size() > 3) {
- string len = vs[0].substr(3);
- mem_len = strtol(len.c_str(),0,0);
- //printf("mem_len = " XED_FMT_U "\n", mem_len);
- }
- }
- else {
- return;
- }
-
- segment = "INVALID";
- string seg_and_base = upcase(vs[1]);
- vector<string> sb = tokenize(seg_and_base,":");
- int seg_and_base_tokens = STATIC_CAST(int,sb.size());
- if (seg_and_base_tokens == 1) {
- segment = "INVALID";
- base = sb[0];
- }
- else if (seg_and_base_tokens == 2) {
- if (agen) {
- xedex_derror("AGENs cannot have segment overrides");
- }
- segment = sb[0];
- base = sb[1];
- }
- else {
- printf("seg_and_base_tokens = %d\n",seg_and_base_tokens);
- xedex_derror("Bad segment-and-base specifier.");
- }
-
- if (base == "-" || base == "NA") {
- base = "INVALID";
- }
- if (ntokens > 2) {
- index = upcase(vs[2]);
- if (index == "-" || index == "NA") {
- index = "INVALID";
- }
- }
-
- if (ntokens > 3) {
- scale = vs[3];
- if (scale == "-" || scale == "NA") {
- scale = "1";
- }
- }
- if (scale == "1" || scale == "2" || scale == "4" || scale == "8") {
- valid=true;
- scale_val = STATIC_CAST(xed_uint8_t,strtol(scale.c_str(), 0, 10));
- segment_reg = str2xed_reg_enum_t(segment.c_str());
- base_reg = str2xed_reg_enum_t(base.c_str());
- index_reg = str2xed_reg_enum_t(index.c_str());
-
- // look for a displacement
- if (ntokens == 5 && vs[4] != "-") {
- disp = vs[4];
- disp_valid = true;
- unsigned int nibbles = STATIC_CAST(int,disp.size());
- if (nibbles & 1) {
- // ensure an even number of nibbles
- string zero("0");
- disp = zero + disp;
- nibbles++;
- }
- disp_val = convert_ascii_hex_to_int(disp.c_str());
- disp_width_bits = nibbles*4; // nibbles to bits
- }
- }
-
- }
-
- }
-};
-
-ostream& operator<<(ostream& o, const mem_bis_parser_t& x) {
- x.print(o);
- return o;
-}
-
-xed_encoder_request_t parse_encode_request(ascii_encode_request_t& areq) {
- unsigned int i;
- xed_encoder_request_t req;
- xed_encoder_request_zero_set_mode(&req,&(areq.dstate)); // calls xed_encoder_request_zero()
-
- /* This is the important function here. This encodes an instruction from scratch.
-
- You must set:
- the machine mode (machine width, addressing widths)
- the effective operand width
- the iclass
- for some instructions you need to specify prefixes (like REP or LOCK).
- the operands:
- operand kind (XED_OPERAND_{AGEN,MEM0,MEM1,IMM0,IMM1,RELBR,PTR,REG0...REG15}
- operand order
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_*);
- where the operand_index is a sequential index starting at zero.
-
- operand details
- FOR MEMOPS: base,segment,index,scale,displacement for memops,
- FOR REGISTERS: register name
- FOR IMMEDIATES: immediate values
-
- */
-
-
- switch(xed_state_get_machine_mode(&(areq.dstate))) {
- // set the default width.
- case XED_MACHINE_MODE_LONG_64:
- xed_encoder_request_set_effective_operand_width(&req, 32);
- xed_encoder_request_set_effective_address_size(&req, 64);
- break;
-
- case XED_MACHINE_MODE_LEGACY_32:
- case XED_MACHINE_MODE_LONG_COMPAT_32:
- xed_encoder_request_set_effective_operand_width(&req, 32);
- xed_encoder_request_set_effective_address_size(&req, 32);
- break;
-
- case XED_MACHINE_MODE_LEGACY_16:
- case XED_MACHINE_MODE_LONG_COMPAT_16:
- xed_encoder_request_set_effective_operand_width(&req, 16);
- xed_encoder_request_set_effective_address_size(&req, 16);
- break;
-
- default:
- assert(0);
- }
-
- //FIXME: allow changing the effective address size from the above defaults.
-
- vector<string> tokens = tokenize(areq.command," ");
- // first token has the operand and our temporary hack for the immediate
-
- string first, second;
- unsigned int token_index = 0;
-
- while(token_index < tokens.size()) {
- slash_split(tokens[token_index], first, second);
- if (CLIENT_VERBOSE3)
- printf( "[%s][%s][%s]\n", tokens[0].c_str(), first.c_str(), second.c_str());
-
- if (token_index == 0 && first == "REP") {
- xed_encoder_request_set_rep(&req);
- token_index++;
- continue;
- }
- else if (token_index == 0 && first == "REPNE") {
- xed_encoder_request_set_repne(&req);
- token_index++;
- continue;
- }
-
- token_index++;
- break;
- }
-
- // we can attempt to override the mode
- if (second == "8")
- xed_encoder_request_set_effective_operand_width(&req, 8);
- else if (second == "16")
- xed_encoder_request_set_effective_operand_width(&req, 16);
- else if (second == "32")
- xed_encoder_request_set_effective_operand_width(&req, 32);
- else if (second == "64")
- xed_encoder_request_set_effective_operand_width(&req, 64);
-
- first = upcase(first);
- xed_iclass_enum_t iclass = str2xed_iclass_enum_t(first.c_str());
- if (iclass == XED_ICLASS_INVALID) {
- ostringstream os;
- os << "Bad instruction name: " << first;
- xedex_derror(os.str().c_str());
- }
- xed_encoder_request_set_iclass(&req, iclass );
-
- xed_uint_t memop = 0;
- xed_uint_t regnum = 0;
- // put the operands in the request. Loop through tokens
- // (skip the opcode iclass, handled above)
- xed_uint_t operand_index = 0;
- for( i=token_index; i < tokens.size(); i++, operand_index++ ) {
- string str_res_reg, second_x;
- slash_split(tokens[i], str_res_reg, second_x);
- str_res_reg = upcase(str_res_reg);
- // prune the AGEN or MEM(base,index,scale[,displacement]) text from str_res_reg
- // FIXME: add MEM(immed) for the OC1_A and OC1_O types????
- mem_bis_parser_t mem_bis(str_res_reg);
- if (mem_bis.valid) {
- if (mem_bis.mem) {
- if (memop == 0) {
- // Tell XED that we have a memory operand
- xed_encoder_request_set_mem0(&req);
- // Tell XED that the mem0 operand is the next operand:
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_MEM0);
- }
- else {
- xed_encoder_request_set_mem1(&req);
- // Tell XED that the mem1 operand is the next operand:
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_MEM1);
- }
- memop++;
- }
- else if (mem_bis.agen) {
- // Tell XED we have an AGEN
- xed_encoder_request_set_agen(&req);
- // The AGEN is the next operand
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_AGEN);
- }
- else
- assert(mem_bis.agen || mem_bis.mem);
-
- xed_reg_class_enum_t rc = xed_gpr_reg_class(mem_bis.base_reg);
- xed_reg_class_enum_t rci = xed_gpr_reg_class(mem_bis.index_reg);
- if (mem_bis.base_reg != XED_REG_INVALID && mem_bis.index_reg != XED_REG_INVALID)
- if (rc != rci) {
- ostringstream os;
- os << "The base and index regs do not agree on the address size" << endl;
- xedex_derror(os.str().c_str()); // dies
- }
-
- if (rc == XED_REG_CLASS_GPR32 || rci == XED_REG_CLASS_GPR32)
- xed_encoder_request_set_effective_address_size(&req, 32);
-
- // fill in the memory fields
- xed_encoder_request_set_base0(&req, mem_bis.base_reg);
- xed_encoder_request_set_index(&req, mem_bis.index_reg);
- xed_encoder_request_set_scale(&req, mem_bis.scale_val);
- xed_encoder_request_set_seg0(&req, mem_bis.segment_reg);
-
- if (mem_bis.mem_len)
- xed_encoder_request_set_memory_operand_length(&req, mem_bis.mem_len ); // BYTES
- if (mem_bis.disp_valid)
- xed_encoder_request_set_memory_displacement(&req,
- mem_bis.disp_val,
- mem_bis.disp_width_bits/8);
- continue;
- }
-
- seg_parser_t seg_parser(str_res_reg);
- if (seg_parser.valid) {
- printf("Setting segment to %s\n", xed_reg_enum_t2str(seg_parser.segment_reg));
- xed_encoder_request_set_seg0(&req, seg_parser.segment_reg);
- xed_encoder_request_set_operand_order(&req, operand_index, XED_OPERAND_SEG0);
- continue;
- }
-
- immed_parser_t imm(str_res_reg, "IMM");
- if (imm.valid) {
- if (CLIENT_VERBOSE3)
- printf("Setting immediate value to " XED_FMT_LX "\n", imm.immed_val);
- xed_encoder_request_set_uimm0_bits(&req,
- imm.immed_val,
- imm.width_bits);
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_IMM0);
- continue;
- }
- immed_parser_t simm(str_res_reg, "SIMM");
- if (simm.valid) {
- if (CLIENT_VERBOSE3)
- printf("Setting immediate value to " XED_FMT_LX "\n", simm.immed_val);
- xed_encoder_request_set_simm(&req,
- STATIC_CAST(xed_int32_t,simm.immed_val),
- simm.width_bits/8); //FIXME
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_IMM0);
- continue;
- }
- immed_parser_t imm2(str_res_reg, "IMM2");
- if (imm2.valid) {
- if (imm2.width_bits != 8)
- xedex_derror("2nd immediate must be just 1 byte long");
- xed_encoder_request_set_uimm1(&req, imm2.immed_val);
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_IMM1);
- continue;
- }
-
- immed_parser_t disp(str_res_reg, "BRDISP");
- if (disp.valid) {
- if (CLIENT_VERBOSE3)
- printf("Setting displacement value to " XED_FMT_LX "\n", disp.immed_val);
- xed_encoder_request_set_branch_displacement(&req,
- STATIC_CAST(xed_uint32_t,disp.immed_val),
- disp.width_bits/8); //FIXME
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_RELBR);
- xed_encoder_request_set_relbr(&req);
- continue;
- }
-
- immed_parser_t ptr_disp(str_res_reg, "PTR");
- if (ptr_disp.valid) {
- if (CLIENT_VERBOSE3)
- printf("Setting pointer displacement value to " XED_FMT_LX "\n", ptr_disp.immed_val);
- xed_encoder_request_set_branch_displacement(&req,
- STATIC_CAST(xed_uint32_t,ptr_disp.immed_val),
- ptr_disp.width_bits/8); //FIXME
- xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_PTR);
- xed_encoder_request_set_ptr(&req);
- continue;
- }
-
- xed_reg_enum_t reg = str2xed_reg_enum_t(str_res_reg.c_str());
- if (reg == XED_REG_INVALID) {
- ostringstream os;
- os << "Bad register name: " << str_res_reg << " on operand " << i;
- xedex_derror(os.str().c_str()); // dies
- }
- // The registers operands aer numbered starting from the first one
- // as XED_OPERAND_REG0. We incremenet regnum (below) every time we add a
- // register operands.
- xed_operand_enum_t r = STATIC_CAST(xed_operand_enum_t,XED_OPERAND_REG0 + regnum);
- // store the register identifer in the operand storage field
- xed_encoder_request_set_reg(&req, r, reg);
- // store the operand storage field name in the encode-order array
- xed_encoder_request_set_operand_order(&req, operand_index, r);
- regnum++;
- } // for loop
-
- return req;
-}