2 Intel Open Source License
4 Copyright (c) 2002-2007 Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
10 Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer. Redistributions
12 in binary form must reproduce the above copyright notice, this list of
13 conditions and the following disclaimer in the documentation and/or
14 other materials provided with the distribution. Neither the name of
15 the Intel Corporation nor the names of its contributors may be used to
16 endorse or promote products derived from this software without
17 specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
23 ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 /// @file xed-enc-lang.cpp
32 /// @author Mark Charney <mark.charney@intel.com>
34 // This is an example of how to use the encoder from scratch in the context
35 // of parsing a string from the command line.
44 #include "xed-interface.h"
45 #include "xed-portability.h"
46 #include "xed-examples-util.h"
48 #include "xed-enc-lang.H"
// Character-level helper used by upcase(). The only logic visible in this
// excerpt is the test for a lowercase ASCII letter ('a'..'z').
// NOTE(review): the statements that compute the return value are not visible
// here; presumably the lowercase letter is mapped to uppercase and every
// other character is returned unchanged -- TODO confirm against the full file.
54 static char xed_enc_lang_toupper(char c) {
55 if (c >= 'a' && c <= 'z')
// Builds a string by appending xed_enc_lang_toupper(s[i]) for every index i
// in [0, s.size()), in order.
// NOTE(review): the declarations of `t` and `i` and the return statement are
// not visible in this excerpt; presumably `t` starts empty and is the return
// value -- TODO confirm.
60 static string upcase(string s) {
// The length is narrowed to xed_uint_t once, before the loop.
62 xed_uint_t len = static_cast<xed_uint_t>(s.size());
// Appends one converted character per iteration.
64 for(i=0 ; i < len ; i++ )
65 t = t + xed_enc_lang_toupper(s[i]);
// Splits `input` into pieces delimited by any character contained in `sep`
// and appends each piece to `output_array`. Runs of separator characters are
// skipped, so no empty piece is ever appended.
// NOTE(review): the return type, the `input` parameter declaration, the
// counter `i` and the braces are not visible in this excerpt. The printf
// calls look like debug tracing; whether they are guarded by a condition
// cannot be determined from here.
70 xed_split_args(const string& sep,
72 vector<string>& output_array)
74 // returns the number of args
75 // rip off the separator characters and split the src string based on separators.
77 // find the string between last_pos and pos. pos is after last_pos
// last_pos: start of the current piece (first non-separator character).
// pos: first separator character at or after last_pos.
78 string::size_type last_pos = input.find_first_not_of(sep, 0);
79 string::size_type pos = input.find_first_of(sep, last_pos);
81 printf("input %s\tlast_pos " XED_FMT_U " pos " XED_FMT_U "\n",
82 input.c_str() , STATIC_CAST(xed_uint_t,last_pos), STATIC_CAST(xed_uint_t,pos));
// Each iteration handles a piece that is followed by at least one separator.
84 while( pos != string::npos && last_pos != string::npos )
86 string a = input.substr(last_pos, pos-last_pos);
87 output_array.push_back(a);
89 printf("\t\tlast_pos " XED_FMT_U " pos " XED_FMT_U " i %d\n",
90 STATIC_CAST(xed_uint_t,last_pos),
91 STATIC_CAST(xed_uint_t,pos),
// Skip the separator run, then locate the end of the next piece.
93 last_pos = input.find_first_not_of(sep, pos);
94 pos = input.find_first_of(sep, last_pos);
// A final piece with no trailing separator runs to the end of the input.
97 if (last_pos != string::npos && pos == string::npos)
100 printf("\t\tGetting last substring at " XED_FMT_U "\n", STATIC_CAST(xed_uint_t,last_pos));
101 string a = input.substr(last_pos); // get the rest of the string
102 output_array.push_back(a);
106 printf("\t returning %d\n",i);
// Convenience wrapper around xed_split_args(): splits `s` on the characters
// in `delimiter`, collecting the pieces in `v`. The count returned by
// xed_split_args() is deliberately discarded.
// NOTE(review): the return type, the declaration of `v` and the return
// statement are not visible in this excerpt; callers in this file assign the
// result to a vector<string>.
111 tokenize(const string& s,
112 const string& delimiter) {
114 (void) xed_split_args(delimiter, s, v);
// Splits `src` at its first '/' character: `first` receives the text before
// the slash and `second` receives the text after it.
// NOTE(review): the body of the no-slash branch (p == string::npos) is not
// visible in this excerpt, so what `first` and `second` receive in that case
// cannot be stated from here.
119 void slash_split(const string& src,
120 string& first, // output
121 string& second) //output
123 string::size_type p = src.find("/");
124 if (p == string::npos) {
// A slash was found: everything before it / everything after it.
129 first = src.substr(0,p);
130 second = src.substr(p+1);
// Parser for immediate-style operand text. The constructor tokenizes `s` on
// the characters "(),"; when exactly two tokens result, the second token is
// converted with convert_ascii_hex_to_int() and the width is taken as 4 bits
// per character of that token.
// NOTE(review): access specifiers, other members (callers in this file read
// a `valid` member), the initializer list and closing braces are not visible
// in this excerpt.
134 class immed_parser_t {
// Width of the parsed value in bits (4 * number of characters in the value token).
138 unsigned int width_bits;
// Numeric value produced by convert_ascii_hex_to_int() for the value token.
139 xed_uint64_t immed_val;
// s: operand text to parse.
// arg_tok0: callers in this file pass names such as "IMM", "SIMM", "IMM2",
// "BRDISP" and "PTR". How it is used is not visible here; presumably it is
// compared against the first token -- TODO confirm.
142 immed_parser_t(const string& s,
143 const string& arg_tok0) //CONS
147 vector<string> vs = tokenize(s,"(),");
148 if (vs.size() == 2) {
150 string immed_str = vs[1];
151 immed_val = convert_ascii_hex_to_int(immed_str.c_str());
152 width_bits = static_cast<unsigned int>(immed_str.size()*4); // nibbles to bits
// Writes this object to `o`. The visible part prints immed_val in hexadecimal
// and then switches the stream back to decimal.
159 print(ostream& o) const {
163 o << hex << immed_val << dec;
// Stream-insertion operator for immed_parser_t.
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to x.print(o) -- TODO confirm.
171 ostream& operator<<(ostream& o, const immed_parser_t& x)
// Members and constructor of seg_parser_t, which recognizes operand text
// whose tokens (split on "(),") are exactly "SEG" followed by one more token.
// NOTE(review): the class header, other members (callers in this file read a
// `valid` member) and the declaration of `segment` are not visible in this
// excerpt; presumably `segment` is derived from vs[1] -- TODO confirm.
// Segment register parsed from the text; XED_REG_INVALID until one is parsed.
182 xed_reg_enum_t segment_reg;
185 seg_parser_t(const string& s) // CONS
187 segment_reg(XED_REG_INVALID)
189 vector<string> vs = tokenize(s,"(),");
190 xed_uint_t ntokens = static_cast<xed_uint_t>(vs.size());
// Diagnostic dump of the token list to stderr.
191 cerr << "ntokens " << ntokens << ": " ;
192 for(unsigned int i=0;i<ntokens;i++) cerr << '[' << vs[i] << "] ";
194 if (ntokens == 2 && vs[0] == "SEG") {
196 segment_reg = str2xed_reg_enum_t(segment.c_str());
// Checks that the name resolved to a register of class XED_REG_CLASS_SR.
197 if (segment_reg != XED_REG_INVALID && xed_reg_class(segment_reg) == XED_REG_CLASS_SR) {
// Parser for memory and address-generation operand text in base/index/scale
// form (see the grammar in the comments below). The constructor tokenizes the
// text on "(),", then fills in the register, scale, displacement and length
// members.
// NOTE(review): many lines of this class (access specifiers, some members
// such as the `mem`/`agen` flags read by callers, several assignments and all
// closing braces) are not visible in this excerpt.
205 class mem_bis_parser_t
207 // parse: MEM[length]([segment:]base,index,scale[,displacement])
208 // parse: AGEN(base,index,scale[,displacement])
209 // The displacement is optional
210 // The length of the memop is usually optional
211 // but required for x87 ops, for example.
// Callers in this file test disp_valid before setting a displacement.
217 xed_bool_t disp_valid;
222 string disp; //displacement
// Register fields start as XED_REG_INVALID and are later assigned from
// str2xed_reg_enum_t().
223 xed_reg_enum_t segment_reg;
224 xed_reg_enum_t base_reg;
225 xed_reg_enum_t index_reg;
// Scale factor; the constructor accepts the strings "1", "2", "4" and "8".
226 xed_uint8_t scale_val;
// Displacement value from convert_ascii_hex_to_int(), and its width in bits
// (4 bits per character of the displacement text).
229 xed_int64_t disp_val;
230 unsigned int disp_width_bits;
// Length taken from the digits following "MEM"; the caller's comment on its
// use says the unit is bytes.
232 unsigned int mem_len;
// Writes this object to `o`. The visible part prints mem_len, an optional
// "segment:" prefix, the index register and the scale.
235 print(ostream& o) const {
241 o << setw(1) << mem_len;
243 if (segment_reg != XED_REG_INVALID)
244 o << segment_reg << ":";
246 o << "," << index_reg
248 << (unsigned int) scale_val;
// s: operand text, e.g. of the form MEM[length](...) or AGEN(...).
255 mem_bis_parser_t(const string& s) //CONS
261 segment_reg(XED_REG_INVALID),
262 base_reg(XED_REG_INVALID),
263 index_reg(XED_REG_INVALID),
271 vector<string> vs = tokenize(s,"(),");
272 xed_uint_t ntokens = static_cast<xed_uint_t>(vs.size());
// Only token counts from 2 to 5 are parsed.
273 if (ntokens >= 2 && ntokens <= 5) {
274 if (vs[0] == "AGEN") {
277 else if (vs[0].substr(0,3) == "MEM") {
// Anything after the "MEM" prefix is the operand length.
279 if (vs[0].size() > 3) {
280 string len = vs[0].substr(3);
// Base 0 lets strtol accept decimal, octal (leading 0) or hex (leading 0x).
// NOTE(review): the long result is stored in an unsigned int without a range
// check.
281 mem_len = strtol(len.c_str(),0,0);
282 //printf("mem_len = " XED_FMT_U "\n", mem_len);
// The first argument may be "segment:base" or just "base".
290 string seg_and_base = upcase(vs[1]);
291 vector<string> sb = tokenize(seg_and_base,":");
292 int seg_and_base_tokens = STATIC_CAST(int,sb.size());
293 if (seg_and_base_tokens == 1) {
297 else if (seg_and_base_tokens == 2) {
299 xedex_derror("AGENs cannot have segment overrides");
// Any other token count for the segment-and-base argument is an error.
305 printf("seg_and_base_tokens = %d\n",seg_and_base_tokens);
306 xedex_derror("Bad segment-and-base specifier.");
// "-" and "NA" are recognized as placeholder spellings for base, index and
// scale; what is done for them is not visible in this excerpt.
309 if (base == "-" || base == "NA") {
313 index = upcase(vs[2]);
314 if (index == "-" || index == "NA") {
321 if (scale == "-" || scale == "NA") {
325 if (scale == "1" || scale == "2" || scale == "4" || scale == "8") {
327 scale_val = STATIC_CAST(xed_uint8_t,strtol(scale.c_str(), 0, 10));
// Register names are converted to XED register enum values.
328 segment_reg = str2xed_reg_enum_t(segment.c_str());
329 base_reg = str2xed_reg_enum_t(base.c_str());
330 index_reg = str2xed_reg_enum_t(index.c_str());
332 // look for a displacement
333 if (ntokens == 5 && vs[4] != "-") {
// One character of displacement text is one nibble.
// NOTE(review): the size is cast to int and then stored in an unsigned int.
336 unsigned int nibbles = STATIC_CAST(int,disp.size());
338 // ensure an even number of nibbles
343 disp_val = convert_ascii_hex_to_int(disp.c_str());
344 disp_width_bits = nibbles*4; // nibbles to bits
// Stream-insertion operator for mem_bis_parser_t.
// NOTE(review): the body is not visible in this excerpt; presumably it
// forwards to x.print(o) -- TODO confirm.
353 ostream& operator<<(ostream& o, const mem_bis_parser_t& x) {
358 xed_encoder_request_t parse_encode_request(ascii_encode_request_t& areq) {
360 xed_encoder_request_t req;
361 xed_encoder_request_zero_set_mode(&req,&(areq.dstate)); // calls xed_encoder_request_zero()
363 /* This is the important function here. This encodes an instruction from scratch.
366 the machine mode (machine width, addressing widths)
367 the effective operand width
369 for some instructions you need to specify prefixes (like REP or LOCK).
371 operand kind (XED_OPERAND_{AGEN,MEM0,MEM1,IMM0,IMM1,RELBR,PTR,REG0...REG15}
373 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_*);
374 where the operand_index is a sequential index starting at zero.
377 FOR MEMOPS: base,segment,index,scale,displacement for memops,
378 FOR REGISTERS: register name
379 FOR IMMEDIATES: immediate values
384 switch(xed_state_get_machine_mode(&(areq.dstate))) {
385 // set the default width.
386 case XED_MACHINE_MODE_LONG_64:
387 xed_encoder_request_set_effective_operand_width(&req, 32);
388 xed_encoder_request_set_effective_address_size(&req, 64);
391 case XED_MACHINE_MODE_LEGACY_32:
392 case XED_MACHINE_MODE_LONG_COMPAT_32:
393 xed_encoder_request_set_effective_operand_width(&req, 32);
394 xed_encoder_request_set_effective_address_size(&req, 32);
397 case XED_MACHINE_MODE_LEGACY_16:
398 case XED_MACHINE_MODE_LONG_COMPAT_16:
399 xed_encoder_request_set_effective_operand_width(&req, 16);
400 xed_encoder_request_set_effective_address_size(&req, 16);
407 //FIXME: allow changing the effective address size from the above defaults.
409 vector<string> tokens = tokenize(areq.command," ");
410 // first token has the operand and our temporary hack for the immediate
412 string first, second;
413 unsigned int token_index = 0;
415 while(token_index < tokens.size()) {
416 slash_split(tokens[token_index], first, second);
418 printf( "[%s][%s][%s]\n", tokens[0].c_str(), first.c_str(), second.c_str());
420 if (token_index == 0 && first == "REP") {
421 xed_encoder_request_set_rep(&req);
425 else if (token_index == 0 && first == "REPNE") {
426 xed_encoder_request_set_repne(&req);
435 // we can attempt to override the mode
437 xed_encoder_request_set_effective_operand_width(&req, 8);
438 else if (second == "16")
439 xed_encoder_request_set_effective_operand_width(&req, 16);
440 else if (second == "32")
441 xed_encoder_request_set_effective_operand_width(&req, 32);
442 else if (second == "64")
443 xed_encoder_request_set_effective_operand_width(&req, 64);
445 first = upcase(first);
446 xed_iclass_enum_t iclass = str2xed_iclass_enum_t(first.c_str());
447 if (iclass == XED_ICLASS_INVALID) {
449 os << "Bad instruction name: " << first;
450 xedex_derror(os.str().c_str());
452 xed_encoder_request_set_iclass(&req, iclass );
454 xed_uint_t memop = 0;
455 xed_uint_t regnum = 0;
456 // put the operands in the request. Loop through tokens
457 // (skip the opcode iclass, handled above)
458 xed_uint_t operand_index = 0;
459 for( i=token_index; i < tokens.size(); i++, operand_index++ ) {
460 string str_res_reg, second_x;
461 slash_split(tokens[i], str_res_reg, second_x);
462 str_res_reg = upcase(str_res_reg);
463 // prune the AGEN or MEM(base,index,scale[,displacement]) text from str_res_reg
464 // FIXME: add MEM(immed) for the OC1_A and OC1_O types????
465 mem_bis_parser_t mem_bis(str_res_reg);
469 // Tell XED that we have a memory operand
470 xed_encoder_request_set_mem0(&req);
471 // Tell XED that the mem0 operand is the next operand:
472 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_MEM0);
475 xed_encoder_request_set_mem1(&req);
476 // Tell XED that the mem1 operand is the next operand:
477 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_MEM1);
481 else if (mem_bis.agen) {
482 // Tell XED we have an AGEN
483 xed_encoder_request_set_agen(&req);
484 // The AGEN is the next operand
485 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_AGEN);
488 assert(mem_bis.agen || mem_bis.mem);
490 xed_reg_class_enum_t rc = xed_gpr_reg_class(mem_bis.base_reg);
491 xed_reg_class_enum_t rci = xed_gpr_reg_class(mem_bis.index_reg);
492 if (mem_bis.base_reg != XED_REG_INVALID && mem_bis.index_reg != XED_REG_INVALID)
495 os << "The base and index regs do not agree on the address size" << endl;
496 xedex_derror(os.str().c_str()); // dies
499 if (rc == XED_REG_CLASS_GPR32 || rci == XED_REG_CLASS_GPR32)
500 xed_encoder_request_set_effective_address_size(&req, 32);
502 // fill in the memory fields
503 xed_encoder_request_set_base0(&req, mem_bis.base_reg);
504 xed_encoder_request_set_index(&req, mem_bis.index_reg);
505 xed_encoder_request_set_scale(&req, mem_bis.scale_val);
506 xed_encoder_request_set_seg0(&req, mem_bis.segment_reg);
509 xed_encoder_request_set_memory_operand_length(&req, mem_bis.mem_len ); // BYTES
510 if (mem_bis.disp_valid)
511 xed_encoder_request_set_memory_displacement(&req,
513 mem_bis.disp_width_bits/8);
517 seg_parser_t seg_parser(str_res_reg);
518 if (seg_parser.valid) {
519 printf("Setting segment to %s\n", xed_reg_enum_t2str(seg_parser.segment_reg));
520 xed_encoder_request_set_seg0(&req, seg_parser.segment_reg);
521 xed_encoder_request_set_operand_order(&req, operand_index, XED_OPERAND_SEG0);
525 immed_parser_t imm(str_res_reg, "IMM");
528 printf("Setting immediate value to " XED_FMT_LX "\n", imm.immed_val);
529 xed_encoder_request_set_uimm0_bits(&req,
532 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_IMM0);
535 immed_parser_t simm(str_res_reg, "SIMM");
538 printf("Setting immediate value to " XED_FMT_LX "\n", simm.immed_val);
539 xed_encoder_request_set_simm(&req,
540 STATIC_CAST(xed_int32_t,simm.immed_val),
541 simm.width_bits/8); //FIXME
542 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_IMM0);
545 immed_parser_t imm2(str_res_reg, "IMM2");
547 if (imm2.width_bits != 8)
548 xedex_derror("2nd immediate must be just 1 byte long");
549 xed_encoder_request_set_uimm1(&req, imm2.immed_val);
550 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_IMM1);
554 immed_parser_t disp(str_res_reg, "BRDISP");
557 printf("Setting displacement value to " XED_FMT_LX "\n", disp.immed_val);
558 xed_encoder_request_set_branch_displacement(&req,
559 STATIC_CAST(xed_uint32_t,disp.immed_val),
560 disp.width_bits/8); //FIXME
561 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_RELBR);
562 xed_encoder_request_set_relbr(&req);
566 immed_parser_t ptr_disp(str_res_reg, "PTR");
567 if (ptr_disp.valid) {
569 printf("Setting pointer displacement value to " XED_FMT_LX "\n", ptr_disp.immed_val);
570 xed_encoder_request_set_branch_displacement(&req,
571 STATIC_CAST(xed_uint32_t,ptr_disp.immed_val),
572 ptr_disp.width_bits/8); //FIXME
573 xed_encoder_request_set_operand_order(&req,operand_index, XED_OPERAND_PTR);
574 xed_encoder_request_set_ptr(&req);
578 xed_reg_enum_t reg = str2xed_reg_enum_t(str_res_reg.c_str());
579 if (reg == XED_REG_INVALID) {
581 os << "Bad register name: " << str_res_reg << " on operand " << i;
582 xedex_derror(os.str().c_str()); // dies
584 // The register operands are numbered starting from the first one
585 // as XED_OPERAND_REG0. We increment regnum (below) every time we add a
586 // register operand.
587 xed_operand_enum_t r = STATIC_CAST(xed_operand_enum_t,XED_OPERAND_REG0 + regnum);
588 // store the register identifier in the operand storage field
589 xed_encoder_request_set_reg(&req, r, reg);
590 // store the operand storage field name in the encode-order array
591 xed_encoder_request_set_operand_order(&req, operand_index, r);