00001
00002
00003 #ifndef ROSE_ASSEMBLER_X86_H
00004 #define ROSE_ASSEMBLER_X86_H
00005
00006 #include "Assembler.h"
00007
00008
00009
00026 class AssemblerX86: public Assembler {
00027 public:
00028 AssemblerX86()
00029 : honor_operand_types(false) {
00030 if (defns.size()==0)
00031 initAssemblyRules();
00032 }
00033
00034 virtual ~AssemblerX86() {}
00035
00037 virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
00038
00044 void set_honor_operand_types(bool b) {
00045 honor_operand_types = b;
00046 }
00047
00050 bool get_honor_operand_types() const {
00051 return honor_operand_types;
00052 }
00053
00055 virtual SgUnsignedCharList assembleProgram(const std::string &source);
00056
00057
00058
00059
00060 private:
00061
00062
00063
00064
00065
00069 static const unsigned od_e_mask = 0x00000070;
00070 static const unsigned od_e_pres = 0x00000080;
00071 static const unsigned od_e0 = 0x00000000 | od_e_pres;
00072 static const unsigned od_e1 = 0x00000010 | od_e_pres;
00073 static const unsigned od_e2 = 0x00000020 | od_e_pres;
00074 static const unsigned od_e3 = 0x00000030 | od_e_pres;
00075 static const unsigned od_e4 = 0x00000040 | od_e_pres;
00076 static const unsigned od_e5 = 0x00000050 | od_e_pres;
00077 static const unsigned od_e6 = 0x00000060 | od_e_pres;
00078 static const unsigned od_e7 = 0x00000070 | od_e_pres;
00079 static size_t od_e_val(unsigned opcode_mods) { return (opcode_mods & od_e_mask)>>4; }
00084 static const unsigned od_rex_pres = 0x00000001;
00085 static const unsigned od_rex_mask = 0x00000f00;
00086 static const unsigned od_rex = 0x00000000 | od_rex_pres;
00087 static const unsigned od_rexb = 0x00000100 | od_rex_pres;
00088 static const unsigned od_rexx = 0x00000200 | od_rex_pres;
00089 static const unsigned od_rexxb = 0x00000300 | od_rex_pres;
00090 static const unsigned od_rexr = 0x00000400 | od_rex_pres;
00091 static const unsigned od_rexrb = 0x00000500 | od_rex_pres;
00092 static const unsigned od_rexrx = 0x00000600 | od_rex_pres;
00093 static const unsigned od_rexrxb = 0x00000700 | od_rex_pres;
00094 static const unsigned od_rexw = 0x00000800 | od_rex_pres;
00095 static const unsigned od_rexwb = 0x00000900 | od_rex_pres;
00096 static const unsigned od_rexwx = 0x00000a00 | od_rex_pres;
00097 static const unsigned od_rexwxb = 0x00000b00 | od_rex_pres;
00098 static const unsigned od_rexwr = 0x00000c00 | od_rex_pres;
00099 static const unsigned od_rexwrb = 0x00000d00 | od_rex_pres;
00100 static const unsigned od_rexwrx = 0x00000e00 | od_rex_pres;
00101 static const unsigned od_rexwrxb = 0x00000f00 | od_rex_pres;
00102 static uint8_t od_rex_byte(unsigned opcode_mods) { return 0x40 | ((opcode_mods & od_rex_mask) >> 8); }
00103
00106 static const unsigned od_modrm = 0x00000002;
00107
00110 static const unsigned od_c_mask = 0x00007000;
00111 static const unsigned od_cb = 0x00001000;
00112 static const unsigned od_cw = 0x00002000;
00113 static const unsigned od_cd = 0x00003000;
00114 static const unsigned od_cp = 0x00004000;
00115 static const unsigned od_co = 0x00005000;
00116 static const unsigned od_ct = 0x00006000;
00117
00120 static const unsigned od_i_mask = 0x00070000;
00121 static const unsigned od_ib = 0x00010000;
00122 static const unsigned od_iw = 0x00020000;
00123 static const unsigned od_id = 0x00030000;
00124 static const unsigned od_io = 0x00040000;
00125
00128 static const unsigned od_r_mask = 0x00700000;
00129 static const unsigned od_rb = 0x00100000;
00130 static const unsigned od_rw = 0x00200000;
00131 static const unsigned od_rd = 0x00300000;
00132 static const unsigned od_ro = 0x00400000;
00133
00137 static const unsigned od_i = 0x00000004;
00138
00139
00141 enum OperandDefn
00142 {
00143 od_none,
00144 od_AL,
00145 od_AX,
00146 od_EAX,
00147 od_RAX,
00148 od_DX,
00149 od_CS,
00150 od_DS,
00151 od_ES,
00152 od_FS,
00153 od_GS,
00154 od_SS,
00155 od_rel8,
00157 od_rel16,
00159 od_rel32,
00161 od_rel64,
00163 od_ptr16_16,
00168 od_ptr16_32,
00173 od_ptr16_64,
00178 od_r8,
00180 od_r16,
00182 od_r32,
00184 od_r64,
00186 od_imm8,
00190 od_imm16,
00192 od_imm32,
00194 od_imm64,
00197 od_r_m8,
00200 od_r_m16,
00205 od_r_m32,
00210 od_r_m64,
00215 od_m,
00216 od_m8,
00218 od_m16,
00220 od_m32,
00222 od_m64,
00223 od_m128,
00224 od_m16_16,
00227 od_m16_32,
00230 od_m16_64,
00233 od_m16a16,
00237 od_m16a32,
00242 od_m32a32,
00246 od_m16a64,
00251 od_moffs8,
00254 od_moffs16,
00257 od_moffs32,
00260 od_moffs64,
00263 od_sreg,
00265 od_m32fp,
00267 od_m64fp,
00269 od_m80fp,
00271 od_st0,
00272 od_st1,
00273 od_st2,
00274 od_st3,
00275 od_st4,
00276 od_st5,
00277 od_st6,
00278 od_st7,
00279 od_sti,
00280 od_mm,
00281 od_mm_m32,
00284 od_mm_m64,
00287 od_xmm,
00289 od_xmm_m16,
00290 od_xmm_m32,
00293 od_xmm_m64,
00296 od_xmm_m128,
00299
00300 od_XMM0,
00301 od_0,
00302 od_1,
00303 od_m80,
00304 od_dec,
00305 od_m80bcd,
00306 od_m2byte,
00307 od_m14_28byte,
00308 od_m94_108byte,
00309 od_m512byte,
00310 od_r16_m16,
00311 od_r32_m8,
00312 od_r32_m16,
00313 od_r64_m16,
00314 od_CR0,
00315 od_CR7,
00316 od_CR8,
00317 od_CR0CR7,
00318 od_DR0DR7,
00319 od_reg,
00320 od_CL,
00321 };
00322
00324 static const unsigned COMPAT_LEGACY = 0x01;
00325 static const unsigned COMPAT_64 = 0x02;
00328 static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
00329 return ((mod&0x3)<<6) | ((reg&0x7)<<3) | (rm&0x7);
00330 }
00331
00333 static unsigned modrm_mod(uint8_t modrm) { return modrm>>6; }
00334
00336 static unsigned modrm_reg(uint8_t modrm) { return (modrm>>3) & 0x7; }
00337
00339 static unsigned modrm_rm(uint8_t modrm) { return modrm & 0x7; }
00340
00342 static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
00343 return ((ss&0x3)<<6) | ((index&0x7)<<3) | (base&0x7);
00344 }
00345
00347 static unsigned sib_ss(uint8_t sib) {return sib>>6; }
00348
00350 static unsigned sib_index(uint8_t sib) { return (sib>>3) & 0x7; }
00351
00353 static unsigned sib_base(uint8_t sib) { return sib & 0x7; }
00354
00358 class InsnDefn {
00359 public:
00360 InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
00361 unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
00362 OperandDefn op4=od_none)
00363 : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
00364 if (op1) operands.push_back(op1);
00365 if (op2) operands.push_back(op2);
00366 if (op3) operands.push_back(op3);
00367 if (op4) operands.push_back(op4);
00368 }
00369 std::string to_str() const;
00370 void set_location(const std::string &s) {
00371 location = s;
00372 }
00373 std::string mnemonic;
00374 X86InstructionKind kind;
00375 unsigned compatibility;
00376 uint64_t opcode;
00377 unsigned opcode_modifiers;
00378 std::vector<OperandDefn> operands;
00379 std::string location;
00380 };
00381
00382 enum MemoryReferencePattern
00383 {
00384 mrp_unknown,
00385 mrp_disp,
00386 mrp_index,
00387 mrp_index_disp,
00388 mrp_base,
00389 mrp_base_disp,
00390 mrp_base_index,
00391 mrp_base_index_disp
00392 };
00393
00395 typedef std::vector<const InsnDefn*> DictionaryPage;
00396
00398 typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
00399
00401 static void initAssemblyRules();
00402 static void initAssemblyRules_part1();
00403 static void initAssemblyRules_part2();
00404 static void initAssemblyRules_part3();
00405 static void initAssemblyRules_part4();
00406 static void initAssemblyRules_part5();
00407 static void initAssemblyRules_part6();
00408 static void initAssemblyRules_part7();
00409 static void initAssemblyRules_part8();
00410 static void initAssemblyRules_part9();
00411
00413 static void define(const InsnDefn *d) {
00414 defns[d->kind].push_back(d);
00415 }
00416
00420 static std::string to_str(X86InstructionKind);
00421
00427 SgUnsignedCharList fixup_prefix_bytes(SgAsmx86Instruction *insn, SgUnsignedCharList source);
00428
00431 SgUnsignedCharList assemble(SgAsmx86Instruction *insn, const InsnDefn *defn);
00432
00436 void matches(const InsnDefn *defn, SgAsmx86Instruction *insn, int64_t *disp, int64_t *imm) const;
00437
00440 bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
00441
00443 static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
00444
00448 static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
00449 SgAsmx86RegisterReferenceExpression **base_reg,
00450 SgAsmx86RegisterReferenceExpression **index_reg,
00451 SgAsmValueExpression **scale, SgAsmValueExpression **displacement);
00452
00454 uint8_t build_modrm(const InsnDefn*, SgAsmx86Instruction*, size_t argno,
00455 uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
00456
00458 void build_modreg(const InsnDefn*, SgAsmx86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
00459
00462 uint8_t segment_override(SgAsmx86Instruction*);
00463
00464 static InsnDictionary defns;
00465 bool honor_operand_types;
00466 };
00467
00468 #endif