AssemblerX86.h

Go to the documentation of this file.
00001 /* Assembly specific to the x86 architecture. */
00002 
00003 #ifndef ROSE_ASSEMBLER_X86_H
00004 #define ROSE_ASSEMBLER_X86_H
00005 
00006 #include "Assembler.h"
00007 
00008 //#include "sage3.h"
00009 
/* Assembler specialization for x86.  Holds a static dictionary (defns) of instruction definitions keyed by
 * X86InstructionKind that is shared by all instances; the constructor calls initAssemblyRules() whenever that
 * dictionary is found empty.  All member functions except the small inline helpers are only declared here. */
00026 class AssemblerX86: public Assembler {
00027 public:
          /* Starts with honor_operand_types disabled and populates the shared dictionary if it is still empty.
           * NOTE(review): no synchronization around the static dictionary is visible in this header -- confirm before
           * constructing instances from more than one thread. */
00028     AssemblerX86()
00029         : honor_operand_types(false) {
00030         if (defns.size()==0)
00031             initAssemblyRules();
00032     }
00033 
00034     virtual ~AssemblerX86() {}
00035 
          /* Assembles one instruction; the result is returned as an SgUnsignedCharList.  Defined outside this header. */
00037     virtual SgUnsignedCharList assembleOne(SgAsmInstruction*);
00038     
          /* Stores b in honor_operand_types.  How the flag influences assembly is not visible in this header. */
00044     void set_honor_operand_types(bool b) {
00045         honor_operand_types = b;
00046     }
00047 
          /* Returns the current value of honor_operand_types. */
00050     bool get_honor_operand_types() const {
00051         return honor_operand_types;
00052     }
00053 
          /* Assembles from the text given in source; the accepted syntax is defined by the implementation, which is
           * not visible here. */
00055     virtual SgUnsignedCharList assembleProgram(const std::string &source);
00056 
00057     /*========================================================================================================================
00058      * Members for defining instructions.
00059      *========================================================================================================================*/
00060 private:
00061     /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
00062      * a 64-bit value so that up to eight bytes of opcode can be specified.  The bytes generated come from the 64-bit opcode
00063      * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
00064      * generated.  For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
00065     
          /* En modifier: bit 7 (od_e_pres) flags that the modifier was specified and bits 4-6 (od_e_mask) carry n.
           * Because every od_eN value includes od_e_pres, od_e0 is non-zero and can be told apart from "no modifier". */
00069     static const unsigned od_e_mask   = 0x00000070;                /* mask for numeric value (n) part of En field. */
00070     static const unsigned od_e_pres   = 0x00000080;                /* bit set if En modification was specified. */
00071     static const unsigned od_e0       = 0x00000000 | od_e_pres;
00072     static const unsigned od_e1       = 0x00000010 | od_e_pres;
00073     static const unsigned od_e2       = 0x00000020 | od_e_pres;
00074     static const unsigned od_e3       = 0x00000030 | od_e_pres;
00075     static const unsigned od_e4       = 0x00000040 | od_e_pres;
00076     static const unsigned od_e5       = 0x00000050 | od_e_pres;
00077     static const unsigned od_e6       = 0x00000060 | od_e_pres;
00078     static const unsigned od_e7       = 0x00000070 | od_e_pres;
          /* Extracts n (0 through 7) from the En field of opcode_mods; od_e_pres is not consulted. */
00079     static size_t od_e_val(unsigned opcode_mods) { return (opcode_mods & od_e_mask)>>4; } 
          /* REX modifier: bit 0 (od_rex_pres) flags presence and bits 8-11 (od_rex_mask) hold the low nibble of the
           * REX byte.  In the names below the letters w, r, x and b correspond to nibble values 8, 4, 2 and 1. */
00084     static const unsigned od_rex_pres = 0x00000001;                /* bit set if REX prefix is present. */
00085     static const unsigned od_rex_mask = 0x00000f00;                /* mask for low nibble of REX byte. */
00086     static const unsigned od_rex      = 0x00000000 | od_rex_pres;
00087     static const unsigned od_rexb     = 0x00000100 | od_rex_pres;
00088     static const unsigned od_rexx     = 0x00000200 | od_rex_pres;
00089     static const unsigned od_rexxb    = 0x00000300 | od_rex_pres;
00090     static const unsigned od_rexr     = 0x00000400 | od_rex_pres;
00091     static const unsigned od_rexrb    = 0x00000500 | od_rex_pres;
00092     static const unsigned od_rexrx    = 0x00000600 | od_rex_pres;
00093     static const unsigned od_rexrxb   = 0x00000700 | od_rex_pres;
00094     static const unsigned od_rexw     = 0x00000800 | od_rex_pres;
00095     static const unsigned od_rexwb    = 0x00000900 | od_rex_pres;
00096     static const unsigned od_rexwx    = 0x00000a00 | od_rex_pres;
00097     static const unsigned od_rexwxb   = 0x00000b00 | od_rex_pres;
00098     static const unsigned od_rexwr    = 0x00000c00 | od_rex_pres;
00099     static const unsigned od_rexwrb   = 0x00000d00 | od_rex_pres;
00100     static const unsigned od_rexwrx   = 0x00000e00 | od_rex_pres;
00101     static const unsigned od_rexwrxb  = 0x00000f00 | od_rex_pres;
          /* Builds a REX byte: 0x40 OR'ed with the nibble stored in bits 8-11 of opcode_mods.  od_rex_pres is not
           * checked, so the caller decides whether the byte should be emitted at all. */
00102     static uint8_t od_rex_byte(unsigned opcode_mods) { return 0x40 | ((opcode_mods & od_rex_mask) >> 8); }
00103 
          /* Single flag bit (bit 1).  NOTE(review): the name suggests it marks encodings that carry a ModR/M byte --
           * confirm against the code that consumes opcode_modifiers. */
00106     static const unsigned od_modrm    = 0x00000002;
00107 
          /* Three-bit field in bits 12-14 holding one of the values 1 (od_cb) through 6 (od_ct); a zero field matches
           * none of the named values.  NOTE(review): presumably the cb/cw/cd/cp/co/ct opcode notation -- confirm. */
00110     static const unsigned od_c_mask   = 0x00007000;
00111     static const unsigned od_cb       = 0x00001000;
00112     static const unsigned od_cw       = 0x00002000;
00113     static const unsigned od_cd       = 0x00003000;
00114     static const unsigned od_cp       = 0x00004000;
00115     static const unsigned od_co       = 0x00005000;
00116     static const unsigned od_ct       = 0x00006000;
00117 
          /* Three-bit field in bits 16-18 holding one of the values 1 (od_ib) through 4 (od_io).
           * NOTE(review): presumably the ib/iw/id/io immediate-size notation -- confirm. */
00120     static const unsigned od_i_mask   = 0x00070000;
00121     static const unsigned od_ib       = 0x00010000;
00122        static const unsigned od_iw       = 0x00020000;
00123     static const unsigned od_id       = 0x00030000;
00124     static const unsigned od_io       = 0x00040000;
00125 
          /* Three-bit field in bits 20-22 holding one of the values 1 (od_rb) through 4 (od_ro).
           * NOTE(review): presumably the +rb/+rw/+rd/+ro register-in-opcode notation -- confirm. */
00128     static const unsigned od_r_mask   = 0x00700000;
00129     static const unsigned od_rb       = 0x00100000;
00130     static const unsigned od_rw       = 0x00200000;
00131     static const unsigned od_rd       = 0x00300000;
00132     static const unsigned od_ro       = 0x00400000;
00133 
          /* Single flag bit (bit 2); distinct from the od_i_mask field above.  Its meaning is not documented in the
           * visible text -- TODO confirm with the implementation. */
00137     static const unsigned od_i        = 0x00000004;
00138 
00139 
          /* Operand kinds that an InsnDefn can list.  od_none is the first enumerator, hence zero, and InsnDefn's
           * constructor relies on that to skip unused operand slots, so it must stay first.
           * NOTE(review): the names appear to follow the operand notation of the manual section mentioned in the
           * comment further down (3.1.1.2) -- confirm before documenting individual members. */
00141     enum OperandDefn 
00142         {
00143         od_none,        
00144         od_AL,          
00145         od_AX,          
00146         od_EAX,         
00147         od_RAX,         
00148         od_DX,          
00149         od_CS,          
00150         od_DS,          
00151         od_ES,          
00152         od_FS,          
00153         od_GS,          
00154         od_SS,          
00155         od_rel8,        
00157         od_rel16,       
00159         od_rel32,       
00161         od_rel64,       
00163         od_ptr16_16,    
00168         od_ptr16_32,    
00173         od_ptr16_64,    
00178         od_r8,          
00180         od_r16,         
00182         od_r32,         
00184         od_r64,         
00186         od_imm8,        
00190         od_imm16,       
00192         od_imm32,       
00194         od_imm64,       
00197         od_r_m8,        
00200         od_r_m16,       
00205         od_r_m32,       
00210         od_r_m64,       
00215         od_m,           
00216         od_m8,          
00218         od_m16,         
00220         od_m32,         
00222         od_m64,         
00223         od_m128,        
00224         od_m16_16,      
00227         od_m16_32,      
00230         od_m16_64,      
00233         od_m16a16,      
00237         od_m16a32,      
00242         od_m32a32,      
00246         od_m16a64,      
00251         od_moffs8,      
00254         od_moffs16,      
00257         od_moffs32,     
00260         od_moffs64,     
00263         od_sreg,        
00265         od_m32fp,       
00267         od_m64fp,       
00269         od_m80fp,       
00271         od_st0,         
00272         od_st1,         
00273         od_st2,         
00274         od_st3,         
00275         od_st4,         
00276         od_st5,         
00277         od_st6,         
00278         od_st7,         
00279         od_sti,         
00280         od_mm,          
00281         od_mm_m32,      
00284         od_mm_m64,      
00287         od_xmm,         
00289         od_xmm_m16,     
00290         od_xmm_m32,     
00293         od_xmm_m64,     
00296         od_xmm_m128,    
00299         /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
00300         od_XMM0,        
00301         od_0,           
00302         od_1,           
00303         od_m80,         
00304         od_dec,         
00305         od_m80bcd,      
00306         od_m2byte,      
00307         od_m14_28byte,  
00308         od_m94_108byte, 
00309         od_m512byte,    
00310         od_r16_m16,     
00311         od_r32_m8,      
00312         od_r32_m16,     
00313         od_r64_m16,     
00314         od_CR0,         
00315         od_CR7,         
00316         od_CR8,         
00317         od_CR0CR7,      
00318         od_DR0DR7,      
00319         od_reg,         
00320         od_CL,          
00321     };
00322 
          /* Two independent flag bits.  NOTE(review): presumably OR'ed together into InsnDefn::compatibility to say
           * in which processor modes a definition applies -- confirm. */
00324     static const unsigned COMPAT_LEGACY = 0x01; 
00325     static const unsigned COMPAT_64     = 0x02; 
          /* Packs a ModR/M byte: mod goes to bits 6-7, reg to bits 3-5 and rm to bits 0-2.  Each argument is masked
           * to its field width first, so higher bits of the arguments are silently dropped. */
00328     static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
00329         return ((mod&0x3)<<6) | ((reg&0x7)<<3) | (rm&0x7);
00330     }
00331 
          /* Returns the mod field (bits 6-7) of a ModR/M byte. */
00333     static unsigned modrm_mod(uint8_t modrm) { return modrm>>6; }
00334 
          /* Returns the reg field (bits 3-5) of a ModR/M byte. */
00336     static unsigned modrm_reg(uint8_t modrm) { return (modrm>>3) & 0x7; }
00337 
          /* Returns the rm field (bits 0-2) of a ModR/M byte. */
00339     static unsigned modrm_rm(uint8_t modrm) { return modrm & 0x7; }
00340 
          /* Packs a SIB byte with the same layout as build_modrm: ss in bits 6-7, index in bits 3-5, base in bits
           * 0-2, each masked to its field width. */
00342     static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
00343         return ((ss&0x3)<<6) | ((index&0x7)<<3) | (base&0x7);
00344     }
00345 
          /* Returns the ss field (bits 6-7) of a SIB byte. */
00347     static unsigned sib_ss(uint8_t sib) {return sib>>6; }
00348 
          /* Returns the index field (bits 3-5) of a SIB byte. */
00350     static unsigned sib_index(uint8_t sib) { return (sib>>3) & 0x7; }
00351 
          /* Returns the base field (bits 0-2) of a SIB byte. */
00353     static unsigned sib_base(uint8_t sib) { return sib & 0x7; }
00354     
          /* One entry of the instruction dictionary: mnemonic, instruction kind, compatibility flags, base opcode,
           * opcode modifier bits (the od_* constants above) and the list of operand kinds. */
00358     class InsnDefn {
00359     public:
              /* Copies the arguments into the members and appends op1..op4 to operands in order, skipping every
               * argument equal to od_none (zero).  A skipped slot is not preserved as a hole: if op1 is od_none and
               * op2 is not, op2 becomes operands[0]. */
00360         InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
00361                  unsigned opcode_modifiers, OperandDefn op1=od_none, OperandDefn op2=od_none, OperandDefn op3=od_none,
00362                  OperandDefn op4=od_none)
00363             : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
00364             if (op1) operands.push_back(op1);
00365             if (op2) operands.push_back(op2);
00366             if (op3) operands.push_back(op3);
00367             if (op4) operands.push_back(op4);
00368         }
              /* Returns a string form of this definition; the format is defined by the implementation. */
00369         std::string to_str() const;
              /* Records where the instruction is documented (see the location member). */
00370         void set_location(const std::string &s) {
00371             location = s;
00372         }
00373         std::string mnemonic;
00374         X86InstructionKind kind;
00375         unsigned compatibility;
00376         uint64_t opcode;
00377         unsigned opcode_modifiers;
00378         std::vector<OperandDefn> operands;
00379         std::string location;                           /* location of instruction documentation */
00380     };
00381 
          /* Shapes of memory operand that parse_memref() can report. */
00382     enum MemoryReferencePattern 
00383         {
00384         mrp_unknown,
00385         mrp_disp,                       /* displacement */
00386         mrp_index,                      /* register*scale */
00387         mrp_index_disp,                 /* register*scale + displacement */
00388         mrp_base,                       /* register */
00389         mrp_base_disp,                  /* register + displacement */
00390         mrp_base_index,                 /* register + register*scale */
00391         mrp_base_index_disp             /* register + register*scale + displacement */
00392     };
00393 
          /* All definitions recorded for one instruction kind, stored as pointers in the order they were defined. */
00395     typedef std::vector<const InsnDefn*> DictionaryPage;
00396 
          /* Maps an instruction kind to its page of definitions. */
00398     typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
00399 
          /* Called by the constructor when defns is empty.  NOTE(review): the _partN functions are presumably the
           * pieces this initialization is split into -- confirm in the implementation. */
00401     static void initAssemblyRules();
00402     static void initAssemblyRules_part1();
00403     static void initAssemblyRules_part2();
00404     static void initAssemblyRules_part3();
00405     static void initAssemblyRules_part4();
00406     static void initAssemblyRules_part5();
00407     static void initAssemblyRules_part6();
00408     static void initAssemblyRules_part7();
00409     static void initAssemblyRules_part8();
00410     static void initAssemblyRules_part9();
00411 
          /* Appends d to the dictionary page for d->kind; the map's operator[] creates the page on first use.  The
           * pointer itself is stored (no copy is made) and d is dereferenced without a null check.
           * NOTE(review): who owns and frees the definition is not visible here -- confirm. */
00413     static void define(const InsnDefn *d) {
00414         defns[d->kind].push_back(d);
00415     }
00416     
          /* Returns a string for an instruction kind; the format is defined by the implementation. */
00420     static std::string to_str(X86InstructionKind);
00421 
          /* Takes a byte list by value and returns a byte list for insn; what is changed about the prefix bytes is
           * defined by the implementation, which is not visible here. */
00427     SgUnsignedCharList fixup_prefix_bytes(SgAsmx86Instruction *insn, SgUnsignedCharList source);
00428 
          /* Assembles insn using one specific definition and returns the resulting bytes. */
00431     SgUnsignedCharList assemble(SgAsmx86Instruction *insn, const InsnDefn *defn);
00432 
          /* Matches a whole instruction against a definition.  Returns nothing, unlike the operand-level overload
           * below.  NOTE(review): how a mismatch is reported (exception or otherwise) is not visible in this header;
           * disp and imm are presumably outputs -- confirm. */
00436     void matches(const InsnDefn *defn, SgAsmx86Instruction *insn, int64_t *disp, int64_t *imm) const;
00437 
          /* Operand-level match of one expression against one operand kind; the bool result reports the outcome. */
00440     bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
00441 
          /* NOTE(review): presumably tests whether val can be encoded as an nbytes-wide relative operand of the
           * given instruction -- confirm in the implementation. */
00443     static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
00444         
          /* Classifies a memory reference expression as one of the MemoryReferencePattern values.  The four trailing
           * pointer-to-pointer parameters are marked as outputs in the signature. */
00448     static MemoryReferencePattern parse_memref(SgAsmInstruction *insn, SgAsmMemoryReferenceExpression *expr,
00449                                                SgAsmx86RegisterReferenceExpression **base_reg/*out*/,
00450                                                SgAsmx86RegisterReferenceExpression **index_reg/*out*/,
00451                                                SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
00452 
          /* Instruction-level overload of the static build_modrm(mod, reg, rm) above; argno selects an operand of
           * the instruction.  NOTE(review): sib, displacement and rex are presumably outputs -- confirm. */
00454     uint8_t build_modrm(const InsnDefn*, SgAsmx86Instruction*, size_t argno,
00455                         uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
00456 
          /* NOTE(review): presumably fills the reg field of *modrm (and adjusts *rex) for operand argno -- confirm. */
00458     void build_modreg(const InsnDefn*, SgAsmx86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
00459 
          /* Returns a byte for the instruction's segment override; the value used for "no override" is defined by
           * the implementation -- TODO confirm. */
00462     uint8_t segment_override(SgAsmx86Instruction*);
00463 
00464     static InsnDictionary defns;                /* dictionary shared by all instances; filled through define() */
00465     bool honor_operand_types;                   /* per-instance flag; see set_honor_operand_types() */
00466 };
00467 
00468 #endif

Generated on Tue Jan 31 05:31:19 2012 for ROSE by  doxygen 1.4.7