DisassemblerX86.h

Go to the documentation of this file.
00001 /* Disassembly specific to the x86 architecture. */
00002 
00003 #ifndef ROSE_DISASSEMBLER_X86_H
00004 #define ROSE_DISASSEMBLER_X86_H
00005 
00006 #include "InstructionEnumsX86.h"
00007 
00010 class DisassemblerX86: public Disassembler {
00011 
00012 
00013     /*========================================================================================================================
00014      * Public methods
00015      *========================================================================================================================*/
00016 public:
00017     DisassemblerX86(size_t wordsize)
00018         : insnSize(x86_insnsize_none), ip(0), insnbufat(0), segOverride(x86_segreg_none), 
00019           branchPrediction(x86_branch_prediction_none), branchPredictionEnabled(false), rexPresent(false), rexW(false), 
00020           rexR(false), rexX(false), rexB(false), sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false),
00021           lock(false), repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), rmField(0), 
00022           modrm(NULL), reg(NULL), isUnconditionalJump(false) {
00023         init(wordsize);
00024     }
00025 
00026     DisassemblerX86(const DisassemblerX86 &other)
00027         : Disassembler(other), insnSize(other.insnSize), ip(other.ip), insnbufat(other.insnbufat),
00028           segOverride(other.segOverride), branchPrediction(other.branchPrediction),
00029           branchPredictionEnabled(other.branchPredictionEnabled), rexPresent(other.rexPresent), rexW(other.rexW), 
00030           rexR(other.rexR), rexX(other.rexX), rexB(other.rexB), sizeMustBe64Bit(other.sizeMustBe64Bit),
00031           operandSizeOverride(other.operandSizeOverride), addressSizeOverride(other.addressSizeOverride),
00032           lock(other.lock), repeatPrefix(other.repeatPrefix), modregrmByteSet(other.modregrmByteSet),
00033           modregrmByte(other.modregrmByte), modeField(other.modeField), rmField(other.rmField), modrm(other.modrm),
00034           reg(other.reg), isUnconditionalJump(other.isUnconditionalJump) {
00035     }
00036     
00037     virtual ~DisassemblerX86() {}
00038 
00039     virtual DisassemblerX86 *clone() const { return new DisassemblerX86(*this); }
00040 
00042     virtual bool can_disassemble(SgAsmGenericHeader*) const;
00043 
00045     virtual SgAsmInstruction *disassembleOne(const MemoryMap *map, rose_addr_t start_va, AddressSet *successors=NULL);
00046 
00048     virtual SgAsmInstruction *make_unknown_instruction(const Exception&);
00049 
00050 
00051     /*========================================================================================================================
00052      * Data types
00053      *========================================================================================================================*/
00054 private:
00055 
00059     class ExceptionX86: public Exception {
00060     public:
00061         ExceptionX86(const std::string &mesg, const DisassemblerX86 *d)
00062             : Exception(mesg, d->ip, d->insnbuf, 8*d->insnbufat)
00063             {}
00064         ExceptionX86(const std::string &mesg, const DisassemblerX86 *d, size_t bit)
00065             : Exception(mesg, d->ip, d->insnbuf, bit)
00066             {}
00067     };
00068 
00070     enum RegisterMode 
00071         {
00072         rmLegacyByte, rmRexByte, rmWord, rmDWord, rmQWord, rmSegment, rmST, rmMM, rmXMM, rmControl, rmDebug, rmReturnNull
00073     };
00074 
00075     /* MMX registers? See mmPrefix method */
00076     enum MMPrefix 
00077         {
00078                 mmNone, mmF3, mm66, mmF2
00079         };
00080 
00081 
00082 
00083 
00084     /*========================================================================================================================
00085      * Methods for reading and writing bytes of the instruction.  These keep track of how much has been read or written.
00086      *========================================================================================================================*/
00087 private:
00088 
00092     uint8_t getByte();
00093 
00097     uint16_t getWord();
00098 
00102     uint32_t getDWord();
00103 
00107     uint64_t getQWord();
00108 
00109     /*========================================================================================================================
00110      * Miscellaneous helper methods
00111      *========================================================================================================================*/
00112 private:
00115     SgAsmExpression *currentDataSegment() const;
00116 
00120     X86InstructionSize effectiveAddressSize() const;
00121 
00123     RegisterMode effectiveOperandMode() const {
00124         return sizeToMode(effectiveOperandSize());
00125     }
00126 
00130     X86InstructionSize effectiveOperandSize() const;
00131 
00133     SgAsmType *effectiveOperandType() const {
00134         return sizeToType(effectiveOperandSize());
00135     }
00136 
00138     bool longMode() const {
00139         return insnSize == x86_insnsize_64;
00140     }
00141 
00142     /* FIXME: documentation? */
00143     MMPrefix mmPrefix() const;
00144 
00146     void not64() const {
00147         if (longMode())
00148             throw ExceptionX86("not valid for 64-bit code", this);
00149     }
00150 
00153     void setRex(uint8_t prefix);
00154 
00156     static RegisterMode sizeToMode(X86InstructionSize);
00157 
00160     static SgAsmType *sizeToType(X86InstructionSize s);
00161 
00162 
00163 
00164     /*========================================================================================================================
00165      * Methods that construct something. (Their names all start with "make".)
00166      *========================================================================================================================*/
00167 private:
00168 
00171     SgAsmExpression *makeAddrSizeValue(int64_t val, size_t bit_offset, size_t bit_size);
00172 
00177     SgAsmx86Instruction *makeInstruction(X86InstructionKind kind, const std::string &mnemonic,
00178                                          SgAsmExpression *op1=NULL, SgAsmExpression *op2=NULL,
00179                                          SgAsmExpression *op3=NULL, SgAsmExpression *op4=NULL);
00180 
00182     SgAsmx86RegisterReferenceExpression *makeIP();
00183 
00184     /* FIXME: documentation? */
00185     SgAsmx86RegisterReferenceExpression *makeOperandRegisterByte(bool rexExtension, uint8_t registerNumber);
00186 
00187     /* FIXME: documentation? */
00188     SgAsmx86RegisterReferenceExpression *makeOperandRegisterFull(bool rexExtension, uint8_t registerNumber);
00189 
00192     SgAsmx86RegisterReferenceExpression *makeRegister(uint8_t fullRegisterNumber, RegisterMode, SgAsmType *registerType=NULL) const;
00193 
00194     /* FIXME: documentation? */
00195     SgAsmx86RegisterReferenceExpression *makeRegisterEffective(uint8_t fullRegisterNumber) {
00196         return makeRegister(fullRegisterNumber, effectiveOperandMode());
00197     }
00198 
00199     /* FIXME: documentation? */
00200     SgAsmx86RegisterReferenceExpression *makeRegisterEffective(bool rexExtension, uint8_t registerNumber) {
00201         return makeRegister(registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode());
00202     }
00203 
00205     SgAsmExpression *makeSegmentRegister(X86SegmentRegister so, bool insn64) const;
00206 
00207 
00208 
00209     /*========================================================================================================================
00210      * Methods for operating on the ModR/M byte.
00211      *========================================================================================================================*/
00212 private:
00213 
00227     void getModRegRM(RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = NULL);
00228 
00230     SgAsmMemoryReferenceExpression *decodeModrmMemory();
00231 
00234     void fillInModRM(RegisterMode rmMode, SgAsmType *t);
00235 
00237     SgAsmExpression *makeModrmNormal(RegisterMode, SgAsmType *mrType);
00238 
00241     SgAsmx86RegisterReferenceExpression *makeModrmRegister(RegisterMode, SgAsmType* mrType=NULL);
00242 
00244     void requireMemory() const {
00245         if (!modregrmByteSet)
00246             throw ExceptionX86("requires Mod/RM byte", this);
00247         if (modeField == 3)
00248             throw ExceptionX86("requires memory", this);
00249     }
00250 
00251 
00252 
00253     /*========================================================================================================================
00254      * Methods that construct an SgAsmExpression for an immediate operand.
00255      *========================================================================================================================*/
00256 private:
00257 
00258     SgAsmExpression *getImmByte();
00259     SgAsmExpression *getImmWord();
00260     SgAsmExpression* getImmDWord();
00261     SgAsmExpression* getImmQWord();
00262     SgAsmExpression *getImmForAddr();
00263     SgAsmExpression *getImmIv();
00264     SgAsmExpression *getImmJz();
00265     SgAsmExpression *getImmByteAsIv();
00266     SgAsmExpression *getImmIzAsIv();
00267     SgAsmExpression *getImmJb();
00268 
00269 
00270 
00271 
00272     /*========================================================================================================================
00273      * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
00274      *========================================================================================================================*/
00275 private:
00276 
00279     SgAsmx86Instruction *disassemble();
00280 
00282     SgAsmx86Instruction *decodeOpcode0F();
00283 
00285     SgAsmx86Instruction *decodeOpcode0F38();
00286 
00288     SgAsmx86Instruction *decodeX87InstructionD8();
00289 
00291     SgAsmx86Instruction *decodeX87InstructionD9();
00292 
00294     SgAsmx86Instruction *decodeX87InstructionDA();
00295 
00297     SgAsmx86Instruction *decodeX87InstructionDB();
00298 
00300     SgAsmx86Instruction *decodeX87InstructionDC();
00301 
00303     SgAsmx86Instruction *decodeX87InstructionDD();
00304 
00306     SgAsmx86Instruction *decodeX87InstructionDE();
00307 
00309     SgAsmx86Instruction *decodeX87InstructionDF();
00310 
00312     SgAsmx86Instruction *decodeGroup1(SgAsmExpression *imm);
00313 
00315     SgAsmx86Instruction *decodeGroup1a();
00316 
00318     SgAsmx86Instruction *decodeGroup2(SgAsmExpression *count);
00319 
00321     SgAsmx86Instruction *decodeGroup3(SgAsmExpression *immMaybe);
00322 
00324     SgAsmx86Instruction *decodeGroup4();
00325 
00327     SgAsmx86Instruction *decodeGroup5();
00328 
00330     SgAsmx86Instruction *decodeGroup6();
00331 
00334     SgAsmx86Instruction *decodeGroup7();
00335 
00337     SgAsmx86Instruction *decodeGroup8(SgAsmExpression *imm);
00338 
00340     SgAsmx86Instruction *decodeGroup11(SgAsmExpression *imm);
00341 
00343     SgAsmx86Instruction *decodeGroup15();
00344 
00346     SgAsmx86Instruction *decodeGroup16();
00347 
00349     SgAsmx86Instruction *decodeGroupP();
00350 
00351 
00352 
00353     /*========================================================================================================================
00354      * Data members and their initialization.
00355      *========================================================================================================================*/
00356 private:
00357 
00359     void init(size_t wordsize);
00360 
00362     void startInstruction(SgAsmx86Instruction *insn) {
00363         startInstruction(insn->get_address(), NULL, 0);
00364         insnSize = insn->get_baseSize();
00365         lock = insn->get_lockPrefix();
00366         branchPrediction = insn->get_branchPrediction();
00367         branchPredictionEnabled = branchPrediction != x86_branch_prediction_none;
00368         segOverride = insn->get_segmentOverride();
00369     }
00370     
00372     void startInstruction(rose_addr_t start_va, const uint8_t *buf, size_t bufsz) {
00373         ip = start_va;
00374         insnbuf = SgUnsignedCharList(buf, buf+bufsz);
00375         insnbufat = 0;
00376 
00377         /* Prefix flags */
00378         segOverride = x86_segreg_none;
00379         branchPrediction = x86_branch_prediction_none;
00380         branchPredictionEnabled = false;
00381         rexPresent = rexW = rexR = rexX = rexB = false;
00382         sizeMustBe64Bit = false;
00383         operandSizeOverride = false;
00384         addressSizeOverride = false;
00385         lock = false;
00386         repeatPrefix = x86_repeat_none;
00387         modregrmByteSet = false;
00388         modregrmByte = modeField = regField = rmField = 0; /*arbitrary since modregrmByteSet is false*/
00389         modrm = reg = NULL;
00390         isUnconditionalJump = false;
00391     }
00392 
00393     /* Per-disassembler settings; see init() */
00394     X86InstructionSize insnSize;                
00396     /* Per-instruction settings; see startInstruction() */
00397     uint64_t ip;                                
00398     SgUnsignedCharList insnbuf;                 
00399     size_t insnbufat;                           
00401     /* Temporary flags set by the instruction; initialized by startInstruction() */
00402     X86SegmentRegister segOverride;             
00403     X86BranchPrediction branchPrediction;       /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
00404     bool branchPredictionEnabled;
00405     bool rexPresent, rexW, rexR, rexX, rexB;    
00406     bool sizeMustBe64Bit;                       
00407     bool operandSizeOverride;                   
00408     bool addressSizeOverride;                   
00409     bool lock;                                  
00410     X86RepeatPrefix repeatPrefix;               
00411     bool modregrmByteSet;                       
00412     uint8_t modregrmByte;                       
00413     uint8_t modeField;                          
00414     uint8_t regField;                           
00415     uint8_t rmField;                            
00416     SgAsmExpression *modrm;                     
00417     SgAsmExpression *reg;                       
00418     bool isUnconditionalJump;                   
00419 };
00420 
00421 #endif

Generated on Tue Jan 31 05:31:37 2012 for ROSE by  doxygen 1.4.7