ROSE  0.11.145.0
Disassembler/X86.h
1 /* Disassembly specific to the x86 architecture. */
2 #ifndef ROSE_BinaryAnalysis_Disassembler_X86_H
3 #define ROSE_BinaryAnalysis_Disassembler_X86_H
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Rose/BinaryAnalysis/Disassembler/Base.h>
7 
8 #include <Rose/BinaryAnalysis/InstructionEnumsX86.h>
9 #include "Cxx_GrammarSerialization.h"
10 
11 #include <boost/serialization/access.hpp>
12 #include <boost/serialization/base_object.hpp>
13 #include <boost/serialization/export.hpp>
14 #include <boost/serialization/split_member.hpp>
15 
16 namespace Rose {
17 namespace BinaryAnalysis {
18 namespace Disassembler {
19 
22 class X86: public Base {
23 public:
25  using Ptr = X86Ptr;
26 
27 private:
28  /* Per-disassembler settings; see init() */
29  X86InstructionSize insnSize;
30  size_t wordSize;
32  /* Per-instruction settings; see startInstruction() */
33  struct State {
34  uint64_t ip;
35  SgUnsignedCharList insnbuf;
36  size_t insnbufat;
38  /* Temporary flags set by the instruction; initialized by startInstruction() */
39  X86SegmentRegister segOverride;
40  X86BranchPrediction branchPrediction; /*FIXME: this seems to set only to x86_branch_prediction_true [RPM 2009-06-16] */
41  bool branchPredictionEnabled;
42  bool rexPresent, rexW, rexR, rexX, rexB;
43  bool sizeMustBe64Bit;
44  bool operandSizeOverride;
45  bool addressSizeOverride;
46  bool lock;
47  X86RepeatPrefix repeatPrefix;
48  bool modregrmByteSet;
49  uint8_t modregrmByte;
50  uint8_t modeField;
51  uint8_t regField;
52  uint8_t rmField;
53  SgAsmExpression *modrm;
54  SgAsmExpression *reg;
55  bool isUnconditionalJump;
57  State()
58  : ip(0), insnbufat(0), segOverride(x86_segreg_none), branchPrediction(x86_branch_prediction_none),
59  branchPredictionEnabled(false), rexPresent(false), rexW(false), rexR(false), rexX(false), rexB(false),
60  sizeMustBe64Bit(false), operandSizeOverride(false), addressSizeOverride(false), lock(false),
61  repeatPrefix(x86_repeat_none), modregrmByteSet(false), modregrmByte(0), modeField(0), regField(0),
62  rmField(0), modrm(nullptr), reg(nullptr), isUnconditionalJump(false) {}
63  };
64 
66  // Serialization
68 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
69 private:
70  friend class boost::serialization::access;
71 
72  template<class S>
73  void serialize_common(S &s, const unsigned /*version*/) {
74  // Most of the data members don't need to be saved because we'll only save/restore disassemblers that are between
75  // instructions (we never save one while it's processing an instruction). Therefore, most of the data members can be
76  // constructed in their initial state by a combination of default constructor and init().
77  s & BOOST_SERIALIZATION_BASE_OBJECT_NVP(Base);
78  s & BOOST_SERIALIZATION_NVP(wordSize);
79  }
80 
81  template<class S>
82  void save(S &s, const unsigned version) const {
83  serialize_common(s, version);
84  }
85 
86  template<class S>
87  void load(S &s, const unsigned version) {
88  serialize_common(s, version);
89  init(wordSize);
90  }
91 
92  BOOST_SERIALIZATION_SPLIT_MEMBER();
93 #endif
94 
96  // Constructors
98 
99 protected:
100  // Default constructor for serialization
101  X86();
102 
103  explicit X86(size_t wordsize);
104 
105 public:
107  static Ptr instance(size_t wordSize);
108 
109  virtual ~X86() {}
110 
111  virtual Base::Ptr clone() const override;
112 
114  // Public methods
116 public:
117  virtual bool canDisassemble(SgAsmGenericHeader*) const override;
118 
119  virtual Unparser::BasePtr unparser() const override;
120 
121  virtual SgAsmInstruction *disassembleOne(const MemoryMap::Ptr &map, rose_addr_t va,
122  AddressSet *successors=nullptr) override;
123 
124  virtual SgAsmInstruction *makeUnknownInstruction(const Exception&) override;
125 
126 
127  /*========================================================================================================================
128  * Data types
129  *========================================================================================================================*/
130 private:
131 
135  class ExceptionX86: public Exception {
136  public:
137  ExceptionX86(const std::string &mesg, const State &state)
138  : Exception(mesg, state.ip) {
139  ASSERT_require(state.insnbufat <= state.insnbuf.size());
140  if (state.insnbufat > 0)
141  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
142  bit = 8 * state.insnbufat;
143  }
144 
145  ExceptionX86(const std::string &mesg, const State &state, size_t bit)
146  : Exception(mesg, state.ip) {
147  ASSERT_require(state.insnbufat <= state.insnbuf.size());
148  if (state.insnbufat > 0)
149  bytes = SgUnsignedCharList(&state.insnbuf[0], &state.insnbuf[0] + state.insnbufat);
150  this->bit = bit;
151  }
152  };
153 
// Selector passed to makeRegister(), getModRegRM() and related methods; sizeToMode() produces one from an
// X86InstructionSize.
// NOTE(review): the meaning of each enumerator is inferred from its name only (byte registers without/with REX,
// 16/32/64-bit registers, segment, x87 ST, MMX, XMM, control, debug) -- confirm against makeRegister().
enum RegisterMode {
    rmLegacyByte,
    rmRexByte,
    rmWord,
    rmDWord,
    rmQWord,
    rmSegment,
    rmST,
    rmMM,
    rmXMM,
    rmControl,
    rmDebug,
    rmReturnNull
};
158 
/* Result type of the mmPrefix() method. Possibly related to MMX registers; see mmPrefix().
 * NOTE(review): the enumerators look like they are named after the 0xF3, 0x66 and 0xF2 prefix bytes -- confirm in the
 * implementation of mmPrefix(). */
enum MMPrefix {
    mmNone,
    mmF3,
    mm66,
    mmF2
};
163 
164 
165  /*========================================================================================================================
166  * Methods for reading and writing bytes of the instruction. These keep track of how much has been read or written.
167  *========================================================================================================================*/
168 private:
169 
173  uint8_t getByte(State &state) const;
174 
178  uint16_t getWord(State &state) const;
179 
183  uint32_t getDWord(State &state) const;
184 
188  uint64_t getQWord(State &state) const;
189 
190  /*========================================================================================================================
191  * Miscellaneous helper methods
192  *========================================================================================================================*/
193 private:
196  SgAsmExpression *currentDataSegment(State &state) const;
197 
201  X86InstructionSize effectiveAddressSize(State &state) const;
202 
204  RegisterMode effectiveOperandMode(State &state) const {
205  return sizeToMode(effectiveOperandSize(state));
206  }
207 
211  X86InstructionSize effectiveOperandSize(State &state) const;
212 
214  SgAsmType *effectiveOperandType(State &state) const {
215  return sizeToType(effectiveOperandSize(state));
216  }
217 
219  bool longMode() const {
220  return insnSize == x86_insnsize_64;
221  }
222 
223  /* FIXME: documentation? */
224  MMPrefix mmPrefix(State &state) const;
225 
227  void not64(State &state) const {
228  if (longMode())
229  throw ExceptionX86("not valid for 64-bit code", state);
230  }
231 
234  void setRex(State &state, uint8_t prefix) const;
235 
237  static RegisterMode sizeToMode(X86InstructionSize);
238 
241  static SgAsmType *sizeToType(X86InstructionSize s);
242 
243 
244 
245  /*========================================================================================================================
246  * Methods that construct something. (Their names all start with "make".)
247  *========================================================================================================================*/
248 private:
249 
252  SgAsmExpression *makeAddrSizeValue(State &state, int64_t val, size_t bit_offset, size_t bit_size) const;
253 
258  SgAsmX86Instruction *makeInstruction(State &state, X86InstructionKind kind, const std::string &mnemonic,
259  SgAsmExpression *op1=nullptr, SgAsmExpression *op2=nullptr,
260  SgAsmExpression *op3=nullptr, SgAsmExpression *op4=nullptr) const;
261 
263  SgAsmRegisterReferenceExpression *makeIP() const;
264 
265  /* FIXME: documentation? */
266  SgAsmRegisterReferenceExpression *makeOperandRegisterByte(State &state, bool rexExtension, uint8_t registerNumber) const;
267 
268  /* FIXME: documentation? */
269  SgAsmRegisterReferenceExpression *makeOperandRegisterFull(State &state, bool rexExtension, uint8_t registerNumber) const;
270 
273  SgAsmRegisterReferenceExpression *makeRegister(State &state, uint8_t fullRegisterNumber, RegisterMode,
274  SgAsmType *registerType=nullptr) const;
275 
276  /* FIXME: documentation? */
277  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, uint8_t fullRegisterNumber) const {
278  return makeRegister(state, fullRegisterNumber, effectiveOperandMode(state));
279  }
280 
281  /* FIXME: documentation? */
282  SgAsmRegisterReferenceExpression *makeRegisterEffective(State &state, bool rexExtension, uint8_t registerNumber) const {
283  return makeRegister(state, registerNumber + (rexExtension ? 8 : 0), effectiveOperandMode(state));
284  }
285 
287  SgAsmExpression *makeSegmentRegister(State &state, X86SegmentRegister so, bool insn64) const;
288 
289 
290 
291  /*========================================================================================================================
292  * Methods for operating on the ModR/M byte.
293  *========================================================================================================================*/
294 private:
295 
309  void getModRegRM(State &state, RegisterMode regMode, RegisterMode rmMode, SgAsmType *t, SgAsmType *tForReg = nullptr) const;
310 
312  SgAsmMemoryReferenceExpression *decodeModrmMemory(State &state) const;
313 
316  void fillInModRM(State &state, RegisterMode rmMode, SgAsmType *t) const;
317 
319  SgAsmExpression *makeModrmNormal(State &state, RegisterMode, SgAsmType *mrType) const;
320 
323  SgAsmRegisterReferenceExpression *makeModrmRegister(State &state, RegisterMode, SgAsmType* mrType=nullptr) const;
324 
326  void requireMemory(State &state) const {
327  if (!state.modregrmByteSet)
328  throw ExceptionX86("requires Mod/RM byte", state);
329  if (state.modeField == 3)
330  throw ExceptionX86("requires memory", state);
331  }
332 
333 
334 
335  /*========================================================================================================================
336  * Methods that construct an SgAsmExpression for an immediate operand.
337  *========================================================================================================================*/
338 private:
339 
340  SgAsmExpression *getImmByte(State &state) const;
341  SgAsmExpression *getImmWord(State &state) const;
342  SgAsmExpression* getImmDWord(State &state) const;
343  SgAsmExpression* getImmQWord(State &state) const;
344  SgAsmExpression *getImmForAddr(State &state) const;
345  SgAsmExpression *getImmIv(State &state) const;
346  SgAsmExpression *getImmJz(State &state) const;
347  SgAsmExpression *getImmByteAsIv(State &state) const;
348  SgAsmExpression *getImmIzAsIv(State &state) const;
349  SgAsmExpression *getImmJb(State &state) const;
350 
351 
352 
353 
354  /*========================================================================================================================
355  * Main disassembly functions, each generally containing a huge "switch" statement based on one of the opcode bytes.
356  *========================================================================================================================*/
357 private:
358 
361  SgAsmX86Instruction *disassemble(State &state) const;
362 
364  SgAsmX86Instruction *decodeOpcode0F(State &state) const;
365 
367  SgAsmX86Instruction *decodeOpcode0F38(State &state) const;
368 
370  SgAsmX86Instruction *decodeX87InstructionD8(State &state) const;
371 
373  SgAsmX86Instruction *decodeX87InstructionD9(State &state) const;
374 
376  SgAsmX86Instruction *decodeX87InstructionDA(State &state) const;
377 
379  SgAsmX86Instruction *decodeX87InstructionDB(State &state) const;
380 
382  SgAsmX86Instruction *decodeX87InstructionDC(State &state) const;
383 
385  SgAsmX86Instruction *decodeX87InstructionDD(State &state) const;
386 
388  SgAsmX86Instruction *decodeX87InstructionDE(State &state) const;
389 
391  SgAsmX86Instruction *decodeX87InstructionDF(State &state) const;
392 
394  SgAsmX86Instruction *decodeGroup1(State &state, SgAsmExpression *imm) const;
395 
397  SgAsmX86Instruction *decodeGroup1a(State &state) const;
398 
400  SgAsmX86Instruction *decodeGroup2(State &state, SgAsmExpression *count) const;
401 
403  SgAsmX86Instruction *decodeGroup3(State &state, SgAsmExpression *immMaybe) const;
404 
406  SgAsmX86Instruction *decodeGroup4(State &state) const;
407 
409  SgAsmX86Instruction *decodeGroup5(State &state) const;
410 
412  SgAsmX86Instruction *decodeGroup6(State &state) const;
413 
416  SgAsmX86Instruction *decodeGroup7(State &state) const;
417 
419  SgAsmX86Instruction *decodeGroup8(State &state, SgAsmExpression *imm) const;
420 
422  SgAsmX86Instruction *decodeGroup11(State &state, SgAsmExpression *imm) const;
423 
425  SgAsmX86Instruction *decodeGroup15(State &state) const;
426 
428  SgAsmX86Instruction *decodeGroup16(State &state) const;
429 
431  SgAsmX86Instruction *decodeGroupP(State &state) const;
432 
433 
434 
435  /*========================================================================================================================
436  * Supporting functions
437  *========================================================================================================================*/
438 private:
439  // Initialize instances of this class. Called by constructor.
440  void init(size_t wordsize);
441 
442 #if 0 // is this ever used?
443 
444  void startInstruction(State &state, SgAsmX86Instruction *insn) const {
445  startInstruction(insn->get_address(), nullptr, 0);
446  insnSize = insn->get_baseSize();
447  state.lock = insn->get_lockPrefix();
448  state.branchPrediction = insn->get_branchPrediction();
449  state.branchPredictionEnabled = state.branchPrediction != x86_branch_prediction_none;
450  state.segOverride = insn->get_segmentOverride();
451  }
452 #endif
453 
454  // Resets disassembler state to beginning of an instruction for disassembly.
455  void startInstruction(State &state, rose_addr_t start_va, const uint8_t *buf, size_t bufsz) const {
456  state.ip = start_va;
457  state.insnbuf = SgUnsignedCharList(buf, buf+bufsz);
458  state.insnbufat = 0;
459 
460  // Prefix flags
461  state.segOverride = x86_segreg_none;
462  state.branchPrediction = x86_branch_prediction_none;
463  state.branchPredictionEnabled = false;
464  state.rexPresent = state.rexW = state.rexR = state.rexX = state.rexB = false;
465  state.sizeMustBe64Bit = false;
466  state.operandSizeOverride = false;
467  state.addressSizeOverride = false;
468  state.lock = false;
469  state.repeatPrefix = x86_repeat_none;
470  state.modregrmByteSet = false;
471  state.modregrmByte = state.modeField = state.regField = state.rmField = 0; /*arbitrary since modregrmByteSet is false*/
472  state.modrm = state.reg = nullptr;
473  state.isUnconditionalJump = false;
474  }
475 
476  // Add comments to any IP relative addition expressions. We're not constant folding these because it's sometimes useful to
477  // know that the address is relative to the instruction address, but the comment is useful for understanding the disassembly.
478  void commentIpRelative(SgAsmInstruction*);
479 };
480 
481 } // namespace
482 } // namespace
483 } // namespace
484 
485 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
486 BOOST_CLASS_EXPORT_KEY(Rose::BinaryAnalysis::Disassembler::X86);
487 #endif
488 
489 #endif
490 #endif
X86Ptr Ptr
Reference counting pointer.
Instruction is for a 64-bit architecture.
static Ptr instance(size_t wordSize)
Allocating constructor.
Base class for references to a machine register.
virtual Base::Ptr clone() const override
Creates a new copy of a disassembler.
virtual bool canDisassemble(SgAsmGenericHeader *) const override
Predicate determining the suitability of a disassembler for a specific file header.
rose_addr_t ip
Virtual address where failure occurred; zero if no associated instruction.
virtual SgAsmInstruction * makeUnknownInstruction(const Exception &) override
Makes an unknown instruction from an exception.
Base class for machine instructions.
X86BranchPrediction
Intel x86 branch prediction types.
size_t bit
Bit offset in instruction byte sequence where disassembly failed (bit/8 is the index into the "bytes"...
Rose::BinaryAnalysis::X86BranchPrediction const & get_branchPrediction() const
Property: An enum constant describing branch prediction.
rose_addr_t const & get_address() const
Property: Starting virtual address.
bool const & get_lockPrefix() const
Property: Whether the x86 lock prefix was present.
Rose::BinaryAnalysis::X86SegmentRegister const & get_segmentOverride() const
Property: The segment override register.
Main namespace for the ROSE library.
Rose::BinaryAnalysis::X86InstructionSize const & get_baseSize() const
Property: An enum constant describing the base size of an x86 instruction.
MemoryMapPtr Ptr
Reference counting pointer.
Definition: MemoryMap.h:115
Sawyer::SharedPointer< X86 > X86Ptr
Reference counted pointer for Intel X86 decoder.
Reference to memory locations.
Base class for container file headers.
Exception(const std::string &reason)
A bare exception not bound to any particular instruction.
Disassembler for the x86 architecture.
virtual Unparser::BasePtr unparser() const override
Unparser.
Represents one Intel x86 machine instruction.
Base class for expressions.
Base class for binary types.
X86SegmentRegister
Intel x86 segment registers.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t va, AddressSet *successors=nullptr) override
This is the lowest level disassembly function and is implemented in the architecture-specific subclas...
SgUnsignedCharList bytes
Bytes (partial) of failed disassembly, including byte at failure.
X86InstructionKind
List of all x86 instructions known to the ROSE disassembler/assembler.
X86InstructionSize
Intel x86 instruction size constants.
Virtual base class for instruction disassemblers.
X86RepeatPrefix
Intel x86 instruction repeat prefix.