ROSE  0.11.31.0
BinaryInstructionCache.h
1 #ifndef ROSE_BinaryAnalysis_InstructionCache_H
2 #define ROSE_BinaryAnalysis_InstructionCache_H
3 #include <featureTests.h>
4 #if defined(ROSE_ENABLE_BINARY_ANALYSIS) && __cplusplus >= 201103L
5 #include <memory>
6 #include <unordered_map>
7 
8 namespace Rose {
9 namespace BinaryAnalysis {
10 
11 class Disassembler; // from Disassembler.h, but we only need the forward here.
12 class InstructionCache;
13 class InstructionPtr;
14 class LockedInstruction;
15 class ManagedInstruction;
16 
18 // ManagedInstruction
20 
28 class ManagedInstruction {
29 private:
30  // Every ManagedInstruction is owned by a cache. The user might create additional shared pointers to this object, but as long
31  // as any ManagedInstruction object exists and can potentially be in the ABSENT state, we need to have a cache that can
32  // reconstruct the AST.
33  InstructionCache *cache; // not null, set by constructor and never changed
34 
35  // Protects all following data members
36  mutable SAWYER_THREAD_TRAITS::Mutex mutex_;
37 
38  // As is typical of cache-like objects, most of the data members are mutable because the some of the member functions that
39  // modify them are conceptually const. For example, the operator-> is simply a dereference from the caller's point of void
40  // and is thus const, but under the covers it needs to be able to convert this object from the ABSENT state to the PRESENT
41  // state.
42 
43  // "time" of last dereference. This informs the cache eviction algorithm.
44  mutable size_t lastAccess;
45 
46  // C++11 doesn't have discriminated unions like C++17, so we do it the hard way.
47  enum State {
48  ABSENT, // the pointer is non-null but the AST is not present
49  PRESENT // the AST is present or is a null pointer
50  };
51  mutable State state;
52  union U {
53  SgAsmInstruction *ast; // the AST or null when in the PRESENT state
54  rose_addr_t va; // the instruction starting address when in the ABSENT state.
55  };
56  mutable U u;
57 
58 private:
59  friend class InstructionCache;
60 
61  ManagedInstruction() = delete;
62  ManagedInstruction(const ManagedInstruction&) = delete;
63  ManagedInstruction& operator=(const ManagedInstruction&) = delete;
64 
65  ManagedInstruction(InstructionCache *cache, rose_addr_t va)
66  : cache{cache}, state{ABSENT}, u{.va = va} {
67  ASSERT_not_null(cache);
68  }
69 
70  explicit ManagedInstruction(InstructionCache *cache)
71  : cache{cache}, state{PRESENT}, u{.ast = nullptr} {
72  ASSERT_not_null(cache);
73  }
74 
75  // There is no safe way to do this with implicit locking. Any reference we would return could be held indefinitely by the
76  // caller and there's no way we can automatically lock it.
77  SgAsmInstruction& operator*() const = delete;
78 
79 public:
88  LockedInstruction operator->() const; // hot
89 
90 private:
91  friend class InstructionPtr;
92 
93  // True if the underlying instructon is a null pointer.
94  bool isNull() const; // hot
95 
96  // Create a locking pointer around the AST, and mark the AST as having been accessed.
97  LockedInstruction lock() const; // hot
98 
99  // Make sure the AST is present and return a special pointer that causes it to be locked in the cache. The function is const
100  // because it's typically called from a const context (pointer dereference) and from the user's point of void is constant even
101  // though under the covers it's creating a new AST and swapping it into this object.
102  LockedInstruction makePresentNS() const; // hot
103 
104  // Evicts the AST from memory, deleting it from this object and replacing it with only the instruction address. The
105  // instruction address, together with the information stored in the cache, is enough to recreate the AST if we ever need it
106  // again.
107  void evict();
108 
109  // Update the last access time used by the cache eviction algorithm. The function is const because it's typically called
110  // in a const context (pointer dereferencing).
111  void updateTimerNS() const; // hot
112 
113  // Take the AST and its ownership away from this object, returning the AST. Throws an exception if the AST is locked, since
114  // its not possible for the returned raw pointer and the cache to share ownership.
115  SgAsmInstruction* take();
116 };
117 
119 // LockedInstruction
121 
131 class LockedInstruction {
132 private:
133  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
134  SgAsmInstruction *insn;
135 
136 public:
138  LockedInstruction();
139 
143  explicit LockedInstruction(SgAsmInstruction *insn); // hot
144 
149  explicit LockedInstruction(const InstructionPtr &insn);
150 
154  LockedInstruction(const LockedInstruction &other);
155 
161  LockedInstruction& operator=(const LockedInstruction &other);
162 
166  ~LockedInstruction(); // hot
167 
174  void reset();
175 
182  SgAsmInstruction& operator*() const;
183 
189  SgAsmInstruction* operator->() const; // hot
190 
194  SgAsmInstruction* get() const;
195 
201  explicit operator bool() const;
202 };
203 
205 // InstructionPtr
207 
267 class InstructionPtr {
268  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
269  std::shared_ptr<ManagedInstruction> mi_;
270 
271 public:
273  InstructionPtr() {}
274 
276  InstructionPtr(const InstructionPtr &other)
277  : mi_(other.mi_) {}
278 
279 
283  InstructionPtr& operator=(const InstructionPtr &other); // hot
284 
290  void reset();
291 
292  // Dereferences are inherently unsafe because we have no opportunity to lock the instruction in a way that we can then unlock
293  // it, and we have no control over the lifetime of the reference that we would return.
294  SgAsmInstruction& operator*() const = delete;
295 
301  LockedInstruction operator->() const; // hot
302 
308  explicit operator bool() const; // hot
309 
314  LockedInstruction lock() const;
315 
323  SgAsmInstruction* take();
324 
330  bool operator==(const InstructionPtr &other) const;
331  bool operator!=(const InstructionPtr &other) const;
332  bool operator<=(const InstructionPtr &other) const;
333  bool operator>=(const InstructionPtr &other) const;
334  bool operator<(const InstructionPtr &other) const;
335  bool operator>(const InstructionPtr &other) const;
336  bool operator==(std::nullptr_t) const;
337  bool operator!=(std::nullptr_t) const; // hot
340 private:
341  friend class InstructionCache;
342 
343  // Construct pointer to a ManagedInstruction that exists in an instruction cache. */
344  static InstructionPtr instance(InstructionCache *cache, rose_addr_t va);
345  static InstructionPtr instance(InstructionCache *cache);
346 };
347 
349 // InstructionCache
351 
358 class InstructionCache: public Sawyer::SharedObject {
359 public:
361  class Exception: public Rose::Exception {
362  public:
363  Exception(const std::string &mesg)
364  : Rose::Exception(mesg) {}
365  ~Exception() throw() {}
366  };
367 
368 private:
369  MemoryMap::Ptr memory_; // not null, constant for life of object
370  Disassembler *decoder_; // not null, constant for life of object
371 
372  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
373  std::unordered_map<rose_addr_t, InstructionPtr> insns_;
374 
375  InstructionCache(const InstructionCache&) = delete;
376  InstructionCache& operator=(const InstructionCache&) = delete;
377 
378 public:
384  InstructionCache(const MemoryMap::Ptr &memory, Disassembler *decoder)
385  : memory_(memory), decoder_(decoder) {
386  ASSERT_not_null(memory);
387  ASSERT_not_null(decoder);
388  }
389 
395  MemoryMap::Ptr memoryMap() const {
396  return memory_; // mo lock necessary since memory_ can never change
397  }
398 
402  Disassembler* decoder() const {
403  return decoder_; // no lock necessary since decoder_ can never change.
404  }
405 
414  InstructionPtr get(rose_addr_t va);
415 
419  LockedInstruction lock(rose_addr_t va);
420 
424  void evict();
425 
426 private:
427  friend class ManagedInstruction;
428 
429  // Decode a single instruction at the specified address. This function is thread safe.
430  SgAsmInstruction* decode(rose_addr_t);
431 };
432 
434 // InstructionGuard
436 
440 class InstructionGuard {
441  // The InstructionGuard was originally slightly more complicated, but the intruction of the automatic temporary locking
442  // made it a lot simpler! All we need to do is hold onto a locked instruction pointer. However, we keep this class around because
443  // it's better documentation for the programmer's intent than simply holding a locked pointer.
444  LockedInstruction lock;
445 
446 public:
448  explicit InstructionGuard(const InstructionPtr &insn)
449  : lock(insn) {}
450 };
451 
453 // Inline definitions for hot functions
455 
456 inline InstructionPtr&
457 InstructionPtr::operator=(const InstructionPtr &other) {
458  SAWYER_THREAD_TRAITS::LockGuard2 lock(mutex_, other.mutex_);
459  mi_ = other.mi_;
460  return *this;
461 }
462 
463 inline LockedInstruction
464 InstructionPtr::operator->() const {
465  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
466  ASSERT_not_null(mi_);
467  ManagedInstruction &mi = *mi_.get();
468  return mi.lock();
469 }
470 
471 inline LockedInstruction
472 ManagedInstruction::operator->() const {
473  return lock();
474 }
475 
476 inline LockedInstruction
477 ManagedInstruction::lock() const {
478  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
479  updateTimerNS();
480  return makePresentNS();
481 }
482 
483 inline void
484 ManagedInstruction::updateTimerNS() const {
485  static size_t nextTimer = 0;
486  lastAccess = ++nextTimer;
487 }
488 
489 inline LockedInstruction
490 ManagedInstruction::makePresentNS() const {
491  if (ABSENT == state) { // unlikely
492  SgAsmInstruction *decoded = cache->decode(u.va);
493  ASSERT_not_null(decoded); // at worst, the decoder will return an unknown instruction
494  state = PRESENT; // no-throw
495  u.ast = decoded; // no-throw
496  }
497  return LockedInstruction{u.ast};
498 }
499 
500 inline
501 LockedInstruction::LockedInstruction(SgAsmInstruction *insn)
502  : insn(insn) {
503  if (insn)
504  insn->adjustCacheLockCount(+1); // ROSETTA generated, thus cannot be inlined
505 }
506 
507 inline
508 LockedInstruction::~LockedInstruction() {
509  if (insn)
510  insn->adjustCacheLockCount(-1);
511 }
512 
513 inline SgAsmInstruction*
514 LockedInstruction::operator->() const {
515  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
516  ASSERT_not_null(insn);
517  return insn;
518 }
519 
520 inline
521 InstructionPtr::operator bool() const {
522  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
523  return mi_.get() ? !(*mi_).isNull() : false;
524 }
525 
526 inline bool
527 ManagedInstruction::isNull() const {
528  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
529  // A null pointer can be in the absent state only if it was never yet in the present state. This is because all we know
530  // about an absent pointer is it's address, not whether we can create an instruction AST at that address. Therefore, we
531  // have to try to create the AST.
532  makePresentNS();
533  return u.ast == nullptr;
534 }
535 
536 inline bool
537 InstructionPtr::operator!=(const std::nullptr_t) const {
538  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
539  return mi_ && !(*mi_).isNull()? true : false;
540 }
541 
542 
543 
544 
545 } // namespace
546 } // namespacd
547 #endif
548 #endif
Base class for machine instructions.
Main namespace for the ROSE library.
Base class for reference counted objects.
Definition: SharedObject.h:64
Base class for all ROSE exceptions.
Definition: RoseException.h:9
void adjustCacheLockCount(int increment)
Property: Cache lock count.