ROSE  0.11.145.0
InstructionCache.h
1 #ifndef ROSE_BinaryAnalysis_InstructionCache_H
2 #define ROSE_BinaryAnalysis_InstructionCache_H
3 #include <featureTests.h>
4 #if defined(ROSE_ENABLE_BINARY_ANALYSIS) && __cplusplus >= 201103L
5 
6 #include <Rose/BinaryAnalysis/Disassembler/BasicTypes.h>
7 
#include <atomic>
#include <memory>
#include <unordered_map>
10 
11 namespace Rose {
12 namespace BinaryAnalysis {
13 
15 // ManagedInstruction
17 
25 class ManagedInstruction {
26 private:
27  // Every ManagedInstruction is owned by a cache. The user might create additional shared pointers to this object, but as long
28  // as any ManagedInstruction object exists and can potentially be in the ABSENT state, we need to have a cache that can
29  // reconstruct the AST.
30  InstructionCache *cache; // not null, set by constructor and never changed
31 
32  // Protects all following data members
33  mutable SAWYER_THREAD_TRAITS::Mutex mutex_;
34 
35  // As is typical of cache-like objects, most of the data members are mutable because the some of the member functions that
36  // modify them are conceptually const. For example, the operator-> is simply a dereference from the caller's point of void
37  // and is thus const, but under the covers it needs to be able to convert this object from the ABSENT state to the PRESENT
38  // state.
39 
40  // "time" of last dereference. This informs the cache eviction algorithm.
41  mutable size_t lastAccess;
42 
43  // C++11 doesn't have discriminated unions like C++17, so we do it the hard way.
44  enum State {
45  ABSENT, // the pointer is non-null but the AST is not present
46  PRESENT // the AST is present or is a null pointer
47  };
48  mutable State state;
49  union U {
50  SgAsmInstruction *ast; // the AST or null when in the PRESENT state
51  rose_addr_t va; // the instruction starting address when in the ABSENT state.
52  };
53  mutable U u;
54 
55 private:
56  friend class InstructionCache;
57 
58  ManagedInstruction() = delete;
59  ManagedInstruction(const ManagedInstruction&) = delete;
60  ManagedInstruction& operator=(const ManagedInstruction&) = delete;
61 
62  ManagedInstruction(InstructionCache *cache, rose_addr_t va)
63  : cache{cache}, state{ABSENT}, u{.va = va} {
64  ASSERT_not_null(cache);
65  }
66 
67  explicit ManagedInstruction(InstructionCache *cache)
68  : cache{cache}, state{PRESENT}, u{.ast = nullptr} {
69  ASSERT_not_null(cache);
70  }
71 
72  // There is no safe way to do this with implicit locking. Any reference we would return could be held indefinitely by the
73  // caller and there's no way we can automatically lock it.
74  SgAsmInstruction& operator*() const = delete;
75 
76 public:
85  LockedInstruction operator->() const; // hot
86 
87 private:
88  friend class InstructionPtr;
89 
90  // True if the underlying instructon is a null pointer.
91  bool isNull() const; // hot
92 
93  // Create a locking pointer around the AST, and mark the AST as having been accessed.
94  LockedInstruction lock() const; // hot
95 
96  // Make sure the AST is present and return a special pointer that causes it to be locked in the cache. The function is const
97  // because it's typically called from a const context (pointer dereference) and from the user's point of void is constant even
98  // though under the covers it's creating a new AST and swapping it into this object.
99  LockedInstruction makePresentNS() const; // hot
100 
101  // Evicts the AST from memory, deleting it from this object and replacing it with only the instruction address. The
102  // instruction address, together with the information stored in the cache, is enough to recreate the AST if we ever need it
103  // again.
104  void evict();
105 
106  // Update the last access time used by the cache eviction algorithm. The function is const because it's typically called
107  // in a const context (pointer dereferencing).
108  void updateTimerNS() const; // hot
109 
110  // Take the AST and its ownership away from this object, returning the AST. Throws an exception if the AST is locked, since
111  // its not possible for the returned raw pointer and the cache to share ownership.
112  SgAsmInstruction* take();
113 };
114 
116 // LockedInstruction
118 
128 class LockedInstruction {
129 private:
130  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
131  SgAsmInstruction *insn;
132 
133 public:
135  LockedInstruction();
136 
140  explicit LockedInstruction(SgAsmInstruction *insn); // hot
141 
146  explicit LockedInstruction(const InstructionPtr &insn);
147 
151  LockedInstruction(const LockedInstruction &other);
152 
158  LockedInstruction& operator=(const LockedInstruction &other);
159 
163  ~LockedInstruction(); // hot
164 
171  void reset();
172 
179  SgAsmInstruction& operator*() const;
180 
186  SgAsmInstruction* operator->() const; // hot
187 
191  SgAsmInstruction* get() const;
192 
198  explicit operator bool() const;
199 };
200 
202 // InstructionPtr
204 
264 class InstructionPtr {
265  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
266  std::shared_ptr<ManagedInstruction> mi_;
267 
268 public:
270  InstructionPtr() {}
271 
273  InstructionPtr(const InstructionPtr &other)
274  : mi_(other.mi_) {}
275 
276 
280  InstructionPtr& operator=(const InstructionPtr &other); // hot
281 
287  void reset();
288 
289  // Dereferences are inherently unsafe because we have no opportunity to lock the instruction in a way that we can then unlock
290  // it, and we have no control over the lifetime of the reference that we would return.
291  SgAsmInstruction& operator*() const = delete;
292 
298  LockedInstruction operator->() const; // hot
299 
305  explicit operator bool() const; // hot
306 
311  LockedInstruction lock() const;
312 
320  SgAsmInstruction* take();
321 
327  bool operator==(const InstructionPtr &other) const;
328  bool operator!=(const InstructionPtr &other) const;
329  bool operator<=(const InstructionPtr &other) const;
330  bool operator>=(const InstructionPtr &other) const;
331  bool operator<(const InstructionPtr &other) const;
332  bool operator>(const InstructionPtr &other) const;
333  bool operator==(std::nullptr_t) const;
334  bool operator!=(std::nullptr_t) const; // hot
337 private:
338  friend class InstructionCache;
339 
340  // Construct pointer to a ManagedInstruction that exists in an instruction cache. */
341  static InstructionPtr instance(InstructionCache *cache, rose_addr_t va);
342  static InstructionPtr instance(InstructionCache *cache);
343 };
344 
346 // InstructionCache
348 
355 class InstructionCache: public Sawyer::SharedObject {
356 public:
358  class Exception: public Rose::Exception {
359  public:
360  Exception(const std::string &mesg)
361  : Rose::Exception(mesg) {}
362  ~Exception() throw() {}
363  };
364 
365 private:
366  MemoryMap::Ptr memory_; // not null, constant for life of object
367  Disassembler::BasePtr decoder_; // not null, constant for life of object
368 
369  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
370  std::unordered_map<rose_addr_t, InstructionPtr> insns_;
371 
372  InstructionCache(const InstructionCache&) = delete;
373  InstructionCache& operator=(const InstructionCache&) = delete;
374 
375 public:
381  InstructionCache(const MemoryMap::Ptr &memory, const Disassembler::BasePtr &decoder)
382  : memory_(memory), decoder_(decoder) {
383  ASSERT_not_null(memory);
384  ASSERT_not_null(decoder);
385  }
386 
392  MemoryMap::Ptr memoryMap() const {
393  return memory_; // mo lock necessary since memory_ can never change
394  }
395 
399  Disassembler::BasePtr decoder() const {
400  return decoder_; // no lock necessary since decoder_ can never change.
401  }
402 
411  InstructionPtr get(rose_addr_t va);
412 
416  LockedInstruction lock(rose_addr_t va);
417 
421  void evict();
422 
423 private:
424  friend class ManagedInstruction;
425 
426  // Decode a single instruction at the specified address. This function is thread safe.
427  SgAsmInstruction* decode(rose_addr_t);
428 };
429 
431 // InstructionGuard
433 
437 class InstructionGuard {
438  // The InstructionGuard was originally slightly more complicated, but the intruction of the automatic temporary locking
439  // made it a lot simpler! All we need to do is hold onto a locked instruction pointer. However, we keep this class around because
440  // it's better documentation for the programmer's intent than simply holding a locked pointer.
441  LockedInstruction lock;
442 
443 public:
445  explicit InstructionGuard(const InstructionPtr &insn)
446  : lock(insn) {}
447 };
448 
450 // Inline definitions for hot functions
452 
453 inline InstructionPtr&
454 InstructionPtr::operator=(const InstructionPtr &other) {
455  SAWYER_THREAD_TRAITS::LockGuard2 lock(mutex_, other.mutex_);
456  mi_ = other.mi_;
457  return *this;
458 }
459 
460 inline LockedInstruction
461 InstructionPtr::operator->() const {
462  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
463  ASSERT_not_null(mi_);
464  ManagedInstruction &mi = *mi_.get();
465  return mi.lock();
466 }
467 
468 inline LockedInstruction
469 ManagedInstruction::operator->() const {
470  return lock();
471 }
472 
473 inline LockedInstruction
474 ManagedInstruction::lock() const {
475  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
476  updateTimerNS();
477  return makePresentNS();
478 }
479 
480 inline void
481 ManagedInstruction::updateTimerNS() const {
482  static size_t nextTimer = 0;
483  lastAccess = ++nextTimer;
484 }
485 
486 inline LockedInstruction
487 ManagedInstruction::makePresentNS() const {
488  if (ABSENT == state) { // unlikely
489  SgAsmInstruction *decoded = cache->decode(u.va);
490  ASSERT_not_null(decoded); // at worst, the decoder will return an unknown instruction
491  state = PRESENT; // no-throw
492  u.ast = decoded; // no-throw
493  }
494  return LockedInstruction{u.ast};
495 }
496 
497 inline
498 LockedInstruction::LockedInstruction(SgAsmInstruction *insn)
499  : insn(insn) {
500  if (insn)
501  insn->adjustCacheLockCount(+1); // ROSETTA generated, thus cannot be inlined
502 }
503 
504 inline
505 LockedInstruction::~LockedInstruction() {
506  if (insn)
507  insn->adjustCacheLockCount(-1);
508 }
509 
510 inline SgAsmInstruction*
511 LockedInstruction::operator->() const {
512  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
513  ASSERT_not_null(insn);
514  return insn;
515 }
516 
517 inline
518 InstructionPtr::operator bool() const {
519  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
520  return mi_.get() ? !(*mi_).isNull() : false;
521 }
522 
523 inline bool
524 ManagedInstruction::isNull() const {
525  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
526  // A null pointer can be in the absent state only if it was never yet in the present state. This is because all we know
527  // about an absent pointer is it's address, not whether we can create an instruction AST at that address. Therefore, we
528  // have to try to create the AST.
529  makePresentNS();
530  return u.ast == nullptr;
531 }
532 
533 inline bool
534 InstructionPtr::operator!=(const std::nullptr_t) const {
535  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
536  return mi_ && !(*mi_).isNull()? true : false;
537 }
538 
539 
540 
541 
542 } // namespace
543 } // namespace
544 #endif
545 #endif
Base class for machine instructions.
Main namespace for the ROSE library.
MemoryMapPtr Ptr
Reference counting pointer.
Definition: MemoryMap.h:115
Base class for reference counted objects.
Definition: SharedObject.h:64
Sawyer::SharedPointer< Base > BasePtr
Reference counted pointer for disassemblers.
Base class for all ROSE exceptions.
Definition: Rose/Exception.h:9
void adjustCacheLockCount(int increment)
Property: Cache lock count.
State
Decoder state.
Definition: String.h:198