ROSE  0.11.145.0
Partitioner2/Engine.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_Engine_H
2 #define ROSE_BinaryAnalysis_Partitioner2_Engine_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <Rose/BasicTypes.h>
7 #include <Rose/BinaryAnalysis/Partitioner2/Exception.h>
8 #include <Rose/BinaryAnalysis/Partitioner2/Modules.h>
9 #include <Rose/BinaryAnalysis/SerialIo.h>
10 
11 #include <Sawyer/DistinctList.h>
12 #include <Sawyer/SharedObject.h>
13 #include <Sawyer/SharedPointer.h>
14 
15 namespace Rose {
16 namespace BinaryAnalysis {
17 namespace Partitioner2 {
18 
155 class Engine: public Sawyer::SharedObject, public Sawyer::SharedFromThis<Engine> {
157  // Internal data structures
159 public:
161  using Ptr = EnginePtr;
162 
163  //--------------------------------------------------------------------------------------------------------------------------
164 public:
168  struct Settings { // Settings for the engine; the data-member declarations are not shown in this listing
     // Judging by serialize() below, the members are:
     //   loader          - settings used during specimen loading
     //   disassembler    - settings for creating the disassembler
     //   partitioner     - settings for creating a partitioner
     //   engine          - settings that control engine behavior
     //   astConstruction - settings for constructing the AST
175  private:
176  friend class boost::serialization::access; // grants Boost.Serialization access to the private serialize() below
177 
     // Serialization hook: passes each settings group to the archive `s`, in the order written.
     // The `version` argument is accepted but not used by this body.
178  template<class S>
179  void serialize(S &s, unsigned version) {
180  s & loader & disassembler & partitioner & engine & astConstruction;
181  }
182 
183  public:
184  ~Settings();
185  Settings();
186  };
187 
188  //--------------------------------------------------------------------------------------------------------------------------
189 public:
192  public:
193  ~Exception() throw ();
194 
196  explicit Exception(const std::string&);
197  };
198 
199  //--------------------------------------------------------------------------------------------------------------------------
200 public:
     // NOTE(review): the class-head lines of the four fragments below are missing from this listing; the names
     // used in these notes come from the visible destructor/constructor declarations.
     //
     // PositionalArgumentParser: abstract interface (specimen() is pure virtual).
203  public:
204  virtual ~PositionalArgumentParser();
206 
     // Return specimen from positional arguments.
213  virtual std::vector<std::string> specimen(const std::vector<std::string>&) const = 0;
214  };
215 
     // NOTE(review): class name not visible here; possibly the parser that returns all positional arguments as
     // the specimen -- confirm against the full header.
218  public:
219  virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
220  };
221 
     // FirstPositionalArguments
224  size_t n_; // NOTE(review): presumably the `n` given to the constructor -- confirm
225  public:
227  explicit FirstPositionalArguments(size_t n); // constructor returning up to n arguments
     // Return specimen from positional arguments.
228  virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
229  };
230 
     // GroupedPositionalArguments
233  size_t n_ = 0; // defaults to zero; NOTE(review): presumably selects which argument group is used -- confirm
234  public:
238  explicit GroupedPositionalArguments(size_t); // NOTE(review): meaning of the size_t argument is not shown here -- confirm
     // Return specimen from positional arguments.
239  virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
240  };
241 
242  //--------------------------------------------------------------------------------------------------------------------------
243 protected:
244  // Engine callback for handling instructions added to basic blocks. This is called when a basic block is discovered,
245  // before it's attached to a partitioner, so it shouldn't really be modifying any state in the engine, but rather only
246  // preparing the basic block to be processed.
248  typedef Sawyer::Container::Map<rose_addr_t /*target*/, std::vector<rose_addr_t> /*sources*/> WorkList;
249  public:
251  protected:
253  public:
254  static Ptr instance();
255  virtual bool operator()(bool chain, const Args &args) override;
256  private:
257  void fixFunctionReturnEdge(const Args&);
258  void fixFunctionCallEdges(const Args&);
259  void addPossibleIndeterminateEdge(const Args&);
260  };
261 
262  //--------------------------------------------------------------------------------------------------------------------------
263 private:
264  // Basic blocks that need to be worked on next. These lists are adjusted whenever a new basic block (or placeholder) is
265  // inserted or erased from the CFG.
266  class BasicBlockWorkList: public CfgAdjustmentCallback {
267  // The following lists are used for adding outgoing E_CALL_RETURN edges to basic blocks based on whether the basic
268  // block is a call to a function that might return. When a new basic block is inserted into the CFG (or a previous
269  // block is removed, modified, and re-inserted), the operator() is called and conditionally inserts the block into the
270  // "pendingCallReturn" list (if the block is a function call that lacks an E_CALL_RETURN edge and the function is known
271  // to return or the analysis was incomplete).
272  //
273  // When we run out of other ways to create basic blocks, we process the pendingCallReturn list from back to front. If
274  // the back block (which gets popped) has a positive may-return result then an E_CALL_RETURN edge is added to the CFG
275  // and the normal recursive BB discovery is resumed. Otherwise if the analysis is incomplete the basic block is moved
276  // to the processedCallReturn list. The entire pendingCallReturn list is processed before proceeding.
277  //
278  // If there is no more pendingCallReturn work to be done, then the processedCallReturn blocks are moved to the
279  // finalCallReturn list and finalCallReturn is sorted by approximate CFG height (i.e., leaves first). The contents
280  // of the finalCallReturn list are then analyzed and the result (or the default may-return value for failed analyses)
281  // is used to decide whether a new CFG edge should be created, possibly adding new basic block addresses to the
282  // list of undiscovered blocks.
283  //
284  Sawyer::Container::DistinctList<rose_addr_t> pendingCallReturn_; // blocks that might need an E_CALL_RETURN edge
285  Sawyer::Container::DistinctList<rose_addr_t> processedCallReturn_; // call sites whose may-return was indeterminate
286  Sawyer::Container::DistinctList<rose_addr_t> finalCallReturn_; // indeterminate call sites awaiting final analysis
287 
288  Sawyer::Container::DistinctList<rose_addr_t> undiscovered_; // undiscovered basic block list (last-in-first-out)
289  EnginePtr engine_; // engine to which this callback belongs
290  size_t maxSorts_; // max sorts before using unsorted lists
291  public:
292  ~BasicBlockWorkList();
293  protected:
294  BasicBlockWorkList(const EnginePtr &engine, size_t maxSorts); // protected: callers go through instance() instead
295  public:
     // Factory taking the same arguments as the protected constructor.
     // NOTE(review): the declaration of `Ptr` for this class is not visible in this listing.
297  static Ptr instance(const EnginePtr &engine, size_t maxSorts);
298  virtual bool operator()(bool chain, const AttachedBasicBlock &args) override; // basic block (or placeholder) inserted into the CFG
299  virtual bool operator()(bool chain, const DetachedBasicBlock &args) override; // basic block (or placeholder) erased from the CFG
301  Sawyer::Container::DistinctList<rose_addr_t>& processedCallReturn(); // NOTE(review): presumably a reference to processedCallReturn_ -- confirm
     // NOTE(review): presumably performs the "move processedCallReturn to finalCallReturn and sort by approximate CFG
     // height" step described at the top of this class -- confirm against the implementation.
304  void moveAndSortCallReturn(const PartitionerConstPtr&);
305  };
306 
307  //--------------------------------------------------------------------------------------------------------------------------
308 protected:
309  // A work list providing constants from instructions that are part of the CFG.
     // NOTE(review): the class-head line for CodeConstants is missing from this listing. The two operator() overrides
     // below have the same signatures as those of BasicBlockWorkList, so it presumably also derives from
     // CfgAdjustmentCallback -- confirm against the full header.
311  public:
313 
314  private:
315  std::set<rose_addr_t> toBeExamined_; // instructions waiting to be examined
316  std::set<rose_addr_t> wasExamined_; // instructions we've already examined
317  rose_addr_t inProgress_; // instruction that is currently in progress
318  std::vector<rose_addr_t> constants_; // constants for the instruction in progress
319 
320  public:
321  ~CodeConstants();
322  protected:
323  CodeConstants(); // protected: callers go through instance() instead
324 
325  public:
     // Factory. NOTE(review): the declaration of `Ptr` for this class is not visible in this listing.
326  static Ptr instance();
327 
328  // Address of instruction being examined.
329  rose_addr_t inProgress();
330 
331  // Possibly insert more instructions into the work list when a basic block is added to the CFG
332  virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override;
333 
334  // Possibly remove instructions from the worklist when a basic block is removed from the CFG
335  virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override;
336 
337  // Return the next available constant if any.
     // The return type is Sawyer::Optional, so "none available" is presumably reported as an empty Optional.
338  Sawyer::Optional<rose_addr_t> nextConstant(const PartitionerConstPtr &partitioner);
339  };
340 
342  // Data members
344 private:
345  std::string name_; // factory name
346  Settings settings_; // Settings for the partitioner.
347  SgAsmInterpretation *interp_; // interpretation set by loadSpecimen
348  Disassembler::BasePtr disassembler_; // not ref-counted yet, but don't destroy it since user owns it
349  MemoryMapPtr map_; // memory map initialized by load()
350  BasicBlockWorkList::Ptr basicBlockWorkList_; // what blocks to work on next
351  CodeConstants::Ptr codeFunctionPointers_; // generates constants that are found in instruction ASTs
352  ProgressPtr progress_; // optional progress reporting
353  std::vector<std::string> specimen_; // list of additional command line arguments (often file names)
354 
356  // Construction and destruction
358 public:
359  virtual ~Engine(); // virtual destructor
360 
361 protected:
     // Default construction, copy construction, and copy assignment are all explicitly deleted.
363  Engine() = delete;
364  Engine(const Engine&) = delete;
365  Engine& operator=(const Engine&) = delete;
366 
367 protected:
     // The only non-deleted constructor declared here; it is protected.
     // NOTE(review): `name` and `settings` presumably initialize name_ and settings_ -- confirm against the implementation.
369  Engine(const std::string &name, const Settings &settings);
370 
371 public:
372  // [Robb Matzke 2023-03-03]: deprecated.
373  // This used to create a binary engine, so we leave it in place for a while for improved backward compatibility
374  static EngineBinaryPtr instance() ROSE_DEPRECATED("use Engine::forge or EngineBinary::instance");
375 
376 private:
377  void init(); // private initialization helper; NOTE(review): its call sites are not visible in this header
378 
380  // Command-line processing
382 public:
383 
400  virtual std::list<Sawyer::CommandLine::SwitchGroup> commandLineSwitches();
401 
407  std::list<Sawyer::CommandLine::SwitchGroup> allCommandLineSwitches();
408 
420  virtual std::pair<std::string/*title*/, std::string /*doc*/> specimenNameDocumentation() = 0;
421 
428  static std::list<std::pair<std::string /*title*/, std::string /*doc*/>> allSpecimenNameDocumentation();
429 
437  virtual void addToParser(Sawyer::CommandLine::Parser&);
438 
444  void addAllToParser(Sawyer::CommandLine::Parser&);
445 
458  virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description);
459 
461  // Factories
463 public:
472  static void registerFactory(const EnginePtr &factory);
473 
480  static bool deregisterFactory(const EnginePtr &factory);
481 
488  static std::vector<EnginePtr> registeredFactories();
489 
515  //---------------------------------------------------------
516  // These operate on specimens
517  //---------------------------------------------------------
518 
519  static EnginePtr forge(const std::vector<std::string> &specimen);
520  static EnginePtr forge(const std::string &specimen);
521 
522  //---------------------------------------------------------
523  // These operate on arguments as std::vector<std::string>
524  //---------------------------------------------------------
525 
526  // all args
527  static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
528  const PositionalArgumentParser&, const Settings&);
529 
530  // default settings
531  static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
532  const PositionalArgumentParser&);
533 
534  // default positional parser
535  static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&, const Settings&);
536 
537  // default positional parser and settings
538  static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&);
539 
540  //---------------------------------------------------------
541  // These operate on arguments as argc and argv
542  //---------------------------------------------------------
543 
544  // all args
545  static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&, const Settings&);
546 
547  // default settings
548  static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&);
549 
550  // default positional parser
551  static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const Settings&);
552 
553  // default positional parser and settings
554  static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&);
558  virtual bool matchFactory(const std::vector<std::string> &specimen) const = 0;
559 
564  virtual EnginePtr instanceFromFactory(const Settings&) = 0;
565 
571  bool isFactory() const;
572 
574  // Top-level, do everything functions
576 public:
601  SgAsmBlock* frontend(int argc, char *argv[],
602  const std::string &purpose, const std::string &description);
603  virtual SgAsmBlock* frontend(const std::vector<std::string> &args,
604  const std::string &purpose, const std::string &description) = 0;
607  // Basic top-level steps
610 public:
616  virtual void reset();
617 
644  Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[],
645  const std::string &purpose, const std::string &description) /*final*/;
646  virtual Sawyer::CommandLine::ParserResult parseCommandLine(const std::vector<std::string> &args,
647  const std::string &purpose, const std::string &description);
664  virtual SgAsmBlock* buildAst(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
665  SgAsmBlock *buildAst(const std::string &fileName) /*final*/;
668  // [Robb Matzke 2023-03-03]: deprecated
669  // Save a partitioner and AST to a file.
670  //
671  // The specified partitioner and the binary analysis components of the AST are saved into the specified file, which is
672  // created if it doesn't exist and truncated if it does exist. The name should end with a ".rba" extension. The file can
673  // be loaded by passing its name to the @ref partition function or by calling @ref loadPartitioner.
674  virtual void savePartitioner(const PartitionerConstPtr&, const boost::filesystem::path&, SerialIo::Format = SerialIo::BINARY)
675  ROSE_DEPRECATED("use Partitioner::saveAsRbaFile");
676 
677  // [Robb Matzke 2023-03-03]: deprecated
678  // Load a partitioner and an AST from a file.
679  //
680  // The specified RBA file is opened and read to create a new @ref Partitioner object and associated AST. The @ref
681  // partition function also understands how to open RBA files.
682  virtual PartitionerPtr loadPartitioner(const boost::filesystem::path&, SerialIo::Format = SerialIo::BINARY)
683  ROSE_DEPRECATED("use Partitioner::instanceFromRbaFile");
684 
686  // Command-line parsing
688 public:
708  virtual SgAsmInterpretation* parseContainers(const std::vector<std::string> &fileNames) = 0;
709  SgAsmInterpretation* parseContainers(const std::string &fileName) /*final*/;
732  virtual MemoryMapPtr loadSpecimens(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
733  MemoryMapPtr loadSpecimens(const std::string &fileName) /*final*/;
755  virtual PartitionerPtr partition(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
756  PartitionerPtr partition(const std::string &fileName) /*final*/;
765  virtual void checkSettings();
766 
768  // Container parsing
769  //
770  // top-level: parseContainers
772 public:
777  virtual bool isRbaFile(const std::string&);
778 
783  virtual bool isNonContainer(const std::string&) = 0;
784 
790  virtual bool areContainersParsed() const = 0;
791 
793  // Load specimens
794  //
795  // top-level: loadSpecimens
797 public:
801  virtual bool areSpecimensLoaded() const;
802 
806  virtual void adjustMemoryMap();
807 
816  MemoryMapPtr memoryMap() const /*final*/;
817  virtual void memoryMap(const MemoryMapPtr&);
820  // Disassembler
823 public:
842  virtual Disassembler::BasePtr obtainDisassembler();
843  virtual Disassembler::BasePtr obtainDisassembler(const Disassembler::BasePtr &hint);
847  // Partitioner high-level functions
849  //
850  // top-level: partition
852 public:
854  virtual void checkCreatePartitionerPrerequisites() const;
855 
863  virtual PartitionerPtr createBarePartitioner();
864 
868  virtual PartitionerPtr createPartitioner() = 0;
869 
873  virtual void runPartitionerInit(const PartitionerPtr&) = 0;
874 
878  virtual void runPartitionerRecursive(const PartitionerPtr&) = 0;
879 
884  virtual void runPartitionerFinal(const PartitionerPtr&) = 0;
885 
891  virtual void runPartitioner(const PartitionerPtr&);
892 
894  // Partitioner mid-level functions
895  //
896  // These are the functions called by the partitioner high-level stuff. These are sometimes overridden in subclasses,
897  // although it is more likely that the high-level stuff is overridden.
899 public:
904  virtual void labelAddresses(const PartitionerPtr&, const Configuration&);
905 
909  virtual std::vector<DataBlockPtr> makeConfiguredDataBlocks(const PartitionerPtr&, const Configuration&);
910 
914  virtual std::vector<FunctionPtr> makeConfiguredFunctions(const PartitionerPtr&, const Configuration&);
915 
921  virtual void updateAnalysisResults(const PartitionerPtr&);
922 
924  // Partitioner low-level functions
925  //
926  // These are functions that a subclass seldom overrides, and maybe even shouldn't override because of their complexity or
927  // the way they interact with one another.
929 public:
930 
931 
933  // Build AST
935 public:
936  // Used internally by ROSE's ::frontend to disassemble instructions to build the AST that goes under each SgAsmInterpretation.
937  static void disassembleForRoseFrontend(SgAsmInterpretation*);
938 
940  // Settings and properties
942 public:
948  const std::string& name() const /*final*/;
949  void name(const std::string&);
958  const Settings& settings() const /*final*/;
959  Settings& settings() /*final*/;
960  void settings(const Settings&) /*final*/;
968  BasicBlockWorkList::Ptr basicBlockWorkList() const /*final*/;
969  void basicBlockWorkList(const BasicBlockWorkList::Ptr&) /*final*/;
970 
976  CodeConstants::Ptr codeFunctionPointers() const /*final*/;
977  void codeFunctionPointers(const CodeConstants::Ptr&) /*final*/;
986  Disassembler::BasePtr disassembler() const;
987  virtual void disassembler(const Disassembler::BasePtr&);
997  SgAsmInterpretation* interpretation() const /*final*/;
998  virtual void interpretation(SgAsmInterpretation*);
1006  ProgressPtr progress() const /*final*/;
1007  virtual void progress(const ProgressPtr&);
1015  const std::vector<std::string>& specimen() const /*final*/;
1016  virtual void specimen(const std::vector<std::string>&);
1019  // Internal stuff
1022 protected:
1023  // Similar to ::frontend but a lot less complicated.
1024  virtual SgProject* roseFrontendReplacement(const std::vector<boost::filesystem::path> &fileNames) = 0;
1025 };
1026 
1027 } // namespace
1028 } // namespace
1029 } // namespace
1030 
1031 #endif
1032 #endif
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const =0
Return specimen from positional arguments.
Instruction basic block.
Base class for CFG-adjustment callbacks.
Base class for adjusting basic blocks during discovery.
Definition: Modules.h:39
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments()
Constructor returning first group of arguments.
Engine for specimens containing machine instructions.
Definition: EngineBinary.h:47
Base class for engines driving the partitioner.
Engine()=delete
Default constructor.
STL namespace.
Main namespace for the ROSE library.
Base class for binary state input and output.
Definition: SerialIo.h:112
Exception(const std::string &)
Construct an exception with a message string.
Name space for the entire library.
Definition: FeasiblePath.h:767
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
FirstPositionalArguments(size_t n)
Constructor returning up to n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
Creates SharedPointer from this.
DisassemblerSettings disassembler
Settings for creating the disassembler.
Sawyer::SharedPointer< Engine > EnginePtr
Shared-ownership pointer for Engine.
virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override
Called when basic block is attached or placeholder inserted.
AstConstructionSettings astConstruction
Settings for constructing the AST.
virtual bool operator()(bool chain, const Args &args) override
Callback method.
Base class for reference counted objects.
Definition: SharedObject.h:64
This class represents a source project, with a list of SgFile objects and global information about th...
PartitionerSettings partitioner
Settings for creating a partitioner.
LoaderSettings loader
Settings used during specimen loading.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:293
Represents an interpretation of a binary container.
EngineSettings engine
Settings that control engine behavior.
Container associating values with keys.
Definition: Sawyer/Map.h:66
Return all positional arguments as the specimen.