ROSE  0.11.145.0
BinaryAnalysis/Partitioner2/BasicTypes.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
2 #define ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <boost/filesystem.hpp>
7 #include <boost/regex.hpp>
8 #include <boost/serialization/access.hpp>
9 #include <boost/serialization/nvp.hpp>
10 #include <boost/serialization/version.hpp>
11 #include <Sawyer/Graph.h>
12 #include <Sawyer/Map.h>
13 #include <Sawyer/Set.h>
14 #include <set>
15 #include <string>
16 #include <vector>
17 
18 // Define this as one if you want extra invariant checks that are quite expensive, or define as zero. This only makes a
19 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
20 // inexpensive) checks are performed.
21 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
22 
23 namespace Rose {
24 namespace BinaryAnalysis {
25 namespace Partitioner2 {
26 
28 namespace Precision {
29 enum Level {
30  LOW,
34 };
35 } // namespace
36 
// Two-valued selector. The enum lives in its own namespace, so the enumerators are spelled
// AllowParallelEdges::NO and AllowParallelEdges::YES (type AllowParallelEdges::Type) instead
// of bare NO/YES. No explicit values are given, so NO is 0 and YES is 1.
38 namespace AllowParallelEdges {
39 enum Type {
40  NO,
41  YES
42 };
43 } // namespace
44 
46 enum VertexType {
54 };
55 
// Edge classification values (presumably for control-flow-graph edges -- confirm against the
// full header). Every enumerator is assigned an explicit, distinct single-bit value, from
// 0x01 through 0x20.
57 enum EdgeType {
58  E_NORMAL = 0x00000001,
59  E_FUNCTION_CALL = 0x00000002,
60  E_FUNCTION_RETURN = 0x00000004,
63  E_CALL_RETURN = 0x00000008,
69  E_FUNCTION_XFER = 0x00000010,
75  E_USER_DEFINED = 0x00000020,
78 };
79 
81 enum Confidence {
84 };
85 
90 };
91 
101  bool allowEmptyGlobalBlock = false;
102 
109 
116  bool allowEmptyBasicBlocks = false;
117 
129  bool copyAllInstructions = true;
130 
131 private:
132  friend class boost::serialization::access;
133 
// Boost.Serialization hook; it is private and reached through the boost::serialization::access
// friend declaration. Passes the four settings to the archive `s` as name-value pairs in a
// fixed order. The `version` argument is not consulted, so every archive version has the
// same layout.
134  template<class S>
135  void serialize(S &s, unsigned version) {
136  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
137  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
138  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
139  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
140  }
141 
142 public:
149  s.allowEmptyGlobalBlock = false;
151  s.allowEmptyBasicBlocks = false;
152  s.copyAllInstructions = true;
153  return s;
154  }
155 
163  s.allowEmptyGlobalBlock = true;
165  s.allowEmptyBasicBlocks = true;
166  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
167  return s;
168  }
169 };
170 
172 // Settings. All settings must act like properties, which means the following:
173 // 1. Each setting must have a name that does not begin with a verb.
174 // 2. Each setting must have a command-line switch to manipulate it.
175 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
176 // 4. Each setting must have a modifier method (same name as property but takes a value and returns void)
178 
190 };
191 
223 
230  bool memoryIsExecutable = false;
231 
239  bool linkObjectFiles = true;
240 
248  bool linkStaticArchives = true;
249 
260  std::string linker = "ld -o %o --unresolved-symbols=ignore-all --whole-archive %f";
261 
266  std::vector<std::string> envEraseNames;
267 
272  std::vector<boost::regex> envErasePatterns;
273 
279  std::vector<std::string> envInsert;
280 
281 private:
282  friend class boost::serialization::access;
283 
// Boost.Serialization hook; it is private and reached through the boost::serialization::access
// friend declaration. Five members are archived unconditionally; the three env* members are
// archived only when the class version is 1 or greater.
// NOTE(review): linkObjectFiles, linkStaticArchives and linker are not archived by this
// function -- confirm that is intentional.
284  template<class S>
285  void serialize(S &s, unsigned version) {
286  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
287  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
288  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
289  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
290  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
     // Members below are skipped for version-0 archives and keep whatever value they hold.
291  if (version >= 1) {
292  s & BOOST_SERIALIZATION_NVP(envEraseNames);
293  s & BOOST_SERIALIZATION_NVP(envInsert);
294 
295  // There's no serialization for boost::regex, so we do it ourselves.
     // reStrings mirrors envErasePatterns as plain strings; it is what actually goes
     // through the archive under the name "reStrings".
296  std::vector<std::string> reStrings;
297  for (const boost::regex &re: envErasePatterns)
298  reStrings.push_back(re.str());
299  s & BOOST_SERIALIZATION_NVP(reStrings);
     // The patterns are rebuilt from reStrings only when the pattern list is empty at this
     // point. If the list already holds patterns it is left unchanged, whatever reStrings
     // now contains.
     // NOTE(review): that includes loading into an object that already has patterns --
     // confirm the archived patterns are meant to be ignored in that case.
300  if (envErasePatterns.empty()) {
301  for (const std::string &reStr: reStrings)
302  envErasePatterns.push_back(boost::regex(reStr));
303  }
304  }
305  }
306 };
307 
317  bool doDisassemble = true;
318 
324  std::string isaName;
325 
326 private:
327  friend class boost::serialization::access;
328 
// Boost.Serialization hook; it is private and reached through the boost::serialization::access
// friend declaration. doDisassemble is archived only for class version 1 or greater; isaName
// is archived for every version.
329  template<class S>
330  void serialize(S &s, unsigned version) {
     // The `if` has no braces, so it governs only the doDisassemble statement that follows.
331  if (version >= 1)
332  s & BOOST_SERIALIZATION_NVP(doDisassemble);
333  s & BOOST_SERIALIZATION_NVP(isaName);
334  }
335 };
336 
347 };
348 
357  bool usingSemantics = false;
358 
362  bool checkingCallBranch = false;
363 
374 
381  bool ignoringUnknownInsns = false;
382 
383 private:
384  friend class boost::serialization::access;
385 
// Boost.Serialization hook; it is private and reached through the boost::serialization::access
// friend declaration. The first three members are archived for every class version;
// ignoringUnknownInsns is archived only for version 1 or greater and is otherwise left at
// its current value.
386  template<class S>
387  void serialize(S &s, const unsigned version) {
388  s & BOOST_SERIALIZATION_NVP(usingSemantics);
389  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
390  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
     // The brace-less `if` covers only the single statement below it.
391  if (version >= 1)
392  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
393  }
394 };
395 
407 
412  std::vector<rose_addr_t> functionStartingVas;
413 
438  bool followingGhostEdges = false;
439 
445  bool discontiguousBlocks = true;
446 
452  size_t maxBasicBlockSize = 0;
453 
460  std::vector<rose_addr_t> ipRewrites;
461 
467 
474  bool findingDeadCode = true;
475 
480  rose_addr_t peScramblerDispatcherVa = 0;
481 
488 
494 
500 
506 
511 
517 
523 
529 
535 
541 
546  bool doingPostAnalysis = true;
547 
552 
557 
562 
567  bool doingPostFunctionNoop = false;
568 
574 
586 
592 
598 
604  bool findingThunks = true;
605 
610  bool splittingThunks = false;
611 
617 
626 
633 
638  bool namingSyscalls = true;
639 
646  boost::filesystem::path syscallHeader;
647 
652  bool demangleNames = true;
653 
654 private:
655  friend class boost::serialization::access;
656 
// Boost.Serialization hook; it is private and reached through the boost::serialization::access
// friend declaration. Members are passed to the archive in a fixed order. The `version`
// tests select between archive layouts:
//   - a member guarded by a plain `if (version >= N)` is skipped for lower versions and keeps
//     its current value;
//   - findingIntraFunctionCode, namingConstants and namingStrings have fallback branches that
//     read the value as a bool and convert it.
// The statement order defines the archive layout, so it must not be rearranged.
657  template<class S>
658  void serialize(S &s, unsigned version) {
659  s & BOOST_SERIALIZATION_NVP(base);
660  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
661  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
662  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
663  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
     // ipRewrites is present only in version 6 and later; the brace-less `if` covers just it.
664  if (version >= 6)
665  s & BOOST_SERIALIZATION_NVP(ipRewrites);
666  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
667  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
668  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
     // For versions below 2 the value goes through the archive as a bool under the same
     // name: saving stores (count > 0), loading maps true to 10 and false to 0.
669  if (version >= 2) {
670  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
671  } else {
672  bool temp = false;
673  if (S::is_saving::value)
674  temp = findingIntraFunctionCode > 0;
675  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
676  if (S::is_loading::value)
677  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
678  }
679  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
680  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
     // Present only in version 4 and later.
681  if (version >= 4)
682  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
     // These five members are present only in version 5 and later.
683  if (version >= 5) {
684  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
685  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
686  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
687  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
688  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
689  }
690  s & BOOST_SERIALIZATION_NVP(interruptVector);
691  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
692  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
693  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
694  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
695  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
696  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
     // Present only in version 3 and later.
697  if (version >= 3)
698  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
699  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
700  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
701  s & BOOST_SERIALIZATION_NVP(findingThunks);
702  s & BOOST_SERIALIZATION_NVP(splittingThunks);
703  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
     // Below version 8, and only when loading, the value is read as a bool named
     // "namingConstants": true becomes AddressInterval::whole(), false becomes a
     // default-constructed AddressInterval (presumably empty -- confirm). Below version 8
     // when not loading, nothing is archived for this member.
     // NOTE(review): this branch tests S::is_loading() while the branches above test
     // S::is_loading::value -- presumably equivalent; confirm.
704  if (version >= 8) {
705  s & BOOST_SERIALIZATION_NVP(namingConstants);
706  } else if (S::is_loading()) {
707  bool b;
708  s & boost::serialization::make_nvp("namingConstants", b);
709  if (b) {
710  namingConstants = AddressInterval::whole();
711  } else {
712  namingConstants = AddressInterval();
713  }
714  }
     // Same scheme as namingConstants, with the layout change at version 7.
715  if (version >= 7) {
716  s & BOOST_SERIALIZATION_NVP(namingStrings);
717  } else if (S::is_loading()) {
718  bool b;
719  s & boost::serialization::make_nvp("namingStrings", b);
720  if (b) {
721  namingStrings = AddressInterval::whole();
722  } else {
723  namingStrings = AddressInterval();
724  }
725  }
726  s & BOOST_SERIALIZATION_NVP(demangleNames);
     // namingSyscalls and syscallHeader are present only in version 1 and later.
727  if (version >= 1) {
728  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
729 
730  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
731  // responsibility, so we do it the hard way.
     // The path travels through the archive as a std::string named "syscallHeader":
     // filled from the path when saving, assigned back to the path when loading.
732  std::string temp;
733  if (S::is_saving::value)
734  temp = syscallHeader.string();
735  s & boost::serialization::make_nvp("syscallHeader", temp);
736  if (S::is_loading::value)
737  syscallHeader = temp;
738  }
739  }
740 };
741 
742 // BOOST_CLASS_VERSION(PartitionerSettings, 1); -- see end of file (cannot be in a namespace)
743 
752  std::vector<std::string> configurationNames;
753 
760  bool exitOnError = true;
761 
762 private:
763  friend class boost::serialization::access;
764 
// Boost.Serialization hook; it is private and reached through the boost::serialization::access
// friend declaration. Archives configurationNames and then exitOnError as name-value pairs.
// The `version` argument is not consulted.
765  template<class S>
766  void serialize(S &s, unsigned version) {
767  s & BOOST_SERIALIZATION_NVP(configurationNames);
768  s & BOOST_SERIALIZATION_NVP(exitOnError);
769  }
770 };
771 
772 // Additional forward declarations (incomplete types).
773 class AddressUser;
774 class AddressUsers;
775 class AddressUsageMap;
776 
777 class BasicBlock;
780 class BasicBlockError;
781 
782 class BasicBlockCallback;
785 class BasicBlockSuccessor;
786 using BasicBlockSuccessors = std::vector<BasicBlockSuccessor>;
789 
790 class CfgEdge;
791 
792 class CfgVertex;
793 
796 class Configuration;
797 
798 class DataBlock;
801 class DataBlockError;
802 
803 class Engine;
805 
806 class EngineBinary;
808 
809 class EngineJvm;
811 
812 class Exception;
813 
814 class Function;
819 class FunctionCallGraph;
820 
829 class FunctionError;
830 
831 class Partitioner;
835 class PlaceholderError;
836 
837 class Reference;
838 using ReferenceSet = std::set<Reference>;
841 class ThunkPredicates;
844 } // namespace
845 } // namespace
846 } // namespace
847 
848 // Class versions must be at global scope
853 
854 #endif
855 #endif
bool findingInterFunctionCalls
Whether to search for function calls between existing functions.
bool splittingThunks
Whether to split thunk instructions into mini functions.
bool memoryIsExecutable
Whether all of memory should be made executable.
std::set< Reference > ReferenceSet
Set of references.
bool linkStaticArchives
Whether to link library archives before parsing.
bool findingExportFunctions
Whether to make functions at export addresses.
Base class for matching function padding.
Definition: Modules.h:134
MemoryDataAdjustment
How the partitioner should globally treat memory.
bool findingDataFunctionPointers
Whether to search static data for function pointers.
Assume a function returns if the may-return analysis cannot decide whether it may return...
Base class for CFG-adjustment callbacks.
Base class for adjusting basic blocks during discovery.
Definition: Modules.h:39
Base class for matching function prologues.
Definition: Modules.h:108
Reference to a function, basic block, instruction, or address.
Definition: Reference.h:23
bool doingPostFunctionStackDelta
Whether to run the function stack delta analysis.
Engine for specimens containing machine instructions.
Definition: EngineBinary.h:47
Base class for engines driving the partitioner.
size_t deExecuteZerosLeaveAtBack
When to remove execute permission from zero bytes.
AddressInterval namingStrings
Addresses where strings might start.
bool copyAllInstructions
Whether to allow shared instructions in the AST.
AddressInterval namingConstants
Whether to give names to constants.
Special vertex destination for indeterminate edges.
static AstConstructionSettings permissive()
Default permissive settings.
Assume that all functions return without ever running the may-return analysis.
Main namespace for the ROSE library.
bool doingPostCallingConvention
Whether to run calling-convention analysis.
std::vector< rose_addr_t > functionStartingVas
Starting addresses for disassembly.
bool linkObjectFiles
Whether to link object files before parsing.
VertexType
Partitioner control flow vertex types.
static AstConstructionSettings strict()
Default strict settings.
bool discontiguousBlocks
Whether to allow discontiguous basic blocks.
size_t functionReturnAnalysisMaxSorts
Maximum number of function may-return sorting operations.
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
std::vector< std::string > configurationNames
Configuration file names.
bool doingPostFunctionMayReturn
Whether to run the function may-return analysis.
std::vector< std::string > envInsert
Environment variables to insert.
The value is an assumption without any proof.
bool doingPostAnalysis
Whether to perform any post-partitioning analysis steps.
static Interval whole()
Construct an interval that covers the entire domain.
Definition: Interval.h:180
static Interval hull(T v1, T v2)
Construct an interval from two endpoints.
Definition: Interval.h:151
bool findingImportFunctions
Whether to make functions at import addresses.
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
size_t deExecuteZerosLeaveAtFront
When to remove execute permission from zero bytes.
bool findingEntryFunctions
Whether to make functions at program entry points.
std::vector< BasicBlockSuccessor > BasicBlockSuccessors
All successors in no particular order.
SemanticMemoryParadigm semanticMemoryParadigm
Type of container for semantic memory.
bool checkingCallBranch
Whether to look for function calls used as branches.
bool findingSymbolFunctions
Whether to make functions according to symbol tables.
std::vector< boost::regex > envErasePatterns
Patterns to erase from the environment.
bool basicBlockSemanticsAutoDrop
Whether to automatically drop semantics for attached basic blocks.
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
size_t deExecuteZerosThreshold
When to remove execute permission from zero bytes.
std::vector< std::string > envEraseNames
Names to erase from the environment.
bool findingCodeFunctionPointers
Whether to search existing instructions for function pointers.
Special vertex destination for non-existing basic blocks.
Assume that a function cannot return without ever running the may-return analysis.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:293
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
AddressInterval interruptVector
Property: Location of machine interrupt vector.
bool findingFunctionCallFunctions
Whether to turn function call targets into functions.
Engine for Java Virtual Machine (JVM) specimens.
Definition: EngineJvm.h:20
boost::filesystem::path syscallHeader
Header file in which system calls are defined.